Commit b5793e38 authored by 张彦钊

add exp

parent 1918ceb9
......@@ -37,7 +37,26 @@ def exp():
print(sorted(channel_map.items(), key=lambda x: x[1]))
def _merge_rare_values(df, column, min_share, total):
    """Relabel infrequent values of ``column`` as "other", in place.

    A value is considered infrequent when the number of rows holding it,
    divided by ``total``, is strictly below ``min_share``.

    Args:
        df: DataFrame to modify in place.
        column: Name of the column whose rare values are merged.
        min_share: Minimum fraction of rows a value needs to keep its label.
        total: Row count used as the denominator of the fraction.
    """
    # One counting pass over the column instead of one boolean-mask scan per
    # distinct value. Missing values are not counted, so they are never
    # relabelled.
    counts = df[column].value_counts()
    rare = counts[counts / total < min_share].index
    # Skip the assignment entirely when nothing is rare so the column is left
    # untouched (no write, no dtype change).
    if len(rare) > 0:
        df.loc[df[column].isin(rare), column] = "other"


def clean():
    """Load user features and collapse rare manufacturers/channels into "other".

    Reads device_id, device_type, manufacturer, channel and city_id from the
    ``user_feature`` table, relabels manufacturers seen in fewer than 0.05% of
    rows and channels seen in fewer than 0.01% of rows as "other", and prints
    the remaining distinct values of each column.
    """
    # NOTE(review): host and credentials are hard-coded in source; they should
    # be moved to configuration / environment and the password rotated.
    # NOTE(review): the connection is not closed here; presumably con_sql()
    # takes care of that -- confirm against its definition.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    sql = "select device_id,device_type,manufacturer,channel,city_id from user_feature"
    df = con_sql(db, sql)
    # The frame comes back with positional integer column labels; give them
    # the names of the selected SQL columns.
    df = df.rename(columns={0: "device_id", 1: "device_type", 2: "manufacturer", 3: "channel", 4: "city_id"})
    n = df.shape[0]

    _merge_rare_values(df, "manufacturer", 0.0005, n)
    print(df["manufacturer"].unique())

    _merge_rare_values(df, "channel", 0.0001, n)
    print(df["channel"].unique())
if __name__ == "__main__":
    # Script entry point: call exp() and then clean(), in that order.
    for step in (exp, clean):
        step()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment