Commit 5d6bc463 authored by 张彦钊

按照用户去重应用列表样本

parent c102f5d4
......@@ -29,10 +29,13 @@ def json_format(x):
def sort_app():
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
sql = "select device_id,app_list from device_id_applist"
sql = "select device_id,app_list,stat_date from device_id_applist"
df = con_sql(db, sql).dropna()
df = df.rename(columns={0: "device_id", 1: "app_list"})
df = df.rename(columns={0: "device_id", 1: "app_list",2:"stat_date"})
print(df.shape)
df = df.sort_values(by="stat_date",ascending=False)
print(df.head())
df = df.drop("stat_date",axis=1)
df = df.drop_duplicates("device_id")
print(df.shape)
df = df.loc[df["app_list"].apply(is_json)]
......
......@@ -177,6 +177,7 @@ def get_data():
for i in ["ucity_id","clevel1_id","ccity_name","device_type","manufacturer","channel","top"]:
features = features + len(df[i].unique())
df[i] = df[i].fillna(0)
df["time"] = df["time"].fillna(df["time"].mode()[0])
print(df.count())
print(df.head(2))
print("fields:{}".format(df.shape[1]-1))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment