Commit cb63647a authored by 张彦钊

按照用户去重应用列表样本

parent b058016d
......@@ -34,7 +34,6 @@ def sort_app():
df = df.rename(columns={0: "device_id", 1: "app_list",2:"stat_date"})
print(df.shape)
df = df.sort_values(by="stat_date",ascending=False)
print(df.head())
df = df.drop("stat_date",axis=1)
df = df.drop_duplicates("device_id")
print(df.shape)
......@@ -95,11 +94,11 @@ def sort_app():
for i in range(0,df.shape[0],n):
print(i)
if i == 0:
temp = df.loc[0:n]
temp = df.iloc[0:n]
elif i+n > df.shape[0]:
temp = df.loc[i+1:]
temp = df.iloc[i:]
else:
temp = df.loc[i+1:i+n]
temp = df.loc[i:i+n]
pd.io.sql.to_sql(temp, "app_list_sort", yconnect, schema='jerry_test', if_exists='append', index=False)
print("insert done")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment