Commit 99b7a9b0 authored by 高雅喆

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

drop duplicate
parents 6f906fb2 cb63647a
...@@ -34,7 +34,6 @@ def sort_app(): ...@@ -34,7 +34,6 @@ def sort_app():
df = df.rename(columns={0: "device_id", 1: "app_list",2:"stat_date"}) df = df.rename(columns={0: "device_id", 1: "app_list",2:"stat_date"})
print(df.shape) print(df.shape)
df = df.sort_values(by="stat_date",ascending=False) df = df.sort_values(by="stat_date",ascending=False)
print(df.head())
df = df.drop("stat_date",axis=1) df = df.drop("stat_date",axis=1)
df = df.drop_duplicates("device_id") df = df.drop_duplicates("device_id")
print(df.shape) print(df.shape)
...@@ -95,11 +94,11 @@ def sort_app(): ...@@ -95,11 +94,11 @@ def sort_app():
for i in range(0,df.shape[0],n): for i in range(0,df.shape[0],n):
print(i) print(i)
if i == 0: if i == 0:
temp = df.loc[0:n] temp = df.iloc[0:n]
elif i+n > df.shape[0]: elif i+n > df.shape[0]:
temp = df.loc[i+1:] temp = df.iloc[i:]
else: else:
temp = df.loc[i+1:i+n] temp = df.loc[i:i+n]
pd.io.sql.to_sql(temp, "app_list_sort", yconnect, schema='jerry_test', if_exists='append', index=False) pd.io.sql.to_sql(temp, "app_list_sort", yconnect, schema='jerry_test', if_exists='append', index=False)
print("insert done") print("insert done")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment