Commit 930bd6b5 authored by 张彦钊

add exp

parent b5793e38
......@@ -47,13 +47,25 @@ def clean():
for i in manufacturer:
if df.loc[df["manufacturer"]==i].shape[0]/n < 0.0005:
df.loc[df["manufacturer"] == i,["manufacturer"]] = "other"
print(df["manufacturer"].unique())
channel = df["channel"].unique()
for i in channel:
if df.loc[df["channel"] == i].shape[0] / n < 0.0001:
df.loc[df["channel"] == i, ["channel"]] = "other"
from sqlalchemy import create_engine
yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8')
n = 200000
for i in range(0,df.shape[0],n):
print(i)
if i == 0:
temp = df.loc[0:n]
elif i+n > df.shape[0]:
temp = df.loc[i+1:]
else:
temp = df.loc[i+1:i+n]
pd.io.sql.to_sql(temp, "user_feature_clean", yconnect, schema='jerry_test', if_exists='append', index=False)
print("insert done")
print(df["channel"].unique())
if __name__ == "__main__":
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment