Commit 2239d1e1 authored by 张彦钊

add test

parent e9271be9
......@@ -68,25 +68,26 @@ def sort_app():
"job": {"智联招聘", "前程无忧", "斗米", "拉勾", "Boss直聘", "猎聘同道", "智联招聘"}
}
df["app_list"] = df["app_list"].apply(json_format)
n = df.shape[0]
for i in category.keys():
df[i] = df["app_list"].apply(lambda x: 1 if len(x & category[i]) > 0 else 0)
print(i)
print(df[i].value_counts())
print(df.loc[df[i]==1].shape[0]/n)
df = df.drop("app_list",axis=1)
yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8')
print(df.shape)
n = 200000
for i in range(0,df.shape[0],n):
print(i)
if i == 0:
temp = df.loc[0:n]
elif i+n > df.shape[0]:
temp = df.loc[i+1:]
else:
temp = df.loc[i+1:i+n]
pd.io.sql.to_sql(temp, "app_list_sort", yconnect, schema='jerry_test', if_exists='append', index=False)
print("insert done")
# yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8')
# print(df.shape)
# n = 200000
# for i in range(0,df.shape[0],n):
# print(i)
# if i == 0:
# temp = df.loc[0:n]
# elif i+n > df.shape[0]:
# temp = df.loc[i+1:]
# else:
# temp = df.loc[i+1:i+n]
# pd.io.sql.to_sql(temp, "app_list_sort", yconnect, schema='jerry_test', if_exists='append', index=False)
# print("insert done")
......
......@@ -48,7 +48,7 @@ def click():
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
sql = "select d.cid_id,f.level1_ids,f.level2_ids from data_feed_click d left join diary_feat f " \
"on d.cid_id = f.diary_id where d.device_id = '358035085192742' " \
"and d.cid_type = 'diary' and d.stat_date > '2018-12-20'"
"and (d.cid_type = 'diary' or d.cid_type = 'diary_video') and d.stat_date > '2018-12-20'"
df = con_sql(db, sql)
n = df.shape[0]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment