Commit 085146e6 authored by 张彦钊

按照用户去重应用列表样本

parent 9422099a
......@@ -32,8 +32,12 @@ def sort_app():
sql = "select device_id,app_list from device_id_applist"
df = con_sql(db, sql).dropna()
df = df.rename(columns={0: "device_id", 1: "app_list"})
print(df.shape)
df = df.drop_duplicates("device_id")
print(df.shape)
df = df.loc[df["app_list"].apply(is_json)]
category = {"competitor":{"新氧美容","悦美","美呗整形","悦美微整形","如丽美容","医美咖","整形去哪儿","美黛拉","整形思密达","美芽"},
"dianshang":{"京东","淘宝","唯品会","天猫","苏宁易购","国美","当当","亚马逊","网易严选","小米有品"},
"kuajing_dianshang": {"小红书", "网易考拉", "洋码头", "达令全球好货", "海狐海淘",
......
......@@ -146,20 +146,17 @@ def get_data():
start = (temp - datetime.timedelta(days=30)).strftime("%Y-%m-%d")
print(start)
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
# sql = "select e.y,e.z,e.stat_date,e.ucity_id,e.clevel1_id,e.ccity_name," \
# "u.device_type,u.manufacturer,u.channel,c.top,cid_time.time,e.device_id," \
# "a.competitor,a.dianshang,a.kuajing_dianshang,a.zhibo,a.youxizhibo,a.short_video,a.meitu,a.tiyu," \
# "a.read,a.finance,a.fashion_clothes,a.muying,a.fresh,a.bijia,a.travel,a.airplane," \
# "a.love,a.stock,a.car,a.child,a.homework,a.work,a.job " \
# "from esmm_train_data e left join user_feature_clean u on e.device_id = u.device_id " \
# "left join cid_type_top c on e.device_id = c.device_id left join cid_time on e.cid_id = cid_time.cid_id " \
# "left join app_list_sort a on e.device_id = a.device_id " \
# "where e.stat_date >= '{}'".format(start)
sql = "select e.y,e.z,e.stat_date,e.ucity_id,e.clevel1_id,e.ccity_name," \
"u.device_type,u.manufacturer,u.channel,c.top,cid_time.time,e.device_id " \
"u.device_type,u.manufacturer,u.channel,c.top,cid_time.time,e.device_id," \
"a.competitor,a.dianshang,a.kuajing_dianshang,a.zhibo,a.youxizhibo,a.short_video,a.meitu,a.tiyu," \
"a.read,a.finance,a.fashion_clothes,a.muying,a.fresh,a.bijia,a.travel,a.airplane," \
"a.love,a.stock,a.car,a.child,a.homework,a.work,a.job " \
"from esmm_train_data e left join user_feature_clean u on e.device_id = u.device_id " \
"left join cid_type_top c on e.device_id = c.device_id left join cid_time on e.cid_id = cid_time.cid_id " \
"left join cid_type_top c on e.device_id = c.device_id " \
"left join cid_time on e.cid_id = cid_time.cid_id " \
"left join app_list_sort a on e.device_id = a.device_id " \
"where e.stat_date >= '{}'".format(start)
df = con_sql(db, sql)
print(df.shape)
df = df.rename(columns={0: "y", 1: "z", 2: "stat_date", 3: "ucity_id",4: "clevel1_id", 5: "ccity_name",
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment