Commit 1e4ca86c authored by 高雅喆

增加首页精选的日记点击和美购首页的美购点击

parent d52cd012
......@@ -180,11 +180,11 @@ def get_2_tags_coincide_rate(device_order_tags, device_portrait_result, portrait
return result
def get_user_order_info_yesterday():
def get_user_order_info_yesterday(order_date, order_date_tomorrow):
# 获取昨天下单的用户设备id,下单的美购,美购对应的tag
# api_order只有用户的user_id,一个user_id对应多个device_id
# 用户一次可以下多个订单(美购),一个美购对应多个tag
sql_order_device_info_yesterday = """
sql_device_info_yesterday = """
SELECT tmp1.user_id,
c.device_id,
tmp1.service_ids,
......@@ -226,25 +226,75 @@ def get_user_order_info_yesterday():
WHERE c.device_id IS NOT NULL
""".format(order_date=order_date, order_date_tomorrow=order_date_tomorrow)
mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing',
sql_order_device_info_yesterday)
sql_device_info_yesterday)
device_ids_info = [(i["device_id"], int(i["pay_time"])) for i in mysql_results]
all_device_order_tags = {i["device_id"]: [int(tag) for tag in i["tag_ids"].split(",")] for i in mysql_results}
return device_ids_info, all_device_order_tags
def get_user_diary_click_info_yesterday():
pass
def get_user_service_click_info_yesterday():
pass
all_device_action_tags = {i["device_id"]: [int(tag) for tag in i["tag_ids"].split(",")] for i in mysql_results}
return device_ids_info, all_device_action_tags
def get_user_diary_click_info_yesterday(click_date, click_date_tomorrow):
    """Collect per-device diary clicks from the home featured feed, with their tags.

    The query keeps rows of ``jerry_test.user_click_diary_log`` whose
    ``action_from`` is ``'home精选'`` and ``action`` is ``'on_click_card'``,
    whose ``click_time`` lies strictly between ``click_date`` and
    ``click_date_tomorrow``, and whose joined tag has ``tag_type`` below 4.
    Rows are grouped by ``cl_id`` so each device appears once.

    Args:
        click_date: Lower bound (exclusive) substituted into the SQL text.
        click_date_tomorrow: Upper bound (exclusive) substituted into the SQL text.

    Returns:
        A 2-tuple ``(device_clicks, tags_by_device)`` where ``device_clicks``
        is a list of ``(device_id, click_time)`` pairs (``click_time`` is the
        device's latest unix timestamp as an ``int``) and ``tags_by_device``
        maps each device id to the list of ``int`` tag ids of the clicked
        diaries.
    """
    # One device can click many diaries and one diary can carry many tags, so
    # the query collapses everything into a single row per device.
    # NOTE(review): GROUP_CONCAT output may be truncated by the server's
    # group_concat_max_len setting - confirm it is large enough for tag_ids.
    query_template = """
SELECT cl_id device_id,
max(click_time) AS click_time,
group_concat(DISTINCT `diary_id` separator ',') diary_ids,
group_concat(DISTINCT `tag_id` separator ',') tag_ids
FROM
(SELECT d.cl_id,
d.diary_id,
unix_timestamp(d.click_time) AS click_time,
e.tag_id
FROM jerry_test.user_click_diary_log d
LEFT JOIN eagle.src_mimas_prod_api_diary_tags e ON d.diary_id = e.diary_id
LEFT JOIN eagle.src_zhengxing_api_tag f ON e.tag_id = f.id
WHERE d.action_from='home精选'
AND d.action='on_click_card'
AND d.click_time>'{click_date}'
AND d.click_time<'{click_date_tomorrow}'
AND f.tag_type+0 <'4'+0) tmp2
GROUP BY cl_id
"""
    query = query_template.format(click_date=click_date, click_date_tomorrow=click_date_tomorrow)

    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    # Presumably get_data_by_mysql returns a re-iterable sequence of dict-like
    # rows (it is walked twice below) - verify against its definition.
    rows = get_data_by_mysql('172.16.40.158', 4000, 'root', '3SYz54LS9#^9sBvC', 'jerry_test', query)

    # First pass: (device id, latest click timestamp) pairs in result order.
    device_clicks = []
    for row in rows:
        device_clicks.append((row["device_id"], int(row["click_time"])))

    # Second pass: device id -> tag ids parsed from the comma-joined column.
    tags_by_device = {}
    for row in rows:
        device = row["device_id"]
        tags_by_device[device] = list(map(int, row["tag_ids"].split(",")))

    return device_clicks, tags_by_device
def get_user_service_click_info_yesterday(click_date, click_date_tomorrow):
    """Collect per-device service clicks from the welfare home list, with their tags.

    The query keeps rows of ``jerry_test.user_click_service_log`` whose
    ``action_from`` is ``'welfare_home_list_item'`` and ``action`` is
    ``'goto_welfare_detail'``, whose ``click_time`` lies strictly between
    ``click_date`` and ``click_date_tomorrow``, and whose joined tag has
    ``tag_type`` below 4. Rows are grouped by ``cl_id`` so each device
    appears once.

    Args:
        click_date: Lower bound (exclusive) substituted into the SQL text.
        click_date_tomorrow: Upper bound (exclusive) substituted into the SQL text.

    Returns:
        A 2-tuple ``(device_clicks, tags_by_device)`` where ``device_clicks``
        is a list of ``(device_id, click_time)`` pairs (``click_time`` is the
        device's latest unix timestamp as an ``int``) and ``tags_by_device``
        maps each device id to the list of ``int`` tag ids of the clicked
        services.
    """
    # One device can click many services and one service can carry many tags,
    # so the query collapses everything into a single row per device.
    # NOTE(review): GROUP_CONCAT output may be truncated by the server's
    # group_concat_max_len setting - confirm it is large enough for tag_ids.
    query_template = """
SELECT cl_id device_id,
max(click_time) AS click_time,
group_concat(DISTINCT `service_id` separator ',') service_ids,
group_concat(DISTINCT `tag_id` separator ',') tag_ids
FROM
(SELECT d.cl_id,
d.service_id,
unix_timestamp(d.click_time) AS click_time,
e.tag_id
FROM jerry_test.user_click_service_log d
LEFT JOIN eagle.src_zhengxing_api_servicetag e ON d.service_id = e.service_id
LEFT JOIN eagle.src_zhengxing_api_tag f ON e.tag_id = f.id
WHERE d.action_from='welfare_home_list_item'
AND d.action='goto_welfare_detail'
AND d.click_time>'{click_date}'
AND d.click_time<'{click_date_tomorrow}'
AND f.tag_type+0 <'4'+0) tmp2
GROUP BY cl_id
"""
    query = query_template.format(click_date=click_date, click_date_tomorrow=click_date_tomorrow)

    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    # Presumably get_data_by_mysql returns a re-iterable sequence of dict-like
    # rows (it is walked twice below) - verify against its definition.
    rows = get_data_by_mysql('172.16.40.158', 4000, 'root', '3SYz54LS9#^9sBvC', 'jerry_test', query)

    # First pass: (device id, latest click timestamp) pairs in result order.
    device_clicks = []
    for row in rows:
        device_clicks.append((row["device_id"], int(row["click_time"])))

    # Second pass: device id -> tag ids parsed from the comma-joined column.
    tags_by_device = {}
    for row in rows:
        device = row["device_id"]
        tags_by_device[device] = list(map(int, row["tag_ids"].split(",")))

    return device_clicks, tags_by_device
if __name__ == '__main__':
try:
parser = argparse.ArgumentParser(description='画像匹配度的统计')
my_yesterday = str(datetime.date.today() - datetime.timedelta(days=1))
parser.add_argument("-o", "--order_date", type=str, dest="order_date", default=my_yesterday, help="统计的下单日期")
parser.add_argument("-o", "--order_date", type=str, dest="order_date", default=my_yesterday, help="统计的行为日期")
parser.add_argument("-log1", "--log1_file", type=str, dest="portrait_stat_log_path",
default="portrait_stat.log", help="画像统计的日志地址")
parser.add_argument("-log2", "--log2_file", type=str, dest="debug_portrait_stat_log_path",
......@@ -288,24 +338,26 @@ if __name__ == '__main__':
for action in action_type:
# 获取昨天产生行为的设备id、以及行为对应的tag
device_ids_lst = list()
all_device_order_tags = dict()
all_device_action_tags = dict()
if "order" in action_type:
device_ids_lst, all_device_order_tags = get_user_order_info_yesterday()
device_ids_lst, all_device_action_tags = get_user_order_info_yesterday(order_date, order_date_tomorrow)
elif "diary" in action_type:
device_ids_lst, all_device_order_tags = get_user_diary_click_info_yesterday()
device_ids_lst, all_device_action_tags = get_user_diary_click_info_yesterday(order_date,
order_date_tomorrow)
elif "service" in action_type:
device_ids_lst, all_device_order_tags = get_user_service_click_info_yesterday()
device_ids_lst, all_device_action_tags = get_user_service_click_info_yesterday(order_date,
order_date_tomorrow)
else:
break
# tags扩展2级tags
all_device_order_tags2 = dict()
for device in all_device_order_tags:
tags = all_device_order_tags[device]
all_device_action_tags2 = dict()
for device in all_device_action_tags:
tags = all_device_action_tags[device]
for tag in tags:
tags2 = all_3tag_2tag.get(tag, [])
tags += tags2
all_device_order_tags2[device] = tags
all_device_action_tags2[device] = tags
# 用户的去除支付行为的画像
all_device_portrait_result = dict()
......@@ -325,7 +377,7 @@ if __name__ == '__main__':
debug_all_device_portrait_result[device] = debug_portrait_result
# 比较两个tag列表的重合率
result = get_2_tags_coincide_rate(all_device_order_tags2, all_device_portrait_result, cmd_portrait_top_n,
result = get_2_tags_coincide_rate(all_device_action_tags2, all_device_portrait_result, cmd_portrait_top_n,
cmd_coincide_n)
# 有画像没匹配上的用户的画像信息
......@@ -347,7 +399,7 @@ if __name__ == '__main__':
for device in no_coincide_devices:
no_coincide_devices_debug = dict()
device_portrait_n = all_device_portrait_result[device][:args.portrait_top_n]
device_order_tags = all_device_order_tags2[device]
device_order_tags = all_device_action_tags2[device]
debug_device_portrait_result = debug_all_device_portrait_result[device]
no_coincide_devices_debug[device] = {
"画像的前{top_n}个tag".format(top_n=args.portrait_top_n): [debug_device_portrait_result[tag] for tag in
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment