Commit 683006e6 authored by 高雅喆

计算匹配度的时候,用户log限制在该用户下单之前

parent 754b23c0
...@@ -31,7 +31,7 @@ def setup_logger(logger_name, log_file, level=logging.INFO): ...@@ -31,7 +31,7 @@ def setup_logger(logger_name, log_file, level=logging.INFO):
my_log.addHandler(stream_handler) my_log.addHandler(stream_handler)
def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2tag, size=10): def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type, pay_time, all_3tag_2tag, size=10):
""" """
:param cl_id: :param cl_id:
:param all_word_tags: :param all_word_tags:
...@@ -44,11 +44,10 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type, ...@@ -44,11 +44,10 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC',
db='jerry_test', charset='utf8') db='jerry_test', charset='utf8')
cur_jerry_test = db_jerry_test.cursor() cur_jerry_test = db_jerry_test.cursor()
yesterday = get_yesterday_start_timestamp()
# 用户的非搜索、支付的行为 # 用户的非搜索、支付的行为
user_df_service_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \ user_df_service_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \
"where cl_id ='{cl_id}' and time < {yesterday} and action not in " \ "where cl_id ='{cl_id}' and time < {pay_time} and action not in " \
"('api/settlement/alipay_callback','do_search')".format(cl_id=cl_id, yesterday=yesterday) "('api/settlement/alipay_callback','do_search')".format(cl_id=cl_id, pay_time=pay_time)
cur_jerry_test.execute(user_df_service_sql) cur_jerry_test.execute(user_df_service_sql)
data = list(cur_jerry_test.fetchall()) data = list(cur_jerry_test.fetchall())
if data: if data:
...@@ -58,8 +57,8 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type, ...@@ -58,8 +57,8 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
user_df_service = pd.DataFrame(columns=["time", "cl_id", "score_type", "tag_id", "tag_referrer", "action"]) user_df_service = pd.DataFrame(columns=["time", "cl_id", "score_type", "tag_id", "tag_referrer", "action"])
# 用户的搜索行为 # 用户的搜索行为
user_df_search_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \ user_df_search_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \
"where cl_id ='{cl_id}' and time < {yesterday} and " \ "where cl_id ='{cl_id}' and time < {pay_time} and " \
"action = 'do_search'".format(cl_id=cl_id, yesterday=yesterday) "action = 'do_search'".format(cl_id=cl_id, pay_time=pay_time)
cur_jerry_test.execute(user_df_search_sql) cur_jerry_test.execute(user_df_search_sql)
data_search = list(cur_jerry_test.fetchall()) data_search = list(cur_jerry_test.fetchall())
db_jerry_test.close() db_jerry_test.close()
...@@ -120,7 +119,7 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type, ...@@ -120,7 +119,7 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
return list(), dict() return list(), dict()
def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2tag, size=10): def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type, pay_time, all_3tag_2tag, size=10):
""" """
:param cl_id: :param cl_id:
:param all_word_tags: :param all_word_tags:
...@@ -136,8 +135,8 @@ def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type ...@@ -136,8 +135,8 @@ def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type
yesterday = get_yesterday_start_timestamp() yesterday = get_yesterday_start_timestamp()
# 用户的非搜索、支付的行为 # 用户的非搜索、支付的行为
user_df_service_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \ user_df_service_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \
"where cl_id ='{cl_id}' and time < {yesterday} and action not in " \ "where cl_id ='{cl_id}' and time < {pay_time} and action not in " \
"('api/settlement/alipay_callback','do_search')".format(cl_id=cl_id, yesterday=yesterday) "('api/settlement/alipay_callback','do_search')".format(cl_id=cl_id, pay_time=pay_time)
cur_jerry_test.execute(user_df_service_sql) cur_jerry_test.execute(user_df_service_sql)
data = list(cur_jerry_test.fetchall()) data = list(cur_jerry_test.fetchall())
if data: if data:
...@@ -147,8 +146,8 @@ def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type ...@@ -147,8 +146,8 @@ def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type
user_df_service = pd.DataFrame(columns=["time", "cl_id", "score_type", "tag_id", "tag_referrer", "action"]) user_df_service = pd.DataFrame(columns=["time", "cl_id", "score_type", "tag_id", "tag_referrer", "action"])
# 用户的搜索行为 # 用户的搜索行为
user_df_search_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \ user_df_search_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \
"where cl_id ='{cl_id}' and time < {yesterday} and " \ "where cl_id ='{cl_id}' and time < {pay_time} and " \
"action = 'do_search'".format(cl_id=cl_id, yesterday=yesterday) "action = 'do_search'".format(cl_id=cl_id, pay_time=pay_time)
cur_jerry_test.execute(user_df_search_sql) cur_jerry_test.execute(user_df_search_sql)
data_search = list(cur_jerry_test.fetchall()) data_search = list(cur_jerry_test.fetchall())
db_jerry_test.close() db_jerry_test.close()
...@@ -276,44 +275,49 @@ if __name__ == '__main__': ...@@ -276,44 +275,49 @@ if __name__ == '__main__':
# api_order只有用户的user_id,一个user_id对应多个device_id # api_order只有用户的user_id,一个user_id对应多个device_id
# 用户一次可以下多个订单(美购),一个美购对应多个tag # 用户一次可以下多个订单(美购),一个美购对应多个tag
sql_order_device_info_yesterday = """ sql_order_device_info_yesterday = """
SELECT tmp1.user_id, SELECT tmp1.user_id,
c.device_id, c.device_id,
tmp1.service_ids, tmp1.service_ids,
tmp1.tag_ids tmp1.tag_ids,
FROM tmp1.pay_time
(SELECT tmp.user_id, FROM
tmp.service_ids, (SELECT tmp.user_id,
tmp.tag_ids, tmp.service_ids,
max(tmp.device_id) device_id_id tmp.tag_ids,
FROM tmp.pay_time,
(SELECT a.user_id, max(tmp.device_id) device_id_id
a.service_ids, FROM
a.tag_ids, (SELECT a.user_id,
b.device_id a.service_ids,
FROM a.tag_ids,
(SELECT user_id, a.pay_time,
group_concat(DISTINCT `service_id` separator ',') service_ids, b.device_id
group_concat(DISTINCT `tag_id` separator ',') tag_ids FROM
FROM (SELECT user_id,
(SELECT d.user_id, max(pay_time) AS pay_time,
d.service_id, group_concat(DISTINCT `service_id` separator ',') service_ids,
e.tag_id group_concat(DISTINCT `tag_id` separator ',') tag_ids
FROM api_order d FROM
LEFT JOIN api_servicetag e ON d.service_id = e.service_id (SELECT d.user_id,
LEFT JOIN api_tag f ON e.tag_id = f.id d.service_id,
WHERE d.status=1 unix_timestamp(d.pay_time) AS pay_time,
AND d.pay_time>'{my_yesterday}' e.tag_id
AND d.pay_time<'{today}' FROM api_order d
AND f.tag_type+0 <'4'+0) tmp2 LEFT JOIN api_servicetag e ON d.service_id = e.service_id
GROUP BY user_id) a LEFT JOIN api_tag f ON e.tag_id = f.id
LEFT JOIN statistic_device_user b ON a.user_id = b.user_id) tmp WHERE d.status=1
GROUP BY tmp.user_id) tmp1 AND d.pay_time>'{my_yesterday}'
LEFT JOIN statistic_device c ON tmp1.device_id_id = c.id AND d.pay_time<'{today}'
WHERE c.device_id IS NOT NULL AND f.tag_type+0 <'4'+0) tmp2
GROUP BY user_id) a
LEFT JOIN statistic_device_user b ON a.user_id = b.user_id) tmp
GROUP BY tmp.user_id) tmp1
LEFT JOIN statistic_device c ON tmp1.device_id_id = c.id
WHERE c.device_id IS NOT NULL
""".format(my_yesterday=my_yesterday, today=my_today) """.format(my_yesterday=my_yesterday, today=my_today)
mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing', mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing',
sql_order_device_info_yesterday) sql_order_device_info_yesterday)
device_ids_lst = [i["device_id"] for i in mysql_results] device_ids_lst = [(i["device_id"], int(i["pay_time"])) for i in mysql_results]
all_device_order_tags = {i["device_id"]: [int(tag) for tag in i["tag_ids"].split(",")] for i in mysql_results} all_device_order_tags = {i["device_id"]: [int(tag) for tag in i["tag_ids"].split(",")] for i in mysql_results}
# 获取搜索词及其近义词对应的tag # 获取搜索词及其近义词对应的tag
...@@ -336,17 +340,23 @@ if __name__ == '__main__': ...@@ -336,17 +340,23 @@ if __name__ == '__main__':
all_device_portrait_result = dict() all_device_portrait_result = dict()
debug_all_device_portrait_result = dict() debug_all_device_portrait_result = dict()
if version == 1: if version == 1:
for device in device_ids_lst: for order_info in device_ids_lst:
device = order_info[0]
pay_time = order_info[1]
portrait_result, debug_portrait_result = get_user_service_portrait_not_alipay(device, all_word_tags, portrait_result, debug_portrait_result = get_user_service_portrait_not_alipay(device, all_word_tags,
all_tag_tag_type, all_tag_tag_type,
all_3tag_2tag, size=-1) pay_time, all_3tag_2tag,
size=-1)
all_device_portrait_result[device] = portrait_result all_device_portrait_result[device] = portrait_result
debug_all_device_portrait_result[device] = debug_portrait_result debug_all_device_portrait_result[device] = debug_portrait_result
elif version == 0: elif version == 0:
for device in device_ids_lst: for order_info in device_ids_lst:
device = order_info[0]
pay_time = order_info[1]
portrait_result, debug_portrait_result = get_user_service_portrait_not_alipay2(device, all_word_tags, portrait_result, debug_portrait_result = get_user_service_portrait_not_alipay2(device, all_word_tags,
all_tag_tag_type, all_tag_tag_type,
all_3tag_2tag, size=-1) pay_time, all_3tag_2tag,
size=-1)
all_device_portrait_result[device] = portrait_result all_device_portrait_result[device] = portrait_result
debug_all_device_portrait_result[device] = debug_portrait_result debug_all_device_portrait_result[device] = debug_portrait_result
else: else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment