Commit 683006e6 authored by 高雅喆

计算匹配度的时候,用户log限制在该用户下单之前

parent 754b23c0
......@@ -31,7 +31,7 @@ def setup_logger(logger_name, log_file, level=logging.INFO):
my_log.addHandler(stream_handler)
def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2tag, size=10):
def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type, pay_time, all_3tag_2tag, size=10):
"""
:param cl_id:
:param all_word_tags:
......@@ -44,11 +44,10 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC',
db='jerry_test', charset='utf8')
cur_jerry_test = db_jerry_test.cursor()
yesterday = get_yesterday_start_timestamp()
# 用户的非搜索、支付的行为
user_df_service_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \
"where cl_id ='{cl_id}' and time < {yesterday} and action not in " \
"('api/settlement/alipay_callback','do_search')".format(cl_id=cl_id, yesterday=yesterday)
"where cl_id ='{cl_id}' and time < {pay_time} and action not in " \
"('api/settlement/alipay_callback','do_search')".format(cl_id=cl_id, pay_time=pay_time)
cur_jerry_test.execute(user_df_service_sql)
data = list(cur_jerry_test.fetchall())
if data:
......@@ -58,8 +57,8 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
user_df_service = pd.DataFrame(columns=["time", "cl_id", "score_type", "tag_id", "tag_referrer", "action"])
# 用户的搜索行为
user_df_search_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \
"where cl_id ='{cl_id}' and time < {yesterday} and " \
"action = 'do_search'".format(cl_id=cl_id, yesterday=yesterday)
"where cl_id ='{cl_id}' and time < {pay_time} and " \
"action = 'do_search'".format(cl_id=cl_id, pay_time=pay_time)
cur_jerry_test.execute(user_df_search_sql)
data_search = list(cur_jerry_test.fetchall())
db_jerry_test.close()
......@@ -120,7 +119,7 @@ def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type,
return list(), dict()
def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2tag, size=10):
def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type, pay_time, all_3tag_2tag, size=10):
"""
:param cl_id:
:param all_word_tags:
......@@ -136,8 +135,8 @@ def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type
yesterday = get_yesterday_start_timestamp()
# 用户的非搜索、支付的行为
user_df_service_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \
"where cl_id ='{cl_id}' and time < {yesterday} and action not in " \
"('api/settlement/alipay_callback','do_search')".format(cl_id=cl_id, yesterday=yesterday)
"where cl_id ='{cl_id}' and time < {pay_time} and action not in " \
"('api/settlement/alipay_callback','do_search')".format(cl_id=cl_id, pay_time=pay_time)
cur_jerry_test.execute(user_df_service_sql)
data = list(cur_jerry_test.fetchall())
if data:
......@@ -147,8 +146,8 @@ def get_user_service_portrait_not_alipay2(cl_id, all_word_tags, all_tag_tag_type
user_df_service = pd.DataFrame(columns=["time", "cl_id", "score_type", "tag_id", "tag_referrer", "action"])
# 用户的搜索行为
user_df_search_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \
"where cl_id ='{cl_id}' and time < {yesterday} and " \
"action = 'do_search'".format(cl_id=cl_id, yesterday=yesterday)
"where cl_id ='{cl_id}' and time < {pay_time} and " \
"action = 'do_search'".format(cl_id=cl_id, pay_time=pay_time)
cur_jerry_test.execute(user_df_search_sql)
data_search = list(cur_jerry_test.fetchall())
db_jerry_test.close()
......@@ -279,24 +278,29 @@ if __name__ == '__main__':
SELECT tmp1.user_id,
c.device_id,
tmp1.service_ids,
tmp1.tag_ids
tmp1.tag_ids,
tmp1.pay_time
FROM
(SELECT tmp.user_id,
tmp.service_ids,
tmp.tag_ids,
tmp.pay_time,
max(tmp.device_id) device_id_id
FROM
(SELECT a.user_id,
a.service_ids,
a.tag_ids,
a.pay_time,
b.device_id
FROM
(SELECT user_id,
max(pay_time) AS pay_time,
group_concat(DISTINCT `service_id` separator ',') service_ids,
group_concat(DISTINCT `tag_id` separator ',') tag_ids
FROM
(SELECT d.user_id,
d.service_id,
unix_timestamp(d.pay_time) AS pay_time,
e.tag_id
FROM api_order d
LEFT JOIN api_servicetag e ON d.service_id = e.service_id
......@@ -313,7 +317,7 @@ if __name__ == '__main__':
""".format(my_yesterday=my_yesterday, today=my_today)
mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing',
sql_order_device_info_yesterday)
device_ids_lst = [i["device_id"] for i in mysql_results]
device_ids_lst = [(i["device_id"], int(i["pay_time"])) for i in mysql_results]
all_device_order_tags = {i["device_id"]: [int(tag) for tag in i["tag_ids"].split(",")] for i in mysql_results}
# 获取搜索词及其近义词对应的tag
......@@ -336,17 +340,23 @@ if __name__ == '__main__':
all_device_portrait_result = dict()
debug_all_device_portrait_result = dict()
if version == 1:
for device in device_ids_lst:
for order_info in device_ids_lst:
device = order_info[0]
pay_time = order_info[1]
portrait_result, debug_portrait_result = get_user_service_portrait_not_alipay(device, all_word_tags,
all_tag_tag_type,
all_3tag_2tag, size=-1)
pay_time, all_3tag_2tag,
size=-1)
all_device_portrait_result[device] = portrait_result
debug_all_device_portrait_result[device] = debug_portrait_result
elif version == 0:
for device in device_ids_lst:
for order_info in device_ids_lst:
device = order_info[0]
pay_time = order_info[1]
portrait_result, debug_portrait_result = get_user_service_portrait_not_alipay2(device, all_word_tags,
all_tag_tag_type,
all_3tag_2tag, size=-1)
pay_time, all_3tag_2tag,
size=-1)
all_device_portrait_result[device] = portrait_result
debug_all_device_portrait_result[device] = debug_portrait_result
else:
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment