Commit b1c9091b authored by 高雅喆

增加首页精选的日记点击和美购首页的美购点击

parent 16570e50
......@@ -180,42 +180,7 @@ def get_2_tags_coincide_rate(device_order_tags, device_portrait_result, portrait
return result
if __name__ == '__main__':
try:
# Command-line interface of the portrait match-rate statistics script
# (previous version of this prologue, shown as the removed side of the diff).
parser = argparse.ArgumentParser(description='画像匹配度的统计')
# Default order date is yesterday; str() of a date yields YYYY-MM-DD.
my_yesterday = str(datetime.date.today() - datetime.timedelta(days=1))
# -o: order date to compute statistics for.
parser.add_argument("-o", "--order_date", type=str, dest="order_date", default=my_yesterday, help="统计的下单日期")
# -log1 / -log2: file names of the summary log and the debug log.
parser.add_argument("-log1", "--log1_file", type=str, dest="portrait_stat_log_path",
default="portrait_stat.log", help="画像统计的日志地址")
parser.add_argument("-log2", "--log2_file", type=str, dest="debug_portrait_stat_log_path",
default="debug_portrait_stat.log", help="画像统计的日志地址")
# -t: how many of the top portrait tags take part in the match-rate statistics.
parser.add_argument("-t", "--top", type=int, dest="portrait_top_n", default=3, help="选取画像的前n个tag去统计匹配度")
# -c: number of overlapping tags used as the threshold for counting a match.
parser.add_argument("-c", "--coincide", type=int, dest="coincide_n", default=1, help="选取n个tag重合个数作为判断是否匹配的阈值")
# -v: which portrait version to evaluate (0 or 1, named in the help text).
parser.add_argument("-v", "--version", type=int, dest="version", default=1, help="选取翔宇(0),英赫(1)版本进行统计")
# -e: whether exponential decay is used (integer flag, default 0).
parser.add_argument("-e", "--exponential", type=int, dest="exponential", default=0, help="是否采用指数衰减")
# -n: interval used to normalise the day difference.
parser.add_argument("-n", "--normalization_size", type=int, dest="normalization_size", default=7,
help="天数差归一化的区间")
# -d: number of days over which the score decays.
parser.add_argument("-d", "--decay_days", type=int, dest="decay_days", default=180, help="分数衰减的天数")
args = parser.parse_args()
order_date = args.order_date
# The day after order_date. str() of a datetime keeps the time part, so the
# value looks like "YYYY-MM-DD 00:00:00".
order_date_tomorrow = str(datetime.datetime.strptime(order_date, '%Y-%m-%d') + datetime.timedelta(days=1))
portrait_stat_log_path = args.portrait_stat_log_path
debug_portrait_stat_log_path = args.debug_portrait_stat_log_path
cmd_portrait_top_n = args.portrait_top_n
cmd_coincide_n = args.coincide_n
version = args.version
exponential = args.exponential
normalization_size = args.normalization_size
decay_days = args.decay_days
# Fixed directory that both log file names are appended to.
LOG_DIR = "/home/gmuser/gyz/log/"
my_today = str(datetime.date.today())
# setup_logger is defined outside this view; presumably it binds each named
# logger to the given file path - TODO confirm.
setup_logger("log1", LOG_DIR + portrait_stat_log_path)
setup_logger("log2", LOG_DIR + debug_portrait_stat_log_path)
log1 = logging.getLogger('log1')
log2 = logging.getLogger('log2')
def get_user_order_info_yesterday():
# 获取昨天下单的用户设备id,下单的美购,美购对应的tag
# api_order只有用户的user_id,一个user_id对应多个device_id
# 用户一次可以下多个订单(美购),一个美购对应多个tag
......@@ -262,8 +227,56 @@ if __name__ == '__main__':
""".format(order_date=order_date, order_date_tomorrow=order_date_tomorrow)
mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing',
sql_order_device_info_yesterday)
device_ids_lst = [(i["device_id"], int(i["pay_time"])) for i in mysql_results]
device_ids_info = [(i["device_id"], int(i["pay_time"])) for i in mysql_results]
all_device_order_tags = {i["device_id"]: [int(tag) for tag in i["tag_ids"].split(",")] for i in mysql_results}
return device_ids_info, all_device_order_tags
def get_user_diary_click_info_yesterday():
    """Return yesterday's diary-click data (placeholder, not implemented yet).

    The main script unpacks the result into two values, the same pair that
    ``get_user_order_info_yesterday`` returns: a list of device entries and
    a dict mapping device id to tag ids. A bare ``pass`` returns ``None`` and
    makes that unpacking raise ``TypeError``, so empty containers are
    returned until the real query is written.

    Returns:
        tuple: ``([], {})`` - no devices and no tags.
    """
    # TODO: query the diary clicks made from the home-page featured feed.
    return [], {}
def get_user_service_click_info_yesterday():
    """Return yesterday's service-click data (placeholder, not implemented yet).

    The main script unpacks the result into two values, the same pair that
    ``get_user_order_info_yesterday`` returns: a list of device entries and
    a dict mapping device id to tag ids. A bare ``pass`` returns ``None`` and
    makes that unpacking raise ``TypeError``, so empty containers are
    returned until the real query is written.

    Returns:
        tuple: ``([], {})`` - no devices and no tags.
    """
    # TODO: query the service clicks made from the service home page.
    return [], {}
if __name__ == '__main__':
try:
# Command-line interface of the portrait match-rate statistics script.
parser = argparse.ArgumentParser(description='画像匹配度的统计')
# Default order date is yesterday; str() of a date yields YYYY-MM-DD.
my_yesterday = str(datetime.date.today() - datetime.timedelta(days=1))
# -o: order date to compute statistics for.
parser.add_argument("-o", "--order_date", type=str, dest="order_date", default=my_yesterday, help="统计的下单日期")
# -log1 / -log2: file names of the summary log and the debug log.
parser.add_argument("-log1", "--log1_file", type=str, dest="portrait_stat_log_path",
                    default="portrait_stat.log", help="画像统计的日志地址")
parser.add_argument("-log2", "--log2_file", type=str, dest="debug_portrait_stat_log_path",
                    default="debug_portrait_stat.log", help="画像统计的日志地址")
# -t: how many of the top portrait tags take part in the match-rate statistics.
parser.add_argument("-t", "--top", type=int, dest="portrait_top_n", default=3, help="选取画像的前n个tag去统计匹配度")
# -c: number of overlapping tags used as the threshold for counting a match.
parser.add_argument("-c", "--coincide", type=int, dest="coincide_n", default=1, help="选取n个tag重合个数作为判断是否匹配的阈值")
# -v: which portrait version to evaluate (0 or 1, named in the help text).
parser.add_argument("-v", "--version", type=int, dest="version", default=1, help="选取翔宇(0),英赫(1)版本进行统计")
# -e: whether exponential decay is used (integer flag, default 0).
parser.add_argument("-e", "--exponential", type=int, dest="exponential", default=0, help="是否采用指数衰减")
# -n: interval used to normalise the day difference.
parser.add_argument("-n", "--normalization_size", type=int, dest="normalization_size", default=7,
                    help="天数差归一化的区间")
# -d: number of days over which the score decays.
parser.add_argument("-d", "--decay_days", type=int, dest="decay_days", default=180, help="分数衰减的天数")
# -a: one or more behaviours to compute the match rate for, e.g.
# "-a order" or "-a order diary". argparse calls `type` on each raw string,
# so type=list turned "order" into ['o', 'r', 'd', 'e', 'r']; nargs="+" with
# type=str yields a list of whole words, matching the list-valued default.
parser.add_argument("-a", "--action_type", type=str, nargs="+", dest="action_type", default=["order"],
                    help="计算匹配度的行为")
# Parse the command line and copy every option into a module-level name that
# the rest of the script reads.
args = parser.parse_args()
order_date = args.order_date
# The day after order_date. str() of a datetime keeps the time part, so the
# value looks like "YYYY-MM-DD 00:00:00".
order_date_tomorrow = str(datetime.datetime.strptime(order_date, '%Y-%m-%d') + datetime.timedelta(days=1))
portrait_stat_log_path = args.portrait_stat_log_path
debug_portrait_stat_log_path = args.debug_portrait_stat_log_path
cmd_portrait_top_n = args.portrait_top_n
cmd_coincide_n = args.coincide_n
version = args.version
exponential = args.exponential
normalization_size = args.normalization_size
decay_days = args.decay_days
# Behaviours to compute the match rate for; iterated over further down.
action_type = args.action_type
# Fixed directory that both log file names are appended to.
LOG_DIR = "/home/gmuser/gyz/log/"
my_today = str(datetime.date.today())
# setup_logger is defined outside this view; presumably it binds each named
# logger to the given file path - TODO confirm.
setup_logger("log1", LOG_DIR + portrait_stat_log_path)
setup_logger("log2", LOG_DIR + debug_portrait_stat_log_path)
log1 = logging.getLogger('log1')
log2 = logging.getLogger('log2')
# 获取搜索词及其近义词对应的tag
all_word_tags = get_all_word_tags()
......@@ -272,7 +285,20 @@ if __name__ == '__main__':
# 3级tag对应的2级tag
all_3tag_2tag = get_all_3tag_2tag()
# 昨天下单了的用户的美购tags(转成2级tags)
for action in action_type:
    # Collect the devices that performed this action yesterday, together
    # with the tags attached to that action.
    device_ids_lst = list()
    all_device_order_tags = dict()
    # Dispatch on the current loop value. Testing membership in the whole
    # action_type list ignored `action`, so every iteration took the same
    # branch (always "order" whenever it appeared anywhere in the list).
    if action == "order":
        device_ids_lst, all_device_order_tags = get_user_order_info_yesterday()
    elif action == "diary":
        device_ids_lst, all_device_order_tags = get_user_diary_click_info_yesterday()
    elif action == "service":
        device_ids_lst, all_device_order_tags = get_user_service_click_info_yesterday()
    else:
        # Unknown action: stop here and leave the empty containers in place.
        break
# tags扩展2级tags
all_device_order_tags2 = dict()
for device in all_device_order_tags:
tags = all_device_order_tags[device]
......@@ -281,7 +307,7 @@ if __name__ == '__main__':
tags += tags2
all_device_order_tags2[device] = tags
# 昨天下单了的用户的去除支付行为的画像
# 用户的去除支付行为的画像
all_device_portrait_result = dict()
debug_all_device_portrait_result = dict()
for order_info in device_ids_lst:
......@@ -308,6 +334,16 @@ if __name__ == '__main__':
log2.info({"统计日期": my_today})
log2.info({"参数信息": args})
log2.info({"版本": "英赫版" if version == 1 else "翔宇版"})
# Description of the user group being analysed, written to the debug log.
# `action` is the name left bound by the `for action in action_type` loop
# earlier in the script. The previous code compared the action_type list
# itself with string literals, which is never equal, so the label was
# always empty. Unknown actions still map to "".
action_type_detail = {
    "order": "昨天下单了的用户",
    "diary": "昨天在首页精选点击了日记的用户",
    "service": "昨天在美购首页点击了美购的用户",
}.get(action, "")
log2.info({"统计用户": action_type_detail})
for device in no_coincide_devices:
no_coincide_devices_debug = dict()
device_portrait_n = all_device_portrait_result[device][:args.portrait_top_n]
......@@ -337,6 +373,7 @@ if __name__ == '__main__':
# Summary-log header: run date, parsed CLI arguments, portrait version label,
# analysed user group, then the portrait-update duration (minutes, per the
# key) and the number of devices whose portrait was updated.
log1.info({"画像信息统计日期": my_today})
log1.info({"参数信息": args})
log1.info({"版本": "英赫版" if version == 1 else "翔宇版"})
log1.info({"统计用户": action_type_detail})
# time_consuming and portrait_device_count are computed outside this view.
log1.info({"画像更新耗时(分钟)": time_consuming})
# Reads the "count(*)" column of the first row of portrait_device_count.
log1.info({"画像更新的设备数": portrait_device_count[0]["count(*)"]})
# Empty line that separates this run's entries from the next.
log1.info("")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment