Commit 254a3716 authored by 赵威

Merge branch 'offic' into 'master'

Offic

See merge request !81
parents 120a8db5 500d969b
...@@ -197,6 +197,9 @@ def update_tag3_user_portrait(cl_id): ...@@ -197,6 +197,9 @@ def update_tag3_user_portrait(cl_id):
user_df["second_positions"] = list(zip(user_df["second_positions"].apply(lambda x: x.split(",")), user_df["tag_score"])) user_df["second_positions"] = list(zip(user_df["second_positions"].apply(lambda x: x.split(",")), user_df["tag_score"]))
user_df["projects"] = list(zip(user_df["projects"].apply(lambda x: x.split(",")), user_df["tag_score"])) user_df["projects"] = list(zip(user_df["projects"].apply(lambda x: x.split(",")), user_df["tag_score"]))
user_df["business_tags"] = list(zip(user_df["business_tags"].apply(lambda x: x.split(",")), user_df["tag_score"])) user_df["business_tags"] = list(zip(user_df["business_tags"].apply(lambda x: x.split(",")), user_df["tag_score"]))
user_df["selected_stars"] = list(zip(user_df["selected_stars"].apply(lambda x: x.split(",")), user_df["tag_score"]))
user_df["selected_internet_celebrity"] = list(
zip(user_df["selected_internet_celebrity"].apply(lambda x: x.split(",")), user_df["tag_score"]))
user_df["first_solutions_dict"] = user_df["first_solutions"].apply(lambda x: make_dict_from_pair(x)) user_df["first_solutions_dict"] = user_df["first_solutions"].apply(lambda x: make_dict_from_pair(x))
user_df["second_solutions_dict"] = user_df["second_solutions"].apply(lambda x: make_dict_from_pair(x)) user_df["second_solutions_dict"] = user_df["second_solutions"].apply(lambda x: make_dict_from_pair(x))
...@@ -206,6 +209,9 @@ def update_tag3_user_portrait(cl_id): ...@@ -206,6 +209,9 @@ def update_tag3_user_portrait(cl_id):
user_df["second_positions_dict"] = user_df["second_positions"].apply(lambda x: make_dict_from_pair(x)) user_df["second_positions_dict"] = user_df["second_positions"].apply(lambda x: make_dict_from_pair(x))
user_df["projects_dict"] = user_df["projects"].apply(lambda x: make_dict_from_pair(x)) user_df["projects_dict"] = user_df["projects"].apply(lambda x: make_dict_from_pair(x))
user_df["business_tags_dict"] = user_df["business_tags"].apply(lambda x: make_dict_from_pair(x)) user_df["business_tags_dict"] = user_df["business_tags"].apply(lambda x: make_dict_from_pair(x))
user_df["selected_stars_dict"] = user_df["selected_stars"].apply(lambda x: make_dict_from_pair(x))
user_df["selected_internet_celebrity_dict"] = user_df["selected_internet_celebrity"].apply(
lambda x: make_dict_from_pair(x))
first_solutions_list = user_df["first_solutions_dict"].tolist() first_solutions_list = user_df["first_solutions_dict"].tolist()
second_solutions_list = user_df["second_solutions_dict"].tolist() second_solutions_list = user_df["second_solutions_dict"].tolist()
...@@ -215,6 +221,8 @@ def update_tag3_user_portrait(cl_id): ...@@ -215,6 +221,8 @@ def update_tag3_user_portrait(cl_id):
second_positions_list = user_df["second_positions_dict"].tolist() second_positions_list = user_df["second_positions_dict"].tolist()
projects_list = user_df["projects_dict"].tolist() projects_list = user_df["projects_dict"].tolist()
business_tags_list = user_df["business_tags_dict"].tolist() business_tags_list = user_df["business_tags_dict"].tolist()
selected_stars_list = user_df["selected_stars_dict"].tolist()
selected_internet_celebrity_list = user_df["selected_internet_celebrity_dict"].tolist()
first_demands_score = merge_values(first_demands_list) first_demands_score = merge_values(first_demands_list)
second_demands_score = merge_values(second_demands_list) second_demands_score = merge_values(second_demands_list)
...@@ -225,6 +233,8 @@ def update_tag3_user_portrait(cl_id): ...@@ -225,6 +233,8 @@ def update_tag3_user_portrait(cl_id):
# projects_score = merge_values(projects_list) # projects_score = merge_values(projects_list)
projects_score_tmp = merge_values(projects_list) projects_score_tmp = merge_values(projects_list)
business_tags_score = merge_values(business_tags_list) business_tags_score = merge_values(business_tags_list)
selected_stars_score = merge_values(selected_stars_list)
selected_internet_celebrity_score = merge_values(selected_internet_celebrity_list)
tmp = [ tmp = [
"不感兴趣", "没有想法", "八卦来了", "颜值速报", "医美审美干货", "其他项目", "网红颜值", "少年之名", "郑人予", "热点课代表", "私密", "陈瑞泽", "符仁杰", "祖力亚尔", "刘泽旭", "不感兴趣", "没有想法", "八卦来了", "颜值速报", "医美审美干货", "其他项目", "网红颜值", "少年之名", "郑人予", "热点课代表", "私密", "陈瑞泽", "符仁杰", "祖力亚尔", "刘泽旭",
...@@ -275,6 +285,8 @@ def update_tag3_user_portrait(cl_id): ...@@ -275,6 +285,8 @@ def update_tag3_user_portrait(cl_id):
"second_positions": second_positions_score, "second_positions": second_positions_score,
"projects": projects_score, "projects": projects_score,
"business_tags": business_tags_score, "business_tags": business_tags_score,
"selected_stars": selected_stars_score,
"selected_internet_celebrity": selected_internet_celebrity_score,
"paid_business_tags": paid_business_tags_score, "paid_business_tags": paid_business_tags_score,
"validate_business_tags": validate_business_tags_score, "validate_business_tags": validate_business_tags_score,
"update_time": str(datetime.datetime.now()) "update_time": str(datetime.datetime.now())
...@@ -295,10 +307,13 @@ def update_tag3_user_portrait(cl_id): ...@@ -295,10 +307,13 @@ def update_tag3_user_portrait(cl_id):
res["device_type"] = device_type res["device_type"] = device_type
res["channel"] = channel res["channel"] = channel
print(res)
if (len(first_demands_score.keys()) > 0) or (len(second_demands_score.keys()) > 0) or \ if (len(first_demands_score.keys()) > 0) or (len(second_demands_score.keys()) > 0) or \
(len(first_solutions_score.keys()) > 0) or (len(second_solutions_score.keys()) > 0) or \ (len(first_solutions_score.keys()) > 0) or (len(second_solutions_score.keys()) > 0) or \
(len(first_positions_score.keys()) > 0) or (len(second_positions_score.keys()) > 0) or \ (len(first_positions_score.keys()) > 0) or (len(second_positions_score.keys()) > 0) or \
(len(projects_score.keys()) > 0) or (len(business_tags_score.keys()) > 0): (len(projects_score.keys()) > 0) or (len(business_tags_score.keys()) > 0) or \
(len(selected_stars_score.keys() > 0)) or (len(selected_internet_celebrity_score.keys() > 0)):
redis_client.set(key, json.dumps(res)) redis_client.set(key, json.dumps(res))
redis_client.expire(key, 60 * 60 * 24 * 180) redis_client.expire(key, 60 * 60 * 24 * 180)
...@@ -312,6 +327,8 @@ def update_tag3_user_portrait(cl_id): ...@@ -312,6 +327,8 @@ def update_tag3_user_portrait(cl_id):
",".join(second_positions_score.keys()), ",".join(second_positions_score.keys()),
",".join(projects_score.keys()), ",".join(projects_score.keys()),
",".join(business_tags_score.keys()), ",".join(business_tags_score.keys()),
",".join(selected_stars_score.keys()),
",".join(selected_internet_celebrity_score.keys()),
) )
# body = {} # body = {}
...@@ -383,8 +400,8 @@ def consume_kafka(): ...@@ -383,8 +400,8 @@ def consume_kafka():
if __name__ == "__main__": if __name__ == "__main__":
start = datetime.datetime.now() start = datetime.datetime.now()
# cl_id = "867617044159377" # cl_id = ""
# print(update_tag3_user_portrait(cl_id)) # update_tag3_user_portrait(cl_id)
consume_kafka() consume_kafka()
end = datetime.datetime.now() end = datetime.datetime.now()
......
...@@ -443,11 +443,13 @@ def compute_tag3_score(x): ...@@ -443,11 +443,13 @@ def compute_tag3_score(x):
def get_tag3_user_log(cl_id): def get_tag3_user_log(cl_id):
columns = [ columns = [
"log_time", "score_type", "event_cn", "first_solutions", "second_solutions", "first_demands", "second_demands", "log_time", "score_type", "event_cn", "first_solutions", "second_solutions", "first_demands", "second_demands",
"first_positions", "second_positions", "projects", "business_tags" "first_positions", "second_positions", "projects", "business_tags",
"selected_stars", "selected_internet_celebrity"
] ]
try: try:
sql = """select log_time, score_type, event_cn, first_solutions, second_solutions, first_demands, sql = """select log_time, score_type, event_cn, first_solutions, second_solutions, first_demands,
second_demands, first_positions, second_positions, projects, business_tags second_demands, first_positions, second_positions, projects,
business_tags, selected_stars, selected_internet_celebrity
from kafka_tag3_log where cl_id = '{}'""".format(cl_id) from kafka_tag3_log where cl_id = '{}'""".format(cl_id)
db, cursor = get_doris_prod() db, cursor = get_doris_prod()
...@@ -465,6 +467,8 @@ def get_tag3_user_log(cl_id): ...@@ -465,6 +467,8 @@ def get_tag3_user_log(cl_id):
user_df["days_diff_now"] = round((int(time.time()) - user_df["log_time"].astype(float)) / (24 * 60 * 60)) user_df["days_diff_now"] = round((int(time.time()) - user_df["log_time"].astype(float)) / (24 * 60 * 60))
user_df["tag_score"] = user_df.apply(lambda x: compute_tag3_score(x), axis=1) user_df["tag_score"] = user_df.apply(lambda x: compute_tag3_score(x), axis=1)
user_df["business_tags"] = user_df["business_tags"].fillna("") user_df["business_tags"] = user_df["business_tags"].fillna("")
user_df["selected_stars"] = user_df["selected_stars"].fillna("")
user_df["selected_internet_celebrity"] = user_df["selected_internet_celebrity"].fillna("")
return user_df return user_df
except Exception as e: except Exception as e:
print(e) print(e)
...@@ -513,15 +517,17 @@ def get_tag3_user_order_log(cl_id): ...@@ -513,15 +517,17 @@ def get_tag3_user_order_log(cl_id):
# PRIMARY KEY(`id`) # PRIMARY KEY(`id`)
# ) # )
# ALTER TABLE `user_tag3_portrait` ADD COLUMN business_tags text COMMENT '商业标签'; # ALTER TABLE `user_tag3_portrait` ADD COLUMN business_tags text COMMENT '商业标签';
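# ALTER TABLE `user_tag3_portrait` ADD COLUMN selected_internet_celebrity text COMMENT '网红';
# ALTER TABLE `user_tag3_portrait` ADD COLUMN selected_stars text COMMENT '明星';
# NOTE(review): write_user_portrait inserts positionally with selected_stars BEFORE selected_internet_celebrity; if the two ALTERs above are run in the order listed, the values land in swapped columns -- confirm the real column order or name the columns in the INSERT.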
def write_user_portrait(cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, def write_user_portrait(cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions,
second_positions, projects, business_tags): second_positions, projects, business_tags, selected_stars, selected_internet_celebrity):
try: try:
today = datetime.date.today() today = datetime.date.today()
oneday = datetime.timedelta(days=1) oneday = datetime.timedelta(days=1)
yesterday = today - oneday yesterday = today - oneday
sql = """insert into user_tag3_portrait values(null, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')""".format( sql = """insert into user_tag3_portrait values(null, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')""".format(
yesterday, cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions, yesterday, cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions,
projects, business_tags) projects, business_tags, selected_stars, selected_internet_celebrity)
db, cursor = get_jerry_test() db, cursor = get_jerry_test()
cursor.execute(sql) cursor.execute(sql)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment