Commit 5e3c6a1e authored by 高雅喆

画像时间衰减上线

parent f0dcca73
...@@ -94,11 +94,11 @@ def get_user_service_portrait(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2 ...@@ -94,11 +94,11 @@ def get_user_service_portrait(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2
user_df_service["tag2_type"] = user_df_service.apply(lambda x: all_tag_tag_type.get(x["tag2"]), axis=1) user_df_service["tag2_type"] = user_df_service.apply(lambda x: all_tag_tag_type.get(x["tag2"]), axis=1)
# 算分及比例 # 算分及比例
user_df_service["tag_score"] = user_df_service.apply( user_df_service["tag_score"] = user_df_service.apply(
lambda x: compute_henqiang(x.days_diff_now)/get_action_tag_count(user_df_service, x.time) if x.score_type == "henqiang" else ( lambda x: compute_henqiang(x.days_diff_now, exponential=1)/get_action_tag_count(user_df_service, x.time) if x.score_type == "henqiang" else (
compute_jiaoqiang(x.days_diff_now)/get_action_tag_count(user_df_service, x.time) if x.score_type == "jiaoqiang" else ( compute_jiaoqiang(x.days_diff_now, exponential=1)/get_action_tag_count(user_df_service, x.time) if x.score_type == "jiaoqiang" else (
compute_ai_scan(x.days_diff_now)/get_action_tag_count(user_df_service, x.time) if x.score_type == "ai_scan" else ( compute_ai_scan(x.days_diff_now, exponential=1)/get_action_tag_count(user_df_service, x.time) if x.score_type == "ai_scan" else (
compute_ruoyixiang(x.days_diff_now)/get_action_tag_count(user_df_service, x.time) if x.score_type == "ruoyixiang" else compute_ruoyixiang(x.days_diff_now, exponential=1)/get_action_tag_count(user_df_service, x.time) if x.score_type == "ruoyixiang" else
compute_validate(x.days_diff_now)/get_action_tag_count(user_df_service, x.time)))), axis=1) compute_validate(x.days_diff_now, exponential=1)/get_action_tag_count(user_df_service, x.time)))), axis=1)
tag_score_sum = user_df_service.groupby(by=["tag2", "tag2_type"]).agg( tag_score_sum = user_df_service.groupby(by=["tag2", "tag2_type"]).agg(
{'tag_score': 'sum', 'cl_id': 'first', 'action': 'first'}).reset_index().sort_values(by=["tag_score"], {'tag_score': 'sum', 'cl_id': 'first', 'action': 'first'}).reset_index().sort_values(by=["tag_score"],
ascending=False) ascending=False)
......
...@@ -32,7 +32,7 @@ def setup_logger(logger_name, log_file, level=logging.INFO): ...@@ -32,7 +32,7 @@ def setup_logger(logger_name, log_file, level=logging.INFO):
def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type, pay_time, all_3tag_2tag, version=1, def get_user_service_portrait_not_alipay(cl_id, all_word_tags, all_tag_tag_type, pay_time, all_3tag_2tag, version=1,
exponential=0, normalization_size=7, decay_days=180, size=10): exponential=0, normalization_size=7, decay_days=30, size=10):
""" """
:param cl_id: :param cl_id:
:param all_word_tags: :param all_word_tags:
...@@ -305,7 +305,7 @@ if __name__ == '__main__': ...@@ -305,7 +305,7 @@ if __name__ == '__main__':
parser.add_argument("-e", "--exponential", type=int, dest="exponential", default=0, help="是否采用指数衰减") parser.add_argument("-e", "--exponential", type=int, dest="exponential", default=0, help="是否采用指数衰减")
parser.add_argument("-n", "--normalization_size", type=int, dest="normalization_size", default=7, parser.add_argument("-n", "--normalization_size", type=int, dest="normalization_size", default=7,
help="天数差归一化的区间") help="天数差归一化的区间")
parser.add_argument("-d", "--decay_days", type=int, dest="decay_days", default=180, help="分数衰减的天数") parser.add_argument("-d", "--decay_days", type=int, dest="decay_days", default=30, help="分数衰减的天数")
parser.add_argument("-a", "--action_type", dest="action_type", nargs='+', help="计算匹配度的行为") parser.add_argument("-a", "--action_type", dest="action_type", nargs='+', help="计算匹配度的行为")
parser.add_argument("-s", "--save_tidb", type=int, dest="save_tidb", default=1, help="统计结果是否存tidb") parser.add_argument("-s", "--save_tidb", type=int, dest="save_tidb", default=1, help="统计结果是否存tidb")
args = parser.parse_args() args = parser.parse_args()
......
...@@ -187,7 +187,7 @@ def get_tag2_from_tag3(tag3, all_3tag_2tag, user_log_df_tag2_list): ...@@ -187,7 +187,7 @@ def get_tag2_from_tag3(tag3, all_3tag_2tag, user_log_df_tag2_list):
print(e) print(e)
def compute_henqiang(x, decay_days=180, normalization_size=7, exponential=0): def compute_henqiang(x, decay_days=30, normalization_size=7, exponential=0):
if exponential: if exponential:
alpha = exponential_decay(x, decay_days, normalization_size) alpha = exponential_decay(x, decay_days, normalization_size)
score = 15 - 2**alpha * ((15-0.5)/decay_days) score = 15 - 2**alpha * ((15-0.5)/decay_days)
...@@ -197,7 +197,7 @@ def compute_henqiang(x, decay_days=180, normalization_size=7, exponential=0): ...@@ -197,7 +197,7 @@ def compute_henqiang(x, decay_days=180, normalization_size=7, exponential=0):
return score return score
else: else:
return 0.5 return 0.5
def compute_jiaoqiang(x, decay_days=180, normalization_size=7, exponential=0): def compute_jiaoqiang(x, decay_days=30, normalization_size=7, exponential=0):
if exponential: if exponential:
alpha = exponential_decay(x, decay_days, normalization_size) alpha = exponential_decay(x, decay_days, normalization_size)
score = 12 - 2**alpha * ((12-0.5)/decay_days) score = 12 - 2**alpha * ((12-0.5)/decay_days)
...@@ -207,7 +207,7 @@ def compute_jiaoqiang(x, decay_days=180, normalization_size=7, exponential=0): ...@@ -207,7 +207,7 @@ def compute_jiaoqiang(x, decay_days=180, normalization_size=7, exponential=0):
return score return score
else: else:
return 0.5 return 0.5
def compute_ruoyixiang(x, decay_days=180, normalization_size=7, exponential=0): def compute_ruoyixiang(x, decay_days=30, normalization_size=7, exponential=0):
if exponential: if exponential:
alpha = exponential_decay(x, decay_days, normalization_size) alpha = exponential_decay(x, decay_days, normalization_size)
score = 5 - 2**alpha * ((5-0.5)/decay_days) score = 5 - 2**alpha * ((5-0.5)/decay_days)
...@@ -217,7 +217,7 @@ def compute_ruoyixiang(x, decay_days=180, normalization_size=7, exponential=0): ...@@ -217,7 +217,7 @@ def compute_ruoyixiang(x, decay_days=180, normalization_size=7, exponential=0):
return score return score
else: else:
return 0.5 return 0.5
def compute_validate(x, decay_days=180, normalization_size=7, exponential=0): def compute_validate(x, decay_days=30, normalization_size=7, exponential=0):
if exponential: if exponential:
alpha = exponential_decay(x, decay_days, normalization_size) alpha = exponential_decay(x, decay_days, normalization_size)
score = 10 - 2**alpha * ((10-0.5)/decay_days) score = 10 - 2**alpha * ((10-0.5)/decay_days)
...@@ -227,7 +227,7 @@ def compute_validate(x, decay_days=180, normalization_size=7, exponential=0): ...@@ -227,7 +227,7 @@ def compute_validate(x, decay_days=180, normalization_size=7, exponential=0):
return score return score
else: else:
return 0.5 return 0.5
def compute_ai_scan(x, decay_days=180, normalization_size=7, exponential=0): def compute_ai_scan(x, decay_days=30, normalization_size=7, exponential=0):
if exponential: if exponential:
alpha = exponential_decay(x, decay_days, normalization_size) alpha = exponential_decay(x, decay_days, normalization_size)
score = 2 - 2**alpha * ((2-0.5)/decay_days) score = 2 - 2**alpha * ((2-0.5)/decay_days)
...@@ -247,7 +247,7 @@ def get_action_tag_count(df, action_time): ...@@ -247,7 +247,7 @@ def get_action_tag_count(df, action_time):
print(e) print(e)
def exponential_decay(days_diff, decay_days=180, normalization_size=7): def exponential_decay(days_diff, decay_days=30, normalization_size=7):
x = np.arange(1, decay_days+1, 1) x = np.arange(1, decay_days+1, 1)
# 天数差归一化到[0, normalization_size] # 天数差归一化到[0, normalization_size]
a = (normalization_size - 0) * (days_diff - min(x)) / (max(x) - min(x)) a = (normalization_size - 0) * (days_diff - min(x)) / (max(x) - min(x))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment