Commit 729383ca authored by 高雅喆

update

parent 82ed7086
......@@ -25,6 +25,7 @@ def get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag,
cl_id = x[1]
order_tag_id = x[2]
order_tag_id_score = 0.0
tag_position = -1
user_df_service = get_user_log(cl_id, all_word_tags, pay_time=pay_time)
# 增加df字段(days_diff_now, tag_type, tag2)
......@@ -56,10 +57,12 @@ def get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag,
)
gmkv_tag_score2_sum = tag_score_sum[["tag2", "tag_score"]][:size].to_dict('record')
gmkv_tag_score2_sum_dict = {i["tag2"]: i["tag_score"] for i in gmkv_tag_score2_sum}
tag_list = [i['tag2'] for i in gmkv_tag_score2_sum]
order_tag_id_score = gmkv_tag_score2_sum_dict.get(int(order_tag_id), 0.0)
return pay_time, cl_id, order_tag_id, order_tag_id_score
tag_position = tag_list.index(int(order_tag_id))+1 if int(order_tag_id) in tag_list else -1
return pay_time, cl_id, order_tag_id, order_tag_id_score, tag_position
else:
return pay_time, cl_id, order_tag_id, 0.0
return pay_time, cl_id, order_tag_id, 0.0, -1
# Fetch the order time, device id, and tag id for devices that placed orders in the past month
......@@ -105,6 +108,6 @@ device_ids_lst_rdd = spark.sparkContext.parallelize(device_info)
# Driver section: compute one portrait tuple per device-order record and export the result as CSV.
# Redistribute the records over 100 partitions, map each through get_user_service_portrait
# (called with size=None), and discard any record for which it returned None.
result = device_ids_lst_rdd.repartition(100).map(lambda x: get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag, all_tags_name, size=None)).filter(lambda x: x is not None)
# Debug output: number of surviving records and a sample of ten.
print(result.count())
print(result.take(10))
# NOTE(review): this text comes from a rendered diff whose +/- markers were lost. The next two
# statements (four columns, un-suffixed CSV name) look like the pre-change versions of the two
# statements that follow them — confirm against the repository before treating both as live code.
df = spark.createDataFrame(result).na.drop().toDF("pay_time", "cl_id", "order_tag_id", "order_tag_id_score").na.drop().toPandas()
df.to_csv("~/gyz/log/stat_device_order_portrait_score_1106_1206.csv", index=False)
# Build a Spark DataFrame from the RDD, drop rows containing nulls, assign column names, drop
# nulls once more, then convert to a pandas DataFrame. The fifth column is named "tag_index"
# here; the function's visible return statements call the fifth element tag_position.
df = spark.createDataFrame(result).na.drop().toDF("pay_time", "cl_id", "order_tag_id", "order_tag_id_score", "tag_index").na.drop().toPandas()
# Write the pandas DataFrame to the "_v2" CSV file without the index column.
df.to_csv("~/gyz/log/stat_device_order_portrait_score_1106_1206_v2.csv", index=False)
# Shut down `spark` (presumably a SparkSession — its construction is not visible here).
spark.stop()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment