Commit 681909bc authored by 高雅喆

20191111用户行为日志

parent 44b26426
......@@ -65,13 +65,15 @@ def get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag,
# data
device_info = []
with open("/home/gmuser/gyz/log/have_search_device_20191105.csv", "r") as f:
# sql: select cl_id, collect_set(params["query"]) from bl_hdfs_maidian_updates where partition_date="20191111" and action="do_search" group by cl_id
with open("/home/gmuser/gyz/log/have_search_device_20191111.csv", "r") as f:
for line in f.readlines():
data = line.strip().split("=")
device = data[0]
search_words = eval(data[1])
device_info.append([device, search_words])
pay_time = 1572883200
pay_time = 1573401600
# 获取搜索词及其近义词对应的tag
all_word_tags = get_all_word_tags()
all_tag_tag_type = get_all_tag_tag_type()
......@@ -100,5 +102,5 @@ result = device_ids_lst_rdd.repartition(100).map(lambda x: get_user_service_port
print(result.count())
print(result.take(10))
df = spark.createDataFrame(result).na.drop().toDF("device", "search_words", "user_portrait").na.drop().toPandas()
df.to_csv("~/test_df.csv", index=False)
spark.stop()
\ No newline at end of file
df.to_csv("~/gyz/log/user_action_20191111.csv", index=False)
spark.stop()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment