20191111用户行为日志

681909bc · 高雅喆 · 44b26426 · 681909bc
Commit 681909bc authored Nov 12, 2019 by 高雅喆
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 5 deletions

gyz_test.py eda/smart_rank/gyz_test.py +6 -5

No files found.
--- a/eda/smart_rank/gyz_test.py
+++ b/eda/smart_rank/gyz_test.py
@@ -65,13 +65,15 @@ def get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag,

 # data
 device_info = []
-with open("/home/gmuser/gyz/log/have_search_device_20191105.csv", "r") as f:
+
+# sql: select cl_id, collect_set(params["query"]) from bl_hdfs_maidian_updates where partition_date="20191111" and action="do_search" group by cl_id
+with open("/home/gmuser/gyz/log/have_search_device_20191111.csv", "r") as f:
    for line in f.readlines():
        data = line.strip().split("=")
        device = data[0]
        search_words = eval(data[1])
        device_info.append([device, search_words])
-pay_time = 1572883200
+pay_time = 1573401600
 # 获取搜索词及其近义词对应的tag
 all_word_tags = get_all_word_tags()
 all_tag_tag_type = get_all_tag_tag_type()
@@ -100,5 +102,5 @@ result = device_ids_lst_rdd.repartition(100).map(lambda x: get_user_service_port
 print(result.count())
 print(result.take(10))
 df = spark.createDataFrame(result).na.drop().toDF("device", "search_words", "user_portrait").na.drop().toPandas()
-df.to_csv("~/test_df.csv", index=False)
-spark.stop()
\ No newline at end of file
+df.to_csv("~/gyz/log/user_action_20191111.csv", index=False)
+spark.stop()