update

17c8332b · 高雅喆 · 948bcc61 · 17c8332b
Commit 17c8332b authored Nov 06, 2019 by 高雅喆
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 2 deletions

gyz_test.py eda/smart_rank/gyz_test.py +8 -2

No files found.
--- a/eda/smart_rank/gyz_test.py
+++ b/eda/smart_rank/gyz_test.py
@@ -52,7 +52,6 @@ def get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag,
                    2 if x.action == "api/settlement/alipay_callback" else 1
                ), axis=1
            )
-            gmkv_tag_score_sum = tag_score_sum[["tag2", "tag_score", "weight"]][:size].to_dict('record')
            gmkv_tag_score2_sum = tag_score_sum[["tag2", "tag_score"]][:size].to_dict('record')
            gmkv_tag_score2_sum_dict = {i["tag2"]: i["tag_score"] for i in gmkv_tag_score2_sum}
            gmkv_tag_score3_sum_dict = {all_tags_name[i]: gmkv_tag_score2_sum_dict[i] for i in gmkv_tag_score2_sum_dict}
@@ -95,8 +94,14 @@ spark.sparkContext.addPyFile("/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py"
 device_ids_lst_rdd = spark.sparkContext.parallelize(device_info)
 result = device_ids_lst_rdd.repartition(100).map(lambda x: get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag, all_tags_name, size=None, pay_time=pay_time))
 print(result.take(10))
+result1 = result.map(
+    lambda x: (x[1], x[2], x[3])
+)
+path = "hdfs:///strategy/esmm/"
+spark.createDataFrame(result1).toDF("device", "search_words", "user_portrait").repartition(1).write.format("csv").save(path=path + "portrait/", mode="overwrite")
 result.saveAsTextFile("~/test_df.csv")
 # df = result.toDF()
 # df.show()
 # result.write.format('csv').save("~/test_df.csv")
 spark.stop()
\ No newline at end of file