Commit 58bcd621 authored by 高雅喆

update

parent f5cd343f
...@@ -99,7 +99,7 @@ device_ids_lst_rdd = spark.sparkContext.parallelize(device_info) ...@@ -99,7 +99,7 @@ device_ids_lst_rdd = spark.sparkContext.parallelize(device_info)
result = device_ids_lst_rdd.repartition(100).map(lambda x: get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag, all_tags_name, size=None, pay_time=pay_time)) result = device_ids_lst_rdd.repartition(100).map(lambda x: get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag, all_tags_name, size=None, pay_time=pay_time))
print(result.take(10)) print(result.take(10))
result1 = result.map( result1 = result.map(
lambda x: (x[1], x[2], x[3]) lambda x: (x[0], x[1], x[2])
) )
path = "hdfs:///strategy/esmm/" path = "hdfs:///strategy/esmm/"
spark.createDataFrame(result1).toDF("device", "search_words", "user_portrait").repartition(1).write.format("csv").save(path=path + "portrait/", mode="overwrite") spark.createDataFrame(result1).toDF("device", "search_words", "user_portrait").repartition(1).write.format("csv").save(path=path + "portrait/", mode="overwrite")
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment