Commit 8136596b authored by 王志伟

取icon训练数据

parent 5455edea
......@@ -483,6 +483,7 @@ object icon_train_data {
|from online.tl_hdfs_maidian_view
|where action = 'on_click_diary_card'
|and partition_date='${partition_date}'
|and params['diary_id'] is not null
""".stripMargin
)
diary_click.show()
......@@ -520,34 +521,26 @@ object icon_train_data {
diary_tag_detail.createOrReplaceTempView("diary_tag_detail")
val temp_diary = sc.sql(
s"""
|select device_id,concat_ws(':',level1_id,'0.25') as level1_value,concat_ws(':',level2_id,'0.5') as level2_value,concat_ws(':',level3_id,'1') as level3_value
|select device_id,tag_id,level1_id
|from diary_tag_detail
|union all
|select device_id,tag_id,level2_id
|from diary_tag_detail
|union all
|select device_id,tag_id,level3_id
|from diary_tag_detail
""".stripMargin
)
temp_diary.show()
temp_diary.createOrReplaceTempView("temp_diary")
val test_diary = sc.sql(
s"""
|select device_id,concat_ws(',',level1_value,level2_value,level3_value) as tag_list
|from temp_diary
""".stripMargin
)
test_diary.show()
// val Df2 = test_diary.withColumn("tag_list",explode(split()))
// Df2.show()
import sc.implicits._
val test=test_diary.rdd.map(row=>(row(0).toString,row(1).toString))
.map(row=>(row._1,row._2.split(",").head,row._2.split(",")(1),row._2.split(",")(2))).toDF().show()
// val df_explode = test_diary.withColumn("e", explode(split(test_diary['tag_list'], ","))).drop("tag_list")
// val test=test_diary.rdd.map(row=>(row(0).toString,row(1).toString))
// .map(row=>(row._1,row._2.split(",").head,row._2.split(",")(1),row._2.split(",")(2))).toDF().show()
// print("写入开始")
//
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment