Commit 1fbc488a authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

add data
parents ef710cfa 5e213f0c
...@@ -52,10 +52,11 @@ object Data2FFM { ...@@ -52,10 +52,11 @@ object Data2FFM {
ti.tidbMapTable(dbName = "jerry_test", tableName = "esmm_pre_data") ti.tidbMapTable(dbName = "jerry_test", tableName = "esmm_pre_data")
// val yesteday_have_seq = GmeiConfig.getMinusNDate(5) val train_sep_date = GmeiConfig.getMinusNDate(14)
val esmm_data = sc.sql( val esmm_data = sc.sql(
s""" s"""
|select device_id,y,z,stat_date,ucity_id,cid_id,clevel1_id,ccity_name from esmm_train_data |select device_id,y,z,stat_date,ucity_id,cid_id,clevel1_id,ccity_name from esmm_train_data
|where stat_date > '${train_sep_date}'
""".stripMargin """.stripMargin
).repartition(200).na.drop() ).repartition(200).na.drop()
val column_list = esmm_data.columns.filter(x => x != "y" && x != "z") val column_list = esmm_data.columns.filter(x => x != "y" && x != "z")
...@@ -114,7 +115,7 @@ object Data2FFM { ...@@ -114,7 +115,7 @@ object Data2FFM {
val esmm_pre_data = sc.sql( val esmm_pre_data = sc.sql(
s""" s"""
|select device_id,y,z,stat_date,ucity_id,cid_id,clevel1_id,ccity_name |select device_id,y,z,stat_date,ucity_id,cid_id,clevel1_id,ccity_name,label
|from esmm_pre_data |from esmm_pre_data
""".stripMargin """.stripMargin
).repartition(200).na.drop() ).repartition(200).na.drop()
...@@ -132,10 +133,10 @@ object Data2FFM { ...@@ -132,10 +133,10 @@ object Data2FFM {
val rdd_pre = esmm_pre_data.rdd.repartition(200) val rdd_pre = esmm_pre_data.rdd.repartition(200)
.map(x => (x(0).toString,x(1).toString,x(2).toString,x(3).toString, .map(x => (x(0).toString,x(1).toString,x(2).toString,x(3).toString,
x(4).toString,x(5).toString,x(6).toString, x(4).toString,x(5).toString,x(6).toString,
x(7).toString)).filter(x => esmm_join_cids.indexOf(x._6) != -1) x(7).toString,x(8).toString)).filter(x => esmm_join_cids.indexOf(x._6) != -1)
.filter(x => esmm_join_city.indexOf(x._5) != -1) .filter(x => esmm_join_city.indexOf(x._5) != -1)
val pre = rdd_pre.map(x => (x._1,x._2,x._3,column_number("device_id").indexOf(x._1), val native_pre = rdd_pre.filter(x => x._9 == "0").map(x => (x._1,x._2,x._3,column_number("device_id").indexOf(x._1),
column_number("stat_date").indexOf(x._4), column_number("ucity_id").indexOf(x._5), column_number("stat_date").indexOf(x._4), column_number("ucity_id").indexOf(x._5),
column_number("cid_id").indexOf(x._6), column_number("clevel1_id").indexOf(x._7), column_number("cid_id").indexOf(x._6), column_number("clevel1_id").indexOf(x._7),
column_number("ccity_name").indexOf(x._8),x._5,x._6)) column_number("ccity_name").indexOf(x._8),x._5,x._6))
...@@ -144,8 +145,20 @@ object Data2FFM { ...@@ -144,8 +145,20 @@ object Data2FFM {
.map(x => (x._1._1,x._2,x._1._2,x._1._3,x._1._4,x._1._5,x._1._6,x._1._7)) .map(x => (x._1._1,x._2,x._1._2,x._1._3,x._1._4,x._1._5,x._1._6,x._1._7))
.map(x => (x._1,x._2+","+x._3+","+x._4+","+x._5,x._6,x._7,x._8)).toDF("number","data","device_id","city_id","cid") .map(x => (x._1,x._2+","+x._3+","+x._4+","+x._5,x._6,x._7,x._8)).toDF("number","data","device_id","city_id","cid")
println("pre") println("pre")
pre.show(6) native_pre.show(6)
GmeiConfig.writeToJDBCTable(jdbcuri, pre, "esmm_data2ffm_infer", SaveMode.Overwrite) GmeiConfig.writeToJDBCTable(jdbcuri, native_pre, "esmm_data2ffm_infer_native", SaveMode.Overwrite)
val nearby_pre = rdd_pre.filter(x => x._9 == "1").map(x => (x._1,x._2,x._3,column_number("device_id").indexOf(x._1),
column_number("stat_date").indexOf(x._4), column_number("ucity_id").indexOf(x._5),
column_number("cid_id").indexOf(x._6), column_number("clevel1_id").indexOf(x._7),
column_number("ccity_name").indexOf(x._8),x._5,x._6))
.map(x => ((new util.Random).nextInt(2147483647),x._2,x._3,"1:%d:1.0 2:%d:1.0 3:%d:1.0 4:%d:1.0 5:%d:1.0 6:%d:1.0".
format(x._4,x._5,x._6,x._7,x._8,x._9),x._1,x._10,x._11)).zipWithIndex()
.map(x => (x._1._1,x._2,x._1._2,x._1._3,x._1._4,x._1._5,x._1._6,x._1._7))
.map(x => (x._1,x._2+","+x._3+","+x._4+","+x._5,x._6,x._7,x._8)).toDF("number","data","device_id","city_id","cid")
println("pre")
nearby_pre.show(6)
GmeiConfig.writeToJDBCTable(jdbcuri, nearby_pre, "esmm_data2ffm_infer_nearby", SaveMode.Overwrite)
sc.stop() sc.stop()
......
...@@ -240,10 +240,6 @@ object diary_exposure { ...@@ -240,10 +240,6 @@ object diary_exposure {
) )
imp_count.show() imp_count.show()
//
//曝光表中的日记id,去除机构和黑名单 //曝光表中的日记id,去除机构和黑名单
val diary_id_temp = sc.sql( val diary_id_temp = sc.sql(
s""" s"""
......
...@@ -92,7 +92,7 @@ if __name__ == '__main__': ...@@ -92,7 +92,7 @@ if __name__ == '__main__':
output_path = OUTPUT_PATH + "recommendation.csv" output_path = OUTPUT_PATH + "recommendation.csv"
with open(output_path, 'a+') as f: with open(output_path, 'a+') as f:
line = my_date.replace('-','')+','+str(result1_clk[0])+','+str(result1_clk[1])+','+str(result1_clk[2])+','+str(result1_clk[3])\ line = my_date+','+str(result1_clk[0])+','+str(result1_clk[1])+','+str(result1_clk[2])+','+str(result1_clk[3])\
+','+str(result1_imp[0])+','+str(result1_imp[1])+','+str(result1_imp[2])+','+str(result1_imp[3])+','\ +','+str(result1_imp[0])+','+str(result1_imp[1])+','+str(result1_imp[2])+','+str(result1_imp[3])+','\
+str(result2_clk[0])+','+str(result2_clk[1])+','+str(result2_clk[2])+','+str(result2_clk[3])\ +str(result2_clk[0])+','+str(result2_clk[1])+','+str(result2_clk[2])+','+str(result2_clk[3])\
+','+str(result2_imp[0])+','+str(result2_imp[1])+','+str(result2_imp[2])+','+str(result2_imp[3]) \ +','+str(result2_imp[0])+','+str(result2_imp[1])+','+str(result2_imp[2])+','+str(result2_imp[3]) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment