Commit fb4b8829 authored by 张彦钊's avatar 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

add maxlevelid
parents 33814b70 656781eb
......@@ -225,7 +225,6 @@ object EsmmData {
}
object EsmmPredData {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
......@@ -432,7 +431,6 @@ object EsmmPredData {
}
object GetDiaryPortrait {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
......@@ -603,4 +601,84 @@ object GetDevicePortrait {
}
}
}
/**
  * Spark job that reports how a fixed list of diaries is distributed over level-1 tags.
  *
  * It explodes `level1_ids` from `diary_feat` for the hard-coded diary ids, prints the
  * rows whose `level1_id` is "7", counts the rows per `level1_id`, and joins those counts
  * with `online.bl_tag_hierarchy_detail` to print the tag names. Results are only printed
  * to stdout.
  */
object GetLevelCount {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /**
    * Command-line parameters.
    *
    * @param env  database environment handed to `GmeiConfig.setup`
    * @param path parsed from `--path` but not read by the job at the moment (it is only
    *             referenced by the commented-out JSON loader in `main`)
    */
  case class Params(env: String = "dev",
                    path: String = null
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    // The header and the example below previously named EsmmData (copy-paste leftover).
    head("GetLevelCount")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("path")
      .text(s"the path you used")
      .action((x, c) => c.copy(path = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.GetLevelCount ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin +
        s"| --env ${defaultParams.env}"
    )
  }

  /**
    * Entry point: parses the arguments and, when they are valid, runs the report.
    *
    * @param args command-line arguments understood by `parser`
    */
  def main(args: Array[String]): Unit = {
    // foreach rather than map: the body runs only for its side effects.
    parser.parse(args, defaultParams).foreach { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      // try/finally so the session is stopped even when one of the queries fails.
      try {
        val ti = new TiContext(sc)
        ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
        ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_feat")

        import sc.implicits._

        val stat_date = GmeiConfig.getMinusNDate(1).replace("-", "")

        // val diary_queue = sc.read.json(param.path).rdd.map(x => x(0).toString).distinct().collect().toList.mkString(",")
        // Comma-separated diary ids, spliced verbatim into the SQL IN (...) clause below.
        val diary_queue = "16283654,16211979,15331340,15534094,13602830,16228368,16211987,15990809,16234522,15386660,15843365,15759398,16306213,15597614,15298621,16134214,15302730,15652939,16193613,15269965,15302734,15466578,15386706,15491159,16101468,15515751,12777586,16304243,15521916,15978625,16435351,15650970,15712411,15544488,15294642,16277682,16425141,16203962,15202492,15386814,15474889,15636685,16101582,16251087,15300823,15300825,15345884,16257252,16214244,16234732,16056557,15247597,16199918,15378686,16267518,16240897,16195843,16220434,16257303,16232729,15491360,16199977,15391028,15491383,15628603,16089403,16357697,16339269,16298324,14969178,15573339,16193883,16419166,15360356,15573353,16132458,14229868,15475055,16234869,15827317,16413055,16298367,16425343,16193927,13986185,16361866,15475082,16245135,15292816,16425364,15544727,16116121,16085403,16136607,16253346,15419823,15481272,16202171,16431553,16419272,15385035,16269779,16417251,15954409,15890925,15731191,16261624,16157187,16130565,15206918,14492168,16294414,15729169,16419346,15479315,16054807,16175641,15239718,15299111,15309353,16173613,15231542,16269882,16251451,16353856,16228931,16300613,15346247,15874634,16308812,16134739,15577701,16208485,15420015,15628919,16061066,16140950,16122519,15751833,16298666,16282308,16141002,16239307,15841996,15565517,12747475,16134867,16122580,16083671,15485655,15196891,16134876,16202461,16202460,16354020,15903463,15082216,15842031,15299312,16397053,15430398,15506175,15387395,16177932,16272144,15891227,16098076,16255792,15594296,14971705,15649596,16118595,16294724,15741766,15287122,15387482,16108382,15477602,16354162,15764357,15883142,15887237,16450441,15477641,16049036,15371151,15276945,15416220,15471518,15360927,15246246,15748007,15578022,15195049,15860650,15489962,16163758,16214959,15332272,16049074,16055221,16296887,15881144,15256507,16200635,15272899,16272333,15338446,16376782,13278160,15385553,15967185,15338448,15467478,15299545,16397281,15461348,12284918,15901687,15361021,15266817,16114690,15625223,15256584,16194569,16194571,15950864,16204819,16049173,15531030,15397912,15883288,15604767,15729700,15504420,15987751,15572010,15615019,16403502,16225332,15891509,15778870,15903804,15545409,15569985,16297034,15572042,15572044,16198733,15545422,15946834,16116818,15336535,16116832,15336547,16266340,16323686,16116854,15621245,15414398,16297085,16116869,16221320,15887497,16225416,16112786,16225427,16123026,16430232,16204953,15895704,16434331,15545497,15912093,16299168,16059552,16204980,15299765,15420603,16399555,15639757,16084175,15361235,15633625,16116953,16065775,16233712,15856889,15375611,16194812,15594747,15609095,15594779,16262442,15420718,16035120,16137522,16405818,15420734,16233792,15570251,15967572,16266581,15639895,16084313,16293219,15592807,16371047,16422248,16246122,16153967,16131449,15349114,15746428,15746434,15297929,15527308,16145806,16317847,16061852,16246173,15912356,13163949,15429039,16041397,16197047,15803831,16207296,15443404,16121301,16127449,16213470,16115168,15629799,15336944,16338429,15629822,15750663,16129543,15568395,15564307,15646229,15441430,15369765,16354853,15441449,15576619,16301612,16199213,16215596,15644209,15994422,16258615,15482427,16096830,15595074,16299587,15414853,15418950,16268873,15988304,16084561,16305752,15603296,15328874,16399988,15877749,16354954,15949451,14542485,16219798,16107161,15345305,15990434,16400037,15720101,16035495,15859365,16375466,15214253,15769263,15328957,15976127,15769280,15519424,16238276,15576775,15253194,16197323,15261387,15591116,16197330,15390421,15306456,15388381,15515359,16258786,16258787,15519458,15990507,16258797,15519472,16166642,15904499,15199988,15990518,15748854,16422648,15533817,16140026,16004862,15986431,15296256,15910656,16193282,15714050,15931142,15834886,16049931,15232783,16426770,16115479,15519511,15519517,16228125,16424738,16297765,16162597,16142120,15980332,15458095,16244538,15580990,15988542,15398719,16269126,16119624,15458127,15966031,16420691,15880026,16185182,16406366,15880033,15880036,15521638,16088936,15533937,16213880,16111482,16199552,15513474,15961993,15986570,15970190,15644562,16138136,16424856,15490981,15402927,16406450,15511478,15747009,15632328,16068554,15966159,15271888,15302622,16191459,16222181,15890407,15966189,16275439,15237104,16424945,16300020,15300599,16050175"

        // One row per (diary_id, level1_id): level1_ids is a ';'-separated list.
        val diary_level1 = sc.sql(
          s"""
             |select diary_id,explode(split(level1_ids,';')) level1_id from diary_feat
             |where diary_id in (${diary_queue})
          """.stripMargin
        )
        diary_level1.show()
        println(diary_level1.count())

        // Diaries tagged with level1_id "7" (the breast category, per the original comment).
        // getAs needs an explicit type argument: without it T is inferred as Nothing and
        // the call fails at runtime with a ClassCastException to scala.runtime.Nothing$.
        val cid_xiong = diary_level1.rdd.filter(_.getAs[String]("level1_id") == "7")
        cid_xiong.collect().foreach(println)

        // Count the rows in each level-1 category, most frequent first.
        val level1_count = diary_level1.rdd
          .map(row => (row(1).toString, 1))
          .reduceByKey(_ + _)
          .sortBy(_._2, ascending = false)
          .toDF("level1_id", "count")
        level1_count.show()
        level1_count.createOrReplaceTempView("tmp")

        // Attach the tag name for each level1_id, using yesterday's partition.
        val level1_name = sc.sql(
          s"""
             |select a.level1_id,a.count,b.level1_name from tmp a
             |left join (select distinct level1_id,level1_name from online.bl_tag_hierarchy_detail where partition_date = '${stat_date}') b
             |on a.level1_id = b.level1_id order by a.count desc
          """.stripMargin
        )
        level1_name.show()
      } finally {
        sc.stop()
      }
    }
  }
}
\ No newline at end of file
......@@ -615,99 +615,118 @@ object smart_rank_count {
final_id.createOrReplaceTempView("final_id")
// val user_city_meigou_view = sc.sql(
// s"""
// |select cl_id as device_id,city_id as device_city,params['business_id'] as meigou_id
// |from online.tl_hdfs_maidian_view
// |where action = "page_view"
// |and params['page_name']="welfare_detail"
// |and partition_date >='20181201'
// |and city_id is not null
// """.stripMargin
// )
// user_city_meigou_view.createOrReplaceTempView("user_city_meigou_view")
//
// val meigou_city = sc.sql(
// s"""
// |select b.id as meigou_id,d.city_id as meigou_city
// |from online.tl_meigou_service_view b
// |left join online.tl_hdfs_doctor_view c on b.doctor_id=c.id
// |left join online.tl_hdfs_hospital_view d on c.hospital_id=d.id
// |where b.partition_date='20181227'
// |and c.partition_date='20181227'
// |and d.partition_date='20181227'
// """.stripMargin
// )
// meigou_city.createOrReplaceTempView("meigou_city")
//
//
// val meigou_pv_tongcheng = sc.sql(
// s"""
// |select a.device_id,a.device_city,a.meigou_id,b.meigou_city
// |from user_city_meigou_view a
// |left join meigou_city b
// |on a.meigou_id=b.meigou_id
// """.stripMargin
// )
// meigou_pv_tongcheng.createOrReplaceTempView("meigou_pv_tongcheng")
//
// val meigou_pv_count = sc.sql(
// s"""
// |select meigou_city,count(device_id) as meigou_pv,count(distinct(device_id)) as meigou_device_num
// |from meigou_pv_tongcheng
// |where device_city=meigou_city
// |group by meigou_city
// """.stripMargin
// )
// meigou_pv_count.show()
//
//
////开始计算咨询
// val zixun_meigou_view = sc.sql(
// s"""
// |select cl_id as device_id,city_id as device_city,params['service_id'] as meigou_id
// |from online.tl_hdfs_maidian_view
// |where partition_date >= '20181201'
// |and action = 'welfare_detail_click_message'
// """.stripMargin
// )
// zixun_meigou_view.createOrReplaceTempView("zixun_meigou_view")
//
// val zixun_meigou_tongcheng = sc.sql(
// s"""
// |select a.device_id,a.device_city,a.meigou_id,b.meigou_city
// |from zixun_meigou_view a
// |left join meigou_city b
// |on a.meigou_id=b.meigou_id
// """.stripMargin
// )
// zixun_meigou_tongcheng.createOrReplaceTempView("zixun_meigou_tongcheng")
//
// val zixun_pv_count = sc.sql(
// s"""
// |select meigou_city,count(device_id) as meigou_zixun,count(distinct(device_id)) as meigou_zixun_device_num
// |from zixun_meigou_tongcheng
// |where device_city=meigou_city
// |group by meigou_city
// """.stripMargin
// )
// zixun_pv_count.show()
val user_city_meigou_view = sc.sql(
s"""
|select ov.cl_id as device_id,ov.city_id as device_city,ov.params['business_id'] as meigou_id
|from online.tl_hdfs_maidian_view ov left join final_id
|on ov.cl_id = final_id.device_id
|where ov.action = "page_view"
|and ov.params['page_name']="welfare_detail"
|and ov.partition_date >='20181001'
|and ov.partition_date <'20181101'
|and ov.city_id is not null
|and final_id.device_id is null
""".stripMargin
)
user_city_meigou_view.createOrReplaceTempView("user_city_meigou_view")
val meigou_city = sc.sql(
s"""
|select b.id as meigou_id,d.city_id as meigou_city
|from online.tl_meigou_service_view b
|left join online.tl_hdfs_doctor_view c on b.doctor_id=c.id
|left join online.tl_hdfs_hospital_view d on c.hospital_id=d.id
|where b.partition_date='20181227'
|and c.partition_date='20181227'
|and d.partition_date='20181227'
""".stripMargin
)
meigou_city.createOrReplaceTempView("meigou_city")
val meigou_pv_tongcheng = sc.sql(
s"""
|select a.device_id,a.device_city,a.meigou_id,b.meigou_city
|from user_city_meigou_view a
|left join meigou_city b
|on a.meigou_id = b.meigou_id
""".stripMargin
)
meigou_pv_tongcheng.createOrReplaceTempView("meigou_pv_tongcheng")
val meigou_pv_count = sc.sql(
s"""
|select '2018-10' as stat_date,meigou_city,count(device_id) as meigou_pv,count(distinct(device_id)) as meigou_device_num
|from meigou_pv_tongcheng
|where device_city = meigou_city
|group by meigou_city
""".stripMargin
)
meigou_pv_count.createOrReplaceTempView("meigou_pv_count")
//开始计算咨询
val zixun_meigou_view = sc.sql(
s"""
|select ov.cl_id as device_id,ov.city_id as device_city,ov.params['service_id'] as meigou_id
|from online.tl_hdfs_maidian_view ov left join final_id
|on ov.cl_id = final_id.device_id
|where ov.partition_date >= '20181001'
|and ov.partition_date <'20181101'
|and ov.action = 'welfare_detail_click_message'
|and final_id.device_id is null
""".stripMargin
)
zixun_meigou_view.createOrReplaceTempView("zixun_meigou_view")
val zixun_meigou_tongcheng = sc.sql(
s"""
|select a.device_id,a.device_city,a.meigou_id,b.meigou_city
|from zixun_meigou_view a
|left join meigou_city b
|on a.meigou_id=b.meigou_id
""".stripMargin
)
zixun_meigou_tongcheng.createOrReplaceTempView("zixun_meigou_tongcheng")
val zixun_pv_count = sc.sql(
s"""
|select '2018-10' as stat_date,meigou_city,count(device_id) as meigou_zixun,count(distinct(device_id)) as meigou_zixun_device_num
|from zixun_meigou_tongcheng
|where device_city=meigou_city
|group by meigou_city
""".stripMargin
)
zixun_pv_count.createOrReplaceTempView("zixun_pv_count")
//开始计算每个地区每月新增设备
val device_new_count = sc.sql(
s"""
|select first_city,count(distinct(device_id))
|select first_city,count(distinct(device_id)) as new_device_month
|from online.ml_device_day_active_status
|where active_type != '4'
|and partition_date >='20181201'
|and partition_date >='20181001'
|and partition_date <'20181101'
|group by first_city
""".stripMargin
)
device_new_count.show()
device_new_count.createOrReplaceTempView("device_new_count")
//将所有的数据综合一起
val all_count = sc.sql(
s"""
|select mc.stat_date,mc.meigou_city,mc.meigou_pv,mc.meigou_device_num,zc.meigou_zixun,zc.meigou_zixun_device_num,dc.new_device_month
|from meigou_pv_count mc
|left join zixun_pv_count zc on mc.meigou_city = zc.meigou_city
|left join device_new_count dc on dc.first_city=mc.meigou_city
""".stripMargin
)
all_count.show()
GmeiConfig.writeToJDBCTable(all_count, "smart_rank_count", SaveMode.Append)
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment