Commit 07e6573a authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

去除机构黑名
parents c05844b7 1b58256e
......@@ -175,3 +175,70 @@ object WeafareStat {
}
/**
 * Exploratory Spark job over the `nd_data_meigou_cid` TiDB table.
 *
 * It extracts `(sid, cid)` pairs from recent rows, joins them with the diary-tag
 * and tag-hierarchy tables, and prints a sample plus the row count of each step.
 * Both intermediate results are registered as temp views (`tmp1`, `tmp2`).
 */
object NdDataInput {

  // Lower Spark's log level to WARN and switch the Jetty server logger off.
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /**
   * Command-line parameters of the job.
   *
   * @param env environment name handed to `GmeiConfig.setup`; defaults to "dev"
   */
  case class Params(env: String = "dev") extends AbstractParams[Params] with Serializable

  /** Parameter values used when no option is given on the command line. */
  val defaultParams: Params = Params()

  /** Command-line parser; the only recognised option is `--env`. */
  val parser: OptionParser[Params] = new OptionParser[Params]("Feed_EDA") {
    head("NdDataInput")
    opt[String]("env")
      .text("the databases environment you used")
      .action((x, c) => c.copy(env = x))
    note("winter is coming")
  }

  /**
   * Entry point: parses `args` and runs the job.
   *
   * Exits with status 1 when the arguments cannot be parsed, so that a scheduler
   * does not record a run that did nothing as a success.
   *
   * @param args raw command-line arguments
   */
  def main(args: Array[String]): Unit =
    parser.parse(args, defaultParams) match {
      case Some(param) => run(param)
      case None        => sys.exit(1)
    }

  /** Runs both queries for the given parameters, printing a sample and a count for each. */
  private def run(param: Params): Unit = {
    GmeiConfig.setup(param.env)
    // Second element of the pair returned by GmeiConfig; it is the handle that runs SQL.
    val session = GmeiConfig.getSparkSession()._2

    val ti = new TiContext(session)
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "nd_data_meigou_cid")
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
    // Mapped but not referenced by either query below.
    ti.tidbMapTable(dbName = "eagle", tableName = "feed_diary_boost")

    // presumably the date five days ago — confirm against GmeiConfig.getMinusNDate
    val sinceDate = GmeiConfig.getMinusNDate(5)
    // service_id and cid are pipe-delimited; the second field of each is kept.
    val tidbInput = session.sql(
      s"""
         |SELECT
         | split(service_id,'\\\\|')[1] as sid,split(cid,'\\\\|')[1] as cid
         |FROM nd_data_meigou_cid
         |where stat_date > '${sinceDate}'
      """.stripMargin
    )
    tidbInput.createOrReplaceTempView("tmp1")
    tidbInput.show()
    println(tidbInput.count())

    // The partition tables are keyed by a date string without dashes.
    val yesterday = GmeiConfig.getMinusNDate(1).replace("-", "")
    // NOTE(review): the WHERE filters on b.partition_date / c.partition_date drop
    // every tmp1 row that has no match, so these LEFT JOINs behave like inner joins.
    // If unmatched rows should be kept, move the predicates into the ON clauses.
    val tagged = session.sql(
      s"""
         |select a.sid as sid, a.cid as cid, b.tag_id as ctag_id, c.level1_id as clevel1_id
         |from tmp1 a
         |left join online.tl_hdfs_diary_tags_view b on a.cid=b.diary_id
         |left join online.bl_tag_hierarchy_detail c on b.tag_id=c.id
         |where b.partition_date='${yesterday}'
         |and c.partition_date='${yesterday}'
      """.stripMargin
    )
    tagged.createOrReplaceTempView("tmp2")
    tagged.show()
    println(tagged.count())
  }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment