Commit f7bbb62a authored by 王志伟's avatar 王志伟

增加搜索CTR统计

parent 3de84bbd
......@@ -5,7 +5,7 @@ import java.text.SimpleDateFormat
import breeze.linalg.split
import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.spark.sql.{Row, SaveMode, SparkSession, TiContext}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
......@@ -13,6 +13,10 @@ import com.github.nscala_time.time.Imports._
import java.text.SimpleDateFormat
import java.util.Date
import com.sun.tools.internal.jxc.SchemaGenerator.Runner
import scala.util.parsing.json.JSON
object temp_count {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
......@@ -353,4 +357,219 @@ object Repeated_content_recommendation_moreday {
}
}
\ No newline at end of file
}
object GetHiveSearchData {
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /** Command-line parameters: database environment and the statistics date. */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin +
        s"| --env ${defaultParams.env}"
    )
  }

  // BUGFIX(review): `partition_date` was declared inside main's `parser.parse(...).map`
  // closure but is referenced from GetSearchResultData, which is a different scope.
  // Hoisted both values to the object so the driver SQL and the helper share them.
  // val stat_date = GmeiConfig.getMinusNDate(1)
  val stat_date = "2019-01-16" // TODO(review): hard-coded date — should use the `date` param or getMinusNDate(1)
  val partition_date = stat_date.replace("-", "") // Hive partitions are keyed as yyyyMMdd

  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure_precise")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")

      // Exposure endpoint / click action pairs, one per search vertical.
      // FIX(review): results were bound with `var` but never reassigned — changed to `val`.
      val strDiaryExposureAction = "/api/search/v2/diary"
      val strDiaryClickAction = "search_result_more_diary_click_item" // to be confirmed
      val (diaryExposureVal, diaryClickNum, diaryExposureMapCount, diaryExposureFilterCount) =
        GetSearchResultData(sc, strDiaryExposureAction, strDiaryClickAction)

      val strMeigouExposureAction = "/api/search/v2/service"
      val strMeigouClickAction = "search_result_welfare_click_item"
      val (meigouExposureVal, meigouClickNum, meigouExposureMapCount, meigouExposureFilterCount) =
        GetSearchResultData(sc, strMeigouExposureAction, strMeigouClickAction)

      val strSearchResultExposureAction = "/api/search/v2/content"
      val strSearchResultClickAction = "search_result_click_diary_item" // to be confirmed
      val (searchResultExposureVal, searchResultClickNum, searchResultExposureMapCount, searchResultExposureFilterCount) =
        GetSearchResultData(sc, strSearchResultExposureAction, strSearchResultClickAction)

      val strSearchDoctorExposureAction = "/api/search/v2/doctor"
      val strSearchDoctorClickAction = "search_result_doctor_click_item"
      val (searchDoctorExposureVal, searchDoctorClickNum, searchDoctorExposureMapCount, searchDoctorExposureFilterCount) =
        GetSearchResultData(sc, strSearchDoctorExposureAction, strSearchDoctorClickAction)

      val strSearchHospitalExposureAction = "/api/search/v2/hospital"
      val strSearchHospitalClickAction = "search_result_hospital_click_item"
      val (searchHospitalExposureVal, searchHospitalClickNum, searchHospitalExposureMapCount, searchHospitalExposureFilterCount) =
        GetSearchResultData(sc, strSearchHospitalExposureAction, strSearchHospitalClickAction)

      // Diary clicks arrive under a different maidian action/page than the generic
      // per-vertical click action, so they are counted separately here.
      val diary_clickSql = sc.sql(
        s"""
           |select
           |count(1) click_num
           |from online.tl_hdfs_maidian_view where partition_date='${partition_date}'
           |and action='on_click_diary_card'
           |and params['page_name']='search_result_diary'
      """.stripMargin
      )
      val diary_clickArray = diary_clickSql.collect()
      val diary_click_num = diary_clickArray(0).getAs[Long]("click_num")

      // Diary-card clicks on the "more content" result page count toward the content vertical.
      val content_diary_clickSql = sc.sql(
        s"""
           |select
           |count(1) click_num
           |from online.tl_hdfs_maidian_view where partition_date='${partition_date}'
           |and action='on_click_diary_card'
           |and params['page_name']='search_result_more'
      """.stripMargin
      )
      val content_diary_clickArray: Array[Row] = content_diary_clickSql.collect()
      val content_diary_click_num: Long = content_diary_clickArray(0).getAs[Long]("click_num")

      // CTR report, one line per vertical (exposure, clicks, click rate).
      println("searchDiaryExposureVal:" + diaryExposureVal + "\tsearchDiaryClickNum:" + diary_click_num + "\tclickRate:" + (diary_click_num.floatValue()/diaryExposureVal.floatValue()).formatted("%.2f"))
      println("searchMeigouExposureVal:" + meigouExposureVal + "\tsearchMeigouClickNum:" + meigouClickNum + "\tclickRate:" + (meigouClickNum.floatValue()/meigouExposureVal.floatValue()).formatted("%.2f"))
      println("searchResultExposureVal:" + searchResultExposureVal + "\tsearchResultClickNum:" + (searchResultClickNum+content_diary_click_num) + "\tclickRate:" + ((searchResultClickNum+content_diary_click_num).floatValue()/searchResultExposureVal.floatValue()).formatted("%.2f"))
      println("searchDoctorExposureVal:" + searchDoctorExposureVal + "\tsearchDoctorClickNum:" + searchDoctorClickNum + "\tclickRate:" + (searchDoctorClickNum.floatValue()/searchDoctorExposureVal.floatValue()).formatted("%.2f"))
      println("searchHospitalExposureVal:" + searchHospitalExposureVal + "\tsearchHospitalClickNum:" + searchHospitalClickNum + "\tclickRate:" + (searchHospitalClickNum.floatValue()/searchHospitalExposureVal.floatValue()).formatted("%.2f"))
    }
  }

  /**
   * Collects exposure and click statistics for one search vertical on `partition_date`.
   *
   * Exposure rows are read from `online.tl_hdfs_backend_view`; each row's `app` map may
   * carry an `exposure_data` JSON list whose items contain `list_ids`. The number of
   * exposed ids is summed in a Spark accumulator.
   *
   * @param spark             active SparkSession
   * @param strExposureAction backend action identifying exposure events
   * @param strClickAction    maidian action identifying click events
   * @return (total exposed item count, click count,
   *          total exposure rows scanned, rows that actually carried exposure data)
   */
  def GetSearchResultData(spark: SparkSession, strExposureAction: String, strClickAction: String) = {
    val exposureAccum = spark.sparkContext.longAccumulator("search exposure data")

    val exposureSql = spark.sql(
      s"""
         |select
         |action,
         |user_id,
         |city_id,
         |app
         |from online.tl_hdfs_backend_view where action='$strExposureAction' and partition_date='${partition_date}'
      """.stripMargin
    )

    // Each element is either the parsed exposure_data list or None (row carried no usable data).
    val exposureMapResult = exposureSql.rdd.map(row => {
      // FIX(review): getAs can yield null for a null column; guard before .nonEmpty.
      val rowAppFieldMap: Map[String, Any] =
        Option(row.getAs[Map[String, Any]]("app")).getOrElse(Map.empty)
      if (rowAppFieldMap.nonEmpty) {
        if (rowAppFieldMap.contains("exposure_data")) {
          // NOTE(review): `.get` throws if the JSON is malformed — kept as in the original;
          // consider a defensive match if dirty data shows up.
          val exposure_data_lists: List[Any] =
            JSON.parseFull(rowAppFieldMap("exposure_data").toString).get.asInstanceOf[List[Any]]
          if (exposure_data_lists.nonEmpty) {
            exposure_data_lists.foreach(exposure_item => {
              if (exposure_item != None && exposure_item.toString.nonEmpty) {
                val exposureItemMap: Map[String, Any] = exposure_item.asInstanceOf[Map[String, Any]]
                if (exposureItemMap.contains("list_ids")) {
                  // Every id in list_ids counts as one exposure.
                  val exposure_list_ids: List[Any] = exposureItemMap("list_ids").asInstanceOf[List[Any]]
                  exposureAccum.add(exposure_list_ids.length)
                }
              } else {
                None
              }
            })
            exposure_data_lists
          } else {
            None
          }
        } else {
          None
        }
      } else {
        None
      }
    })

    // Must cache: the RDD is traversed twice below, and the accumulator side effect
    // must not be applied twice.
    exposureMapResult.cache()
    val exposureFilterResult = exposureMapResult.filter(_ != None)
    val exposureMapCount: Long = exposureMapResult.count()
    val exposureFilterCount: Long = exposureFilterResult.count()

    val clickSql = spark.sql(
      s"""
         |select
         |count(1) click_num
         |from online.tl_hdfs_maidian_view where partition_date='${partition_date}' and action='$strClickAction'
      """.stripMargin
    )
    val clickArray: Array[Row] = clickSql.collect()
    val click_num: Long = clickArray(0).getAs[Long]("click_num")

    (exposureAccum.value, click_num, exposureMapCount, exposureFilterCount)
  }
  //    GmeiConfig.writeToJDBCTable(df_result, table = "Repeated_content_recommendation_moreday", SaveMode.Append)
  //      exp_diary.show()
  //      exp_diary.createOrReplaceTempView("exp_diary")
  //      GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator_moreday", SaveMode.Append)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment