change

4895ca0b · 张彦钊 · eb5b8bdf · 4895ca0b
Commit 4895ca0b authored Oct 12, 2019 by 张彦钊
Hide whitespace changes
Inline Side-by-side

Showing with 93 additions and 76 deletions

meigou.py local/meigou.py +93 -76

No files found.
--- a/local/meigou.py
+++ b/local/meigou.py
@@ -14,104 +14,130 @@ def all_click(x):
    date = (datetime.date.today() - datetime.timedelta(days=x)).strftime("%Y%m%d")
    total.append(date)
-    print("美购首页美购列表卡片点击")
+    print("美购搜索点击")
    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates "
-                    "where partition_date='{}' "
+                    "where partition_date='{}'and action = 'search_result_welfare_click_item' "
-                    "and action = 'goto_welfare_detail' and params['from'] = 'welfare_home_list_item'"
+                    "and app['version'] in ('7.15.0','7.14.0')"
                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
    total.append(tmp)
    sum = sum + tmp
-    print("美券相关的美购列表页美购卡片点击")
+    print("美购首页相关推荐")
    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates "
-                    "where partition_date='{}' "
+                    "where partition_date='{}'and action = 'goto_welfare_detail' "
-                    "and action = 'goto_welfare_detail' and params['from'] = 'coupon_welfare_list'"
+                    "and app['version'] in ('7.15.0','7.14.0') and params['from'] = 'welfare_home_list_item'"
                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
    total.append(tmp)
    sum = sum + tmp
-    print("新美购首页-固定ICON美购卡片点击")
+    home_page_sum = 0
-    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates "
+    print("首页点击'全部'icon按钮进入的列表-美购卡片点击")
-                    "where partition_date='{}' "
+    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates where partition_date='{}' "
-                    "and action = 'goto_welfare_detail' and params['from'] = 'welfare_list'"
+                    "and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0') "
+                    "and params['from'] = 'welfare_list' and params['cpc_referer'] = '6'"
                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
-    total.append(tmp)
+    home_page_sum = home_page_sum + tmp
-    sum = sum + tmp
-    print("首页-品类模块点击跳转到品类聚合美购卡片点击")
+    print("首页点击icon进入的列表-美购卡片点击")
    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates "
-                    "where partition_date='{}' "
+                    "where partition_date='{}'and action = 'goto_welfare_detail' "
-                    "and action = 'goto_welfare_detail' and params['from'] = 'category'"
+                    "and app['version'] in ('7.15.0','7.14.0') "
+                    "and params['from'] = 'category' and params['cpc_referer'] = '19'"
                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
-    total.append(tmp)
+    home_page_sum = home_page_sum + tmp
-    sum = sum + tmp
+    total.append(home_page_sum)
+    sum = sum + home_page_sum
+    meigou_homepage_sum = 0
+    print("美购首页'全部'点击")
+    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates where partition_date='{}' "
+                    "and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0') "
+                    "and params['from'] = 'welfare_list' and params['cpc_referer'] = '21'"
+                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
+    meigou_homepage_sum = meigou_homepage_sum + tmp
-    print("serach")
+    print("美购首页icon美购点击")
-    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates "
+    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates where partition_date='{}' "
-                    "where partition_date='{}' "
+                    "and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0') "
-                    "and action = 'search_result_welfare_click_item'"
+                    "and params['from'] = 'welfare_list' and params['cpc_referer'] = '18'"
                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
-    total.append(tmp)
+    meigou_homepage_sum = meigou_homepage_sum + tmp
-    sum = sum + tmp
+    total.append(meigou_homepage_sum)
+    sum = sum + meigou_homepage_sum
    total.append(sum)
-    return sum
+    return total
 def cpc_click(x):
-    cpc = []
+    total = []
-    date = (datetime.date.today() - datetime.timedelta(days=x)).strftime("%Y%m%d")
-    cpc.append(date)
    sum = 0
+    date = (datetime.date.today() - datetime.timedelta(days=x)).strftime("%Y%m%d")
+    total.append(date)
-    print("美购首页美购列表卡片点击")
+    print("美购搜索点击")
    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates "
-                    "where partition_date='{}' "
+                    "where partition_date='{}'and action = 'search_result_welfare_click_item' "
-                    "and action = 'goto_welfare_detail' and params['from'] = 'welfare_home_list_item' "
+                    "and app['version'] in ('7.15.0','7.14.0') and params['is_cpc'] = '1'"
-                    "and params['is_cpc'] = '1'"
                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
-    cpc.append(tmp)
+    total.append(tmp)
    sum = sum + tmp
-    print("美券相关的美购列表页美购卡片点击")
+    print("美购首页相关推荐")
    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates "
-                    "where partition_date='{}' "
+                    "where partition_date='{}'and action = 'goto_welfare_detail' "
-                    "and action = 'goto_welfare_detail' and params['from'] = 'coupon_welfare_list' "
+                    "and app['version'] in ('7.15.0','7.14.0') and params['from'] = 'welfare_home_list_item' "
                    "and params['is_cpc'] = '1'"
                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
-    cpc.append(tmp)
+    total.append(tmp)
    sum = sum + tmp
-    print("新美购首页-固定ICON美购卡片点击")
+    home_page_sum = 0
-    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates "
+    print("首页点击'全部'icon按钮进入的列表-美购卡片点击")
-                    "where partition_date='{}' "
+    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates where partition_date='{}' "
-                    "and action = 'goto_welfare_detail' and params['from'] = 'welfare_list' "
+                    "and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0') "
+                    "and params['from'] = 'welfare_list' and params['cpc_referer'] = '6' "
                    "and params['is_cpc'] = '1'"
                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
-    cpc.append(tmp)
+    home_page_sum = home_page_sum + tmp
-    sum = sum + tmp
-    print("首页-品类模块点击跳转到品类聚合美购卡片点击")
+    print("首页点击icon进入的列表-美购卡片点击")
    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates "
-                    "where partition_date='{}' "
+                    "where partition_date='{}'and action = 'goto_welfare_detail' "
-                    "and action = 'goto_welfare_detail' and params['from'] = 'category' "
+                    "and app['version'] in ('7.15.0','7.14.0') "
+                    "and params['from'] = 'category' and params['cpc_referer'] = '19' "
                    "and params['is_cpc'] = '1'"
                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
-    cpc.append(tmp)
+    home_page_sum = home_page_sum + tmp
-    sum = sum + tmp
+    total.append(home_page_sum)
+    sum = sum + home_page_sum
+    meigou_home_sum = 0
+    print("美购首页'全部'点击")
+    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates where partition_date='{}' "
+                    "and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0') "
+                    "and params['from'] = 'welfare_list' and params['cpc_referer'] = '21' "
+                    "and params['is_cpc'] = '1'"
+                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
+    meigou_home_sum = meigou_home_sum + tmp
-    print("serach")
+    print("美购首页icon美购点击")
-    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates "
+    tmp = spark.sql("select count(*) from online.bl_hdfs_maidian_updates where partition_date='{}' "
-                    "where partition_date='{}' "
+                    "and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0') "
-                    "and action = 'search_result_welfare_click_item' "
+                    "and params['from'] = 'welfare_list' and params['cpc_referer'] = '18' "
                    "and params['is_cpc'] = '1'"
                    .format(date)).rdd.map(lambda x: x[0]).collect()[0]
-    cpc.append(tmp)
+    meigou_home_sum = meigou_home_sum + tmp
-    sum = sum + tmp
+    total.append(meigou_home_sum)
+    sum = sum + meigou_home_sum
+    total.append(sum)
+    return total
-    cpc.append(sum)
-    return sum
@@ -125,29 +151,20 @@ if __name__ == '__main__':
        .set("spark.driver.maxResultSize", "8g").set("spark.sql.avro.compression.codec", "snappy")
    spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
-    clicks = []
+    all_list = []
-    cpcs = []
+    for i in range(1,3):
-    dates = []
-    for i in range(1,26):
-        clicks.append(all_click(i))
-        cpcs.append(cpc_click(i))
        date_str = (datetime.date.today() - datetime.timedelta(days=i)).strftime("%Y%m%d")
-        dates.append(date_str)
+        tmp_list = [date_str]
-    print("clicks")
+        tmp_list.extend(all_click(i))
-    print(clicks)
+        tmp_list.extend(cpc_click(i))
+        all_list.append(tmp_list)
-    print("cpcs")
-    print(cpcs)
+    df = pd.DataFrame(all_list)
-    df = pd.DataFrame({'date': dates,
-                   'clicks': clicks,"cpc":cpcs})
+    df = df.rename(columns={0: "搜索点击", 1: "相关推荐",2:"首页icon",3:"美购首页icon",4: "总点击",
+                            5: "搜索点击cpc", 6: "相关推荐cpc",7:"首页icon_cpc",8:"美购首页icon_cpc",
+                            9:"cpc总点击"})
    df.to_csv('/home/gmuser/cpc.csv', index=False)
-    # rdd = spark.sparkContext.parallelize(cpcs)
-    # df = spark.createDataFrame(rdd).toDF.toPandas()
-    # df.to_csv('/home/gmuser/cpc.csv',index=False)
-    #
-    # rdd = spark.sparkContext.parallelize(clicks)
-    # df = spark.createDataFrame(rdd).toDF.toPandas()
-    # df.to_csv('/home/gmuser/clicks.csv', index=False)
    spark.stop()