Commit ea32b724 authored by litaolemo

update

parent 2f92d2d0
...@@ -18,12 +18,10 @@ import sys ...@@ -18,12 +18,10 @@ import sys
import time import time
from pyspark import SparkConf from pyspark import SparkConf
from pyspark.sql import SparkSession, DataFrame from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import lit # from pyspark.sql.functions import lit
import pytispark.pytispark as pti # import pytispark.pytispark as pti
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
db='jerry_prod')
cursor = db.cursor()
def con_sql(sql): def con_sql(sql):
# 从数据库的表里获取数据 # 从数据库的表里获取数据
...@@ -89,6 +87,9 @@ device_df = spark.sql(sql) ...@@ -89,6 +87,9 @@ device_df = spark.sql(sql)
device_df.show(1, False) device_df.show(1, False)
sql_res = device_df.collect() sql_res = device_df.collect()
print("-----------------------------------------------------------------------------") print("-----------------------------------------------------------------------------")
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
db='jerry_prod')
cursor = db.cursor()
for res in sql_res: for res in sql_res:
# print(res) # print(res)
day_id = res.day_id day_id = res.day_id
......
...@@ -19,8 +19,8 @@ import sys ...@@ -19,8 +19,8 @@ import sys
import time import time
from pyspark import SparkConf from pyspark import SparkConf
from pyspark.sql import SparkSession, DataFrame from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import lit # from pyspark.sql.functions import lit
import pytispark.pytispark as pti # import pytispark.pytispark as pti
...@@ -60,7 +60,7 @@ spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJso ...@@ -60,7 +60,7 @@ spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJso
spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'") spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'")
task_list = [] task_list = []
task_days = 10 task_days = 2
for t in range(0, task_days): for t in range(0, task_days):
day_num = 0 - t day_num = 0 - t
......
...@@ -204,7 +204,7 @@ for t in range(0, task_days): ...@@ -204,7 +204,7 @@ for t in range(0, task_days):
T.CARD_ID, --卡片ID T.CARD_ID, --卡片ID
SUM(T.CLICK_NUM) AS CLICK_NUM --点击次数 SUM(T.CLICK_NUM) AS CLICK_NUM --点击次数
FROM ML.ML_C_ET_CK_CLICK_DIMEN_D T FROM ML.ML_C_ET_CK_CLICK_DIMEN_D T
WHERE T.PARTITION_DAY = '${partition_day}' WHERE T.PARTITION_DAY = '{partition_day}'
AND T.PAGE_CODE = 'search_result_welfare' AND T.PAGE_CODE = 'search_result_welfare'
AND T.ACTION IN ('goto_welfare_detail','search_result_welfare_click_item') AND T.ACTION IN ('goto_welfare_detail','search_result_welfare_click_item')
GROUP BY T.DEVICE_ID, GROUP BY T.DEVICE_ID,
...@@ -215,7 +215,7 @@ left join ...@@ -215,7 +215,7 @@ left join
T.CARD_ID as CARD_ID, --卡片ID T.CARD_ID as CARD_ID, --卡片ID
COUNT(T.CARD_ID) AS EXPOSURE --点击次数 COUNT(T.CARD_ID) AS EXPOSURE --点击次数
FROM ML.MID_ML_C_ET_PE_PRECISEEXPOSURE_DIMEN_D T FROM ML.MID_ML_C_ET_PE_PRECISEEXPOSURE_DIMEN_D T
WHERE T.PARTITION_DAY = '${partition_day}' WHERE T.PARTITION_DAY = '{partition_day}'
AND T.PAGE_CODE = 'search_result_welfare' AND T.PAGE_CODE = 'search_result_welfare'
AND T.CARD_TYPE = 'common_card' AND T.CARD_TYPE = 'common_card'
GROUP BY T.DEVICE_ID, GROUP BY T.DEVICE_ID,
...@@ -225,7 +225,7 @@ left join ...@@ -225,7 +225,7 @@ left join
T.DEVICE_OS_TYPE, T.DEVICE_OS_TYPE,
T.ACTIVE_TYPE T.ACTIVE_TYPE
FROM ML.ML_C_CT_DV_DEVICE_DIMEN_D T FROM ML.ML_C_CT_DV_DEVICE_DIMEN_D T
WHERE T.PARTITION_DAY = '${partition_day}' WHERE T.PARTITION_DAY = '{partition_day}'
AND T.ACTIVE_TYPE IN ('1', '2', '4')) AND T.ACTIVE_TYPE IN ('1', '2', '4'))
D on T.DEVICE_ID = D.DEVICE_ID D on T.DEVICE_ID = D.DEVICE_ID
LEFT JOIN spam_pv on spam_pv.device_id= T.DEVICE_ID LEFT JOIN spam_pv on spam_pv.device_id= T.DEVICE_ID
...@@ -234,7 +234,7 @@ left join ...@@ -234,7 +234,7 @@ left join
and (dev.device_id is null or dev.device_id='') and (dev.device_id is null or dev.device_id='')
GROUP by D.DEVICE_OS_TYPE, GROUP by D.DEVICE_OS_TYPE,
D.ACTIVE_TYPE D.ACTIVE_TYPE
""" """.format(partition_day=yesterday_str)
print(sql_search_ctr) print(sql_search_ctr)
search_ctr_df = spark.sql(sql_search_ctr) search_ctr_df = spark.sql(sql_search_ctr)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment