Commit 0d4db92d authored by litaolemo

update

parent 1f9e6422
@@ -388,47 +388,37 @@ def get_keyword_ctr(start_ts,end_ts):
    end_date_str = end_date.strftime("%Y%m%d")
    data_dic = {}
    # -- query word exposure
    baoguang_sql = """
        SELECT card_id AS query, count(*) AS query_count
        FROM online.ml_community_precise_exposure_detail
        WHERE partition_date >= '{start_date}'
          AND partition_date < '{end_date}'
          AND page_name IN ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor')
        GROUP BY card_id
    """.format(start_date=start_date_str, end_date=end_date_str)
    device_df = spark.sql(baoguang_sql)
    device_df.show(1, False)
    sql_res = device_df.collect()
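    # collect() pulls the full result set onto the driver; fine while the number of
    # distinct query words is small, but it will not scale to very large results.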
print("-----------------------------------------------------------------------------")
print("-----------------------------------------------------------------------------")
print("-----------------------------------------------------------------------------")
print("-----------------------------------------------------------------------------")
print("-----------------------------------------------------------------------------")
print("-----------------------------------------------------------------------------")
print("-----------------------------------------------------------------------------")
    for res in sql_res:
        print(res)
        print(res.query)
        # data_dic[res.query] = res.query_count
    # Alternative: fetch the same counts through a Hive cursor instead of Spark SQL.
    # hive.cursor.execute(baoguang_sql)
    # for data in hive.cursor.fetchall():
    #     data_dic[data[0]] = data[1]
    # -- query word exposure
query_sql = """
SELECT params['query_words'] as query_words
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>='{start_date}' AND partition_date<'{end_date}'
AND action = 'report_status'
AND page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor')
""".format(start_date=start_date_str,end_date=end_date_str)
    device_df = spark.sql(query_sql)
    device_df.show(1, False)
    sql_res = device_df.collect()
    for res in sql_res:
        print(res, type(res))
        for single_keyword in res.query_words:
            # Count every occurrence: get() with a default avoids the old
            # off-by-one, where a keyword's first hit initialized the count to 0.
            data_dic[single_keyword] = data_dic.get(single_keyword, 0) + 1
    return data_dic

def craw_query_one_week(data_index, start_ts, end_ts, week_num, last_week_num, year):