Commit ca265a7c authored by litaolemo

update

parent dca24616
......@@ -22,6 +22,7 @@ from pyspark import SparkConf
from pyspark.sql import SparkSession, DataFrame
from meta_base_code.utils.func_from_redis_get_portrait import *
# from pyspark.sql.functions import lit
# import pytispark.pytispark as pti
......@@ -78,13 +79,11 @@ spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDF
task_list = []
task_days = 3
for t in range(2, task_days):
day_num = 0 - t
now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
last_30_day_str = (now + datetime.timedelta(days=-30)).strftime("%Y%m%d")
tomorrow_str = (datetime.datetime.now() + datetime.timedelta(days=day_num+1)).strftime("%Y%m%d")
tomorrow_str = (datetime.datetime.now() + datetime.timedelta(days=day_num + 1)).strftime("%Y%m%d")
today_str = now.strftime("%Y%m%d")
today_str_format = now.strftime("%Y-%m-%d")
yesterday_str = (now + datetime.timedelta(days=-1)).strftime("%Y%m%d")
......@@ -97,89 +96,105 @@ SELECT * FROM online.bl_hdfs_maidian_updates
AND ((action in ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
AND page_name in ('search_result_more','search_result_diary','search_result_post'))
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer')))
""".format(partition_day="20210224", end_date="20210225",tomorrow_str=tomorrow_str)
""".format(partition_day="20210224", end_date="20210225", tomorrow_str=tomorrow_str)
# print(new_urser_device_id_sql)
# print(new_urser_device_id_sql)
new_urser_device_id_df = spark.sql(new_urser_device_id_sql)
new_urser_device_id_df.createOrReplaceTempView("device_id_view")
new_urser_device_id_df.show(1)
sql_res = new_urser_device_id_df.collect()
res_list = []
for res in sql_res:
print(res)
# sql_res = new_urser_device_id_df.collect()
# res_dict = {}
# portrait_dict = {
# "first_demands": {},
# "second_demands": {},
# "first_solutions": {},
# "second_solutions": {},
# "first_positions": {},
# "second_positions": {},
# "projects": {},
# 'anecdote_tags':{}
# }
# no_portrait_device_id_list = []
# print("-------------------------------")
# count_not_has_portratit = 0
#
# for count_user_count, res in enumerate(sql_res):
# # print(count, res)
# portratit_res = get_user_portrait_tag3_from_redis(res.device_id)
# sql = """select cl_id, projects from kafka_tag3_log
# where cl_id = '%s' and event_cn = 'kyc' """ % res.device_id
# # print(count_user_count, res, portratit_res)
# sql_res_list = con_sql(sql)
# kyc_str_list= []
# if sql_res_list:
# print(sql_res_list,type(sql_res_list))
# kyc_str_list = sql_res_list[0][1].split(",")
#
# temp_count = 0
# for demand in portratit_res:
# if portratit_res[demand]:
# try:
# for tag in portratit_res[demand][0:3]:
# if tag in portrait_dict[demand]:
# portrait_dict[demand][tag] += 1
# else:
# portrait_dict[demand][tag] = 1
# if tag in kyc_str_list and demand == "projects":
# if portrait_dict["projects"].get(tag):
# portrait_dict["projects"][tag] -= 1
# except Exception as e:
# print("error ", e)
#
# temp_count += 1
# if not temp_count:
# count_not_has_portratit += 1
# no_portrait_device_id_list.append(res.device_id)
#
#
# print(portrait_dict)
# print(count_user_count+1,count_not_has_portratit)
# print("-------------------------------")
#
#
# for protratit_type in portrait_dict["projects"]:
# partition_date = today_str
# pid = hashlib.md5((partition_date + protratit_type).encode("utf8")).hexdigest()
# action_count = portrait_dict["projects"][protratit_type]
#
# instert_sql = """replace into new_user_project_count(
# partition_day,pid,protratit_count,protratit_type) VALUES('{partition_day}','{pid}',{protratit_count},'{protratit_type}');""".format(
# partition_day=today_str, pid=pid, protratit_count=action_count
# , protratit_type=protratit_type
# )
# print(instert_sql)
# # cursor.execute("set names 'UTF8'")
# db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
# db='doris_olap')
# cursor = db.cursor()
# res = cursor.execute(instert_sql)
# db.commit()
# print(res)
#
query = res.params["query"]
card_name = res.params["card_name"]
card_id = res.params["card_id"]
user_id = res.user_id
time_str = res.time_str
page_name = res.page_name
res_list.append({"query": query,
"card_name": card_name,
"card_id": card_id,
"user_id": user_id,
"time_str": time_str,
"page_name": page_name
})
import pandas
data = pandas.DataFrame(res_list)
data.to_csv("data.csv",encoding="gb18030")
from maintenance.send_email_with_file_auto_task import *
send_file_email("",'',sender="litao@igengmei.com",email_group=["litao@igengmei.com"],email_msg_body_str="test",title_str="test",cc_group=["litao@igengmei.com"],file="data.csv")
# sql_res = new_urser_device_id_df.collect()
# res_dict = {}
# portrait_dict = {
# "first_demands": {},
# "second_demands": {},
# "first_solutions": {},
# "second_solutions": {},
# "first_positions": {},
# "second_positions": {},
# "projects": {},
# 'anecdote_tags':{}
# }
# no_portrait_device_id_list = []
# print("-------------------------------")
# count_not_has_portratit = 0
#
# for count_user_count, res in enumerate(sql_res):
# # print(count, res)
# portratit_res = get_user_portrait_tag3_from_redis(res.device_id)
# sql = """select cl_id, projects from kafka_tag3_log
# where cl_id = '%s' and event_cn = 'kyc' """ % res.device_id
# # print(count_user_count, res, portratit_res)
# sql_res_list = con_sql(sql)
# kyc_str_list= []
# if sql_res_list:
# print(sql_res_list,type(sql_res_list))
# kyc_str_list = sql_res_list[0][1].split(",")
#
# temp_count = 0
# for demand in portratit_res:
# if portratit_res[demand]:
# try:
# for tag in portratit_res[demand][0:3]:
# if tag in portrait_dict[demand]:
# portrait_dict[demand][tag] += 1
# else:
# portrait_dict[demand][tag] = 1
# if tag in kyc_str_list and demand == "projects":
# if portrait_dict["projects"].get(tag):
# portrait_dict["projects"][tag] -= 1
# except Exception as e:
# print("error ", e)
#
# temp_count += 1
# if not temp_count:
# count_not_has_portratit += 1
# no_portrait_device_id_list.append(res.device_id)
#
#
# print(portrait_dict)
# print(count_user_count+1,count_not_has_portratit)
# print("-------------------------------")
#
#
# for protratit_type in portrait_dict["projects"]:
# partition_date = today_str
# pid = hashlib.md5((partition_date + protratit_type).encode("utf8")).hexdigest()
# action_count = portrait_dict["projects"][protratit_type]
#
# instert_sql = """replace into new_user_project_count(
# partition_day,pid,protratit_count,protratit_type) VALUES('{partition_day}','{pid}',{protratit_count},'{protratit_type}');""".format(
# partition_day=today_str, pid=pid, protratit_count=action_count
# , protratit_type=protratit_type
# )
# print(instert_sql)
# # cursor.execute("set names 'UTF8'")
# db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
# db='doris_olap')
# cursor = db.cursor()
# res = cursor.execute(instert_sql)
# db.commit()
# print(res)
#
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment