Commit 3a63e03f authored by 高雅喆

update

parent 7da1bfce
...@@ -78,42 +78,6 @@ def get_meigou_smart_rank(service_id, result_all_dict, service_detail_view_count ...@@ -78,42 +78,6 @@ def get_meigou_smart_rank(service_id, result_all_dict, service_detail_view_count
return float('%.4g' % meigou_smart_rank_score) return float('%.4g' % meigou_smart_rank_score)
def send_email(app, id, e):
    """Email a failure notice with the error text attached as ``error.txt``.

    Args:
        app: Application name; appended to the subject line.
        id: Application id; embedded in the message body. The name shadows
            the builtin but is kept so existing keyword callers still work.
        e: The error to report; ``str(e)`` becomes the attachment content.

    Returns:
        None. An ``smtplib.SMTPException`` raised while connecting, logging
        in or sending is reported with ``print('error')`` and not re-raised.
    """
    # Third-party SMTP service settings.
    # NOTE(review): the mailbox password is hard-coded in source; it should
    # be moved to configuration / a secret store and rotated.
    mail_host = 'smtp.exmail.qq.com'  # SMTP server
    mail_user = "gaoyazhe@igengmei.com"  # login user
    mail_pass = "VCrKTui99a7ALhiK"  # login password
    sender = 'gaoyazhe@igengmei.com'
    receivers = ['gaoyazhe@igengmei.com']  # recipient addresses

    error_text = str(e)

    msg = MIMEMultipart()
    # str() so that a non-string id (e.g. an int) does not raise TypeError.
    msg.attach(MIMEText('app_id:' + str(id) + ':fail', 'plain', 'utf-8'))
    msg['From'] = formataddr(["gaoyazhe", sender])
    msg['To'] = ";".join(receivers)
    msg['Subject'] = 'spark streaming:app_name:' + str(app)

    # Keep writing the local error.txt copy as before; the context manager
    # closes the file, so no explicit close() is needed.
    with open('error.txt', 'w') as f:
        f.write(error_text)

    # Build the attachment from memory instead of re-opening the file, which
    # previously left an unclosed file handle behind.
    attachment = MIMEApplication(error_text.encode('utf-8'))
    attachment.add_header('Content-Disposition', 'attachment', filename="error.txt")
    msg.attach(attachment)

    try:
        # The context manager sends QUIT and closes the socket on exit.
        with smtplib.SMTP_SSL(mail_host, 465) as smtp_obj:
            smtp_obj.login(mail_user, mail_pass)
            smtp_obj.sendmail(sender, receivers, msg.as_string())
    except smtplib.SMTPException:
        print('error')
if __name__ == '__main__': if __name__ == '__main__':
try: try:
start = time.time() start = time.time()
...@@ -164,7 +128,7 @@ if __name__ == '__main__': ...@@ -164,7 +128,7 @@ if __name__ == '__main__':
result = device_ids_rdd.repartition(40).map( result = device_ids_rdd.repartition(40).map(
lambda x: update_device_smart_rank(str(x, encoding='utf-8'), result_all_dict, lambda x: update_device_smart_rank(str(x, encoding='utf-8'), result_all_dict,
service_detail_view_count_30_dict, result_smart_rank_score_dict)) service_detail_view_count_30_dict, result_smart_rank_score_dict))
# result.foreach(print) result.collect()
print(time.time() - start) print(time.time() - start)
except Exception as e: except Exception as e:
print(e) print(e)
\ No newline at end of file
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pymysql
import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
import redis
import datetime
from pyspark import SparkConf
import time
from pyspark.sql import SparkSession
import json
import numpy as np
import pandas as pd
from pyspark.sql.functions import lit
from pyspark.sql.functions import concat_ws
def get_user_history_order_service_tag(user_id, stat_date):
    """Look up the tags of a user's ordered services and store them.

    Reads tag ids from the ``zhengxing`` database for services that appear in
    the user's ``api_order`` rows with ``status=1`` (restricted to
    ``tag_type<'4'``), then writes one row into
    ``jerry_test.user_history_order_tags``.

    Args:
        user_id: User id to look up. A falsy value short-circuits to ``[]``.
        stat_date: Value stored in the ``stat_date`` column.

    Returns:
        The list of tag ids, ``[]`` when ``user_id`` is falsy, or ``None``
        when any error occurred (the error is printed, not raised).
    """
    if not user_id:
        return []

    db_zhengxing = None
    db_jerry_test = None
    try:
        db_zhengxing = pymysql.connect(host="172.16.30.141", port=3306, user="work",
                                       password="BJQaT9VzDcuPBqkd",
                                       db="zhengxing", cursorclass=pymysql.cursors.DictCursor)
        cur_zhengxing = db_zhengxing.cursor()
        # Parameterized instead of str.format so the driver does the quoting.
        sql = ("select a.tag_id from api_servicetag a left join api_tag b on a.tag_id=b.id "
               "where a.service_id in (select service_id from api_order where user_id=%s and status=1) "
               "and b.tag_type<'4' ")
        cur_zhengxing.execute(sql, (user_id,))
        tags_list = [row["tag_id"] for row in cur_zhengxing.fetchall()]

        db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC',
                                        db='jerry_test', charset='utf8')
        cur_jerry_test = db_jerry_test.cursor()
        # The original referenced undefined names (cl_id, tag_id_list), so the
        # write always failed with NameError. The values available here are
        # user_id and tags_list.
        # NOTE(review): storing user_id in the cl_id column is the only
        # reading the visible code allows - confirm against the table schema.
        replace_sql = ("replace into user_history_order_tags (stat_date, cl_id, tag_list) "
                       "values(%s, %s, %s)")
        cur_jerry_test.execute(replace_sql, (stat_date, user_id, str(tags_list)))
        db_jerry_test.commit()
        return tags_list
    except Exception as e:
        # Best-effort per-user job: report the error and fall through (None).
        print(e)
    finally:
        # Close whichever connections were opened, on success and on failure.
        for conn in (db_zhengxing, db_jerry_test):
            if conn is not None:
                conn.close()
if __name__ == '__main__':
    # Driver script: fetch the distinct user ids from api_order, then run
    # get_user_history_order_service_tag for each of them on a Spark RDD.
    try:
        db_zhengxing = pymysql.connect(host="172.16.30.141", port=3306, user="work",
                                       password="BJQaT9VzDcuPBqkd",
                                       db="zhengxing", cursorclass=pymysql.cursors.DictCursor)
        try:
            cur_zhengxing = db_zhengxing.cursor()
            # Despite the variable names, these are user ids (api_order.user_id),
            # not device ids.
            sql_device_ids = "select distinct user_id from api_order where status=1 and pay_time>'2017-08-16'"
            cur_zhengxing.execute(sql_device_ids)
            device_ids_lst = [i["user_id"] for i in cur_zhengxing.fetchall()]
        finally:
            # The connection is only needed for the query above; release it
            # before the Spark job starts (it was previously never closed).
            db_zhengxing.close()

        stat_date = datetime.datetime.today().strftime('%Y-%m-%d')

        # Spark session configuration.
        sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
            .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
            .set("spark.tispark.plan.allow_index_double_read", "false") \
            .set("spark.tispark.plan.allow_index_read", "true") \
            .set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
            .set("spark.tispark.pd.addresses", "172.16.40.158:2379").set("spark.io.compression.codec", "lzf") \
            .set("spark.driver.maxResultSize", "8g").set("spark.sql.avro.compression.codec", "snappy")
        spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
        spark.sparkContext.setLogLevel("WARN")

        device_ids_lst_rdd = spark.sparkContext.parallelize(device_ids_lst)
        # The Redis client that used to be created here was never used and
        # has been removed.
        result = device_ids_lst_rdd.repartition(100).map(lambda x: get_user_history_order_service_tag(x, stat_date))
        # collect() forces the lazy map to run; the collected values are unused.
        result.collect()
    except Exception as e:
        print(e)
\ No newline at end of file
...@@ -58,6 +58,24 @@ def tag_list2dict(lst, size): ...@@ -58,6 +58,24 @@ def tag_list2dict(lst, size):
return result[:size] return result[:size]
def get_device_user_id(cl_id):
    """Return the user id linked to a device id, or ``""`` if none is found.

    Queries ``statistic_device_user`` in the ``zhengxing`` database, using the
    row of ``statistic_device`` whose ``device_id`` equals ``cl_id``.

    Args:
        cl_id: Device identifier to look up.

    Returns:
        The ``user_id`` of the first matching row, or ``""`` when there is no
        match or the lookup fails (the error is printed, not raised).
    """
    db_zhengxing = None
    try:
        db_zhengxing = pymysql.connect(host="172.16.30.141", port=3306, user="work",
                                       password="BJQaT9VzDcuPBqkd",
                                       db="zhengxing", cursorclass=pymysql.cursors.DictCursor)
        cur_zhengxing = db_zhengxing.cursor()
        # Parameterized so the driver quotes cl_id instead of str.format.
        sql = ("select user_id from statistic_device_user where device_id = "
               "(select id from statistic_device where device_id = %s)")
        cur_zhengxing.execute(sql, (cl_id,))
        rows = cur_zhengxing.fetchall()
        if rows:
            return rows[0]["user_id"]
        return ""
    except Exception as e:
        print(e)
        # Same "not found" value as above, so callers that format the result
        # into SQL do not end up storing the literal string "None".
        return ""
    finally:
        # This runs once per device, so the connection must be released on
        # every path.
        if db_zhengxing is not None:
            db_zhengxing.close()
def get_user_tag_score(cl_id, all_log_df, stat_date, size=10): def get_user_tag_score(cl_id, all_log_df, stat_date, size=10):
try: try:
db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC',
...@@ -77,8 +95,9 @@ def get_user_tag_score(cl_id, all_log_df, stat_date, size=10): ...@@ -77,8 +95,9 @@ def get_user_tag_score(cl_id, all_log_df, stat_date, size=10):
finally_score_lst = finally_score[["tag_id","tag_score"]].to_dict('record') finally_score_lst = finally_score[["tag_id","tag_score"]].to_dict('record')
tag_id_list = tag_list2dict(finally_score_lst, size) tag_id_list = tag_list2dict(finally_score_lst, size)
replace_sql = """replace into user_portrait_tags (stat_date, cl_id, tag_list) values("{stat_date}","{cl_id}","{tag_list}")"""\ user_id = get_device_user_id(cl_id)
.format(stat_date=stat_date, cl_id=cl_id, tag_list=tag_id_list) replace_sql = """replace into user_portrait_tags (stat_date, cl_id, user_id, tag_list) values("{stat_date}","{cl_id}","{user_id}","{tag_list}")"""\
.format(stat_date=stat_date, cl_id=cl_id, user_id=user_id, tag_list=tag_id_list)
cur_jerry_test.execute(replace_sql) cur_jerry_test.execute(replace_sql)
db_jerry_test.commit() db_jerry_test.commit()
db_jerry_test.close() db_jerry_test.close()
...@@ -87,41 +106,6 @@ def get_user_tag_score(cl_id, all_log_df, stat_date, size=10): ...@@ -87,41 +106,6 @@ def get_user_tag_score(cl_id, all_log_df, stat_date, size=10):
return 'pass' return 'pass'
def send_email(app, id, e):
    """Email a failure notice with the error text attached as ``error.txt``.

    Args:
        app: Application name; appended to the subject line.
        id: Application id; embedded in the message body. The name shadows
            the builtin but is kept so existing keyword callers still work.
        e: The error to report; ``str(e)`` becomes the attachment content.

    Returns:
        None. An ``smtplib.SMTPException`` raised while connecting, logging
        in or sending is reported with ``print('error')`` and not re-raised.
    """
    # Third-party SMTP service settings.
    # NOTE(review): the mailbox password is hard-coded in source; it should
    # be moved to configuration / a secret store and rotated.
    mail_host = 'smtp.exmail.qq.com'  # SMTP server
    mail_user = "gaoyazhe@igengmei.com"  # login user
    mail_pass = "VCrKTui99a7ALhiK"  # login password
    sender = 'gaoyazhe@igengmei.com'
    receivers = ['gaoyazhe@igengmei.com']  # recipient addresses

    error_text = str(e)

    msg = MIMEMultipart()
    # str() so that a non-string id (e.g. an int) does not raise TypeError.
    msg.attach(MIMEText('app_id:' + str(id) + ':fail', 'plain', 'utf-8'))
    msg['From'] = formataddr(["gaoyazhe", sender])
    msg['To'] = ";".join(receivers)
    msg['Subject'] = 'spark streaming:app_name:' + str(app)

    # Keep writing the local error.txt copy as before; the context manager
    # closes the file, so no explicit close() is needed.
    with open('error.txt', 'w') as f:
        f.write(error_text)

    # Build the attachment from memory instead of re-opening the file, which
    # previously left an unclosed file handle behind.
    attachment = MIMEApplication(error_text.encode('utf-8'))
    attachment.add_header('Content-Disposition', 'attachment', filename="error.txt")
    msg.attach(attachment)

    try:
        # The context manager sends QUIT and closes the socket on exit.
        with smtplib.SMTP_SSL(mail_host, 465) as smtp_obj:
            smtp_obj.login(mail_user, mail_pass)
            smtp_obj.sendmail(sender, receivers, msg.as_string())
    except smtplib.SMTPException:
        print('error')
if __name__ == '__main__': if __name__ == '__main__':
try: try:
db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC',
...@@ -131,7 +115,6 @@ if __name__ == '__main__': ...@@ -131,7 +115,6 @@ if __name__ == '__main__':
# 获取所有用户的设备id # 获取所有用户的设备id
sql_device_ids = "select distinct cl_id from user_new_tag_log" sql_device_ids = "select distinct cl_id from user_new_tag_log"
cur_jerry_test.execute(sql_device_ids) cur_jerry_test.execute(sql_device_ids)
# todo 放开用户量
device_ids_lst = [i[0] for i in cur_jerry_test.fetchall()] device_ids_lst = [i[0] for i in cur_jerry_test.fetchall()]
# 获取所有用户的行为日志 # 获取所有用户的行为日志
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment