# coding: utf-8 from __future__ import absolute_import from __future__ import division from __future__ import print_function import pymysql import smtplib from email.mime.text import MIMEText from email.utils import formataddr from email.mime.multipart import MIMEMultipart from email.mime.application import MIMEApplication import redis import datetime import time import json import numpy as np import pandas as pd import traceback def send_email(app,id,e): # 第三方 SMTP 服务 mail_host = 'smtp.exmail.qq.com' # 设置服务器 mail_user = "zhaowei@igengmei.com" # 用户名 mail_pass = "Gengmei1234" # 口令 sender = 'zhaowei@igengmei.com' receivers = ['zhaowei@igengmei.com'] # 接收邮件,可设置为你的QQ邮箱或者其他邮箱 e = str(e) msg = MIMEMultipart() part = MIMEText('app_id:'+id+':fail' + "\n" + e, 'plain', 'utf-8') msg.attach(part) msg['From'] = formataddr(["gaoyazhe", sender]) # 括号里的对应收件人邮箱昵称、收件人邮箱账号 msg['To'] = ";".join(receivers) # message['Cc'] = ";".join(cc_reciver) msg['Subject'] = 'spark streaming:app_name:'+app try: with open('error.txt','w') as f: f.write(e) f.close() part = MIMEApplication(open('error.txt', 'r').read()) part.add_header('Content-Disposition', 'attachment', filename="error.txt") msg.attach(part) except Exception as e: print(e) try: smtpObj = smtplib.SMTP_SSL(mail_host, 465) smtpObj.login(mail_user, mail_pass) smtpObj.sendmail(sender, receivers, msg.as_string()) except smtplib.SMTPException: print('error') def get_data_by_mysql(host, port, user, passwd, db, sql): try: db = pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db, cursorclass=pymysql.cursors.DictCursor) cursor = db.cursor() cursor.execute(sql) results = cursor.fetchall() db.close() cursor.close() return results except Exception as e: print(e) def write_data_by_mysql(host, port, user, passwd, db, sql): try: db = pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db, cursorclass=pymysql.cursors.DictCursor) cursor = db.cursor() cursor.execute(sql) db.commit() db.close() cursor.close() return True except Exception as e: print(e) return False def get_all_search_word_synonym_tags(): """ :return:dict {"search_word1":[tag_list1],"search_word2":[tag_list2]...} """ try: sql = "select a.keyword , c.id from api_wordrel a " \ "left join api_wordrelsynonym b on a.id = b.wordrel_id " \ "left join api_tag c on b.word=c.name " \ "where c.tag_type+0<'4'+0 and c.is_online=1" mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'zx_str', 'ZXueX58pStrage', 'zhengxing', sql) result_dict = dict() for data in mysql_results: if data['keyword'] not in result_dict: result_dict[data['keyword']] = [data['id']] else: result_dict[data['keyword']].append(data['id']) return result_dict except Exception as e: print(e) def get_all_word_synonym_words(): try: sql = "select a.keyword, b.word from api_wordrel a " \ "left join api_wordrelsynonym b on a.id = b.wordrel_id " mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'zx_str', 'ZXueX58pStrage', 'zhengxing', sql) result_dict = dict() for data in mysql_results: if data['keyword'] not in result_dict: result_dict[data['keyword']] = [data['word']] else: result_dict[data['keyword']].append(data['word']) return result_dict except Exception as e: print(e) def get_all_synonym_tags(): """ :return:dict {"search_word1":[tag_list1],"search_word2":[tag_list2]...} """ try: sql = "select a.word, b.id from api_wordrelsynonym a left join api_tag b " \ "on a.word=b.name where b.tag_type+0<'4'+0 and b.is_online=1" mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'zx_str', 'ZXueX58pStrage', 'zhengxing', sql) result_dict = dict() for data in mysql_results: if data['word'] not in result_dict: result_dict[data['word']] = [data['id']] else: result_dict[data['word']].append(data['id']) return result_dict except Exception as e: print(e) def get_all_api_tags(): """ :return:dict {"search_word1":[tag_list1],"search_word2":[tag_list2]...} """ try: sql = "select name, id from api_tag where tag_type in ('1', '2', '3', '5') and is_online=1" mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'zx_str', 'ZXueX58pStrage', 'zhengxing', sql) result_dict = dict() for data in mysql_results: if data['name'] not in result_dict: result_dict[data['name']] = [data['id']] else: result_dict[data['name']].append(data['id']) return result_dict except Exception as e: print(e) def get_all_word_tags(): try: search_word_synonym_tags = get_all_search_word_synonym_tags() synonym_tags = get_all_synonym_tags() api_tags = get_all_api_tags() return {**search_word_synonym_tags, **synonym_tags, **api_tags} except Exception as e: print(e) def get_all_tag_tag_type(): """ :return:dict {tag_id1:tag_type1,tag_id2:tag_type2...} """ try: sql = "select id,tag_type from api_tag where tag_type+0<'4'+0 and is_online=1" mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'zx_str', 'ZXueX58pStrage', 'zhengxing', sql) result_dict = dict() for data in mysql_results: result_dict[data['id']] = data['tag_type'] return result_dict except Exception as e: print(e) def get_all_3tag_2tag(): try: sql = "select a.child_id,a.parent_id from api_tagrelation a" \ " left join api_tag b on a.parent_id=b.id " \ "where a.child_id in (select id from api_tag where tag_type='3' and is_online=1) " \ "and b.tag_type='2' and b.is_online=1" mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'zx_str', 'ZXueX58pStrage', 'zhengxing', sql) result_dict = dict() for data in mysql_results: if data['child_id'] not in result_dict: result_dict[data['child_id']] = [data['parent_id']] else: result_dict[data['child_id']].append(data['parent_id']) return result_dict except Exception as e: print(e) def get_all_tag_parent_tag(): try: sql = "select a.child_id,a.parent_id from api_tagrelation a" \ " left join api_tag b on a.parent_id=b.id " \ "where a.child_id in (select id from api_tag where tag_type+0<'4'+0 and is_online=1) " \ "and b.tag_type+0<'4'+0 and b.is_online=1" mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'zx_str', 'ZXueX58pStrage', 'zhengxing', sql) result_dict = dict() for data in mysql_results: if data['child_id'] not in result_dict: result_dict[data['child_id']] = [data['parent_id']] else: result_dict[data['child_id']].append(data['parent_id']) return result_dict except Exception as e: print(e) def get_all_tags_name(): try: sql = "select id, name from api_tag where tag_type+0<'4'+0 and is_online=1" mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'zx_str', 'ZXueX58pStrage', 'zhengxing', sql) result_dict = dict() for data in mysql_results: result_dict[data['id']] = data['name'] return result_dict except Exception as e: print(e) def get_tag2_from_tag3(tag3, all_3tag_2tag, user_log_df_tag2_list): try: tag2s = [] if tag3 in all_3tag_2tag: tag2s = all_3tag_2tag[tag3] for tag2 in tag2s: if tag2 in user_log_df_tag2_list: return tag2 return tag3 except Exception as e: print(e) def compute_henqiang(x, decay_days=30, exponential=0, action_tag_count=1): if exponential: alpha = exponential_decay(x, decay_days) score = 15/action_tag_count - 1.1**alpha * ((15-0.5)/decay_days) else: score = 15/action_tag_count - x * ((15-0.5)/decay_days) if score > 0.5: return score else: return 0.5 def compute_jiaoqiang(x, decay_days=30, exponential=0, action_tag_count=1): if exponential: alpha = exponential_decay(x, decay_days) score = 12/action_tag_count - 1.1**alpha * ((12-0.5)/decay_days) else: score = 12/action_tag_count - x * ((12-0.5)/decay_days) if score > 0.5: return score else: return 0.5 def compute_ruoyixiang(x, decay_days=30, exponential=0, action_tag_count=1): if exponential: alpha = exponential_decay(x, decay_days) score = 5/action_tag_count - 1.1**alpha * ((5-0.5)/decay_days) else: score = 5/action_tag_count - x * ((5-0.5)/decay_days) if score > 0.5: return score else: return 0.5 def compute_validate(x, decay_days=30, exponential=0, action_tag_count=1): if exponential: alpha = exponential_decay(x, decay_days) score = 10/action_tag_count - 1.1**alpha * ((10-0.5)/decay_days) else: score = 10/action_tag_count - x * ((10-0.5)/decay_days) if score > 0.5: return score else: return 0.5 def compute_ai_scan(x, decay_days=30, exponential=0, action_tag_count=1): if exponential: alpha = exponential_decay(x, decay_days) score = 2/action_tag_count - 1.1**alpha * ((2-0.5)/decay_days) else: score = 2/action_tag_count - x * ((2-0.5)/decay_days) if score > 0.5: return score else: return 0.5 def get_action_tag_count(df, action_time): try: if not df[df['time'] == action_time].empty: return len(df[df['time'] == action_time]) else: return 1 except Exception as e: print(e) def exponential_decay(days_diff, decay_days=30): # 天数差归一化到[0, decay_days] x = np.arange(1, 180+1, 1) a = (decay_days - 0) * (days_diff - min(x)) / (max(x) - min(x)) return a def args_test(x): return "gyz add" + str(x) def get_user_log(cl_id, all_word_tags, pay_time=0, debug=0): user_df_service = pd.DataFrame(columns=["time", "cl_id", "score_type", "tag_id", "tag_referrer", "action"]) try: db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy', db='jerry_test', charset='utf8') cur_jerry_test = db_jerry_test.cursor() if pay_time == 0: user_df_service_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \ "where cl_id ='{cl_id}'".format(cl_id=cl_id) else: user_df_service_sql = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log " \ "where cl_id ='{cl_id}' and time < {pay_time}".format(cl_id=cl_id, pay_time=pay_time) cur_jerry_test.execute(user_df_service_sql) data = list(cur_jerry_test.fetchall()) if data: user_df_service = pd.DataFrame(data) user_df_service.columns = ["time", "cl_id", "score_type", "tag_id", "tag_referrer", "action"] else: return user_df_service # 用户的搜索行为: user_df_search = user_df_service[user_df_service["action"] == "do_search"] if debug: # 用户的非搜索、支付行为 user_df_service = user_df_service.loc[ ~user_df_service["action"].isin(["do_search", "api/settlement/alipay_callback"])] else: # 用户的非搜索行为 user_df_service = user_df_service.loc[~user_df_service["action"].isin(["do_search"])] # 搜索词转成tag,合并用户日志 user_df_search_dict = dict() for index, row in user_df_search.iterrows(): if row['tag_referrer'] in all_word_tags: word_tag_list = all_word_tags[row['tag_referrer']] row['tag_id'] = int(word_tag_list[0]) if word_tag_list else -1 else: row['tag_id'] = -1 user_df_service = user_df_service.append(user_df_search) return user_df_service[user_df_service["tag_id"] != -1] except: print("error2_user_portrait", traceback.format_exc()) return user_df_service def get_jerry_test(): db = pymysql.connect(host="172.16.40.170", port=4000, user="st_user", passwd="aqpuBLYzEV7tML5RPsN1pntUzFy", db="jerry_test", charset="utf8") return db, db.cursor() def get_doris_prod(): db = pymysql.connect(host="172.16.30.136", port=3306, user="doris", passwd="o5gbA27hXHHm", db="doris_prod", charset="utf8") return db, db.cursor() def compute_tag3_score(x): if x.score_type == "henqiang": return compute_henqiang(x.days_diff_now, exponential=1) elif x.score_type == "jiaoqiang": return compute_jiaoqiang(x.days_diff_now, exponential=1) elif x.score_type == "ai_scan": return compute_ai_scan(x.days_diff_now, exponential=1) elif x.score_type == "ruoyixiang": return compute_ruoyixiang(x.days_diff_now, exponential=1) else: return compute_validate(x.days_diff_now, exponential=1) def get_tag3_user_log(cl_id): columns = [ "log_time", "score_type", "event_cn", "first_solutions", "second_solutions", "first_demands", "second_demands", "first_positions", "second_positions", "projects" ] try: sql = """select log_time, score_type, event_cn, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions, projects from kafka_tag3_log where cl_id = '{}'""".format(cl_id) db, cursor = get_doris_prod() cursor.execute(sql) data = list(cursor.fetchall()) db.close() cursor.close() if data: user_df = pd.DataFrame(data) user_df.columns = columns else: return pd.DataFrame(columns=columns) user_df["days_diff_now"] = round((int(time.time()) - user_df["log_time"].astype(float)) / (24 * 60 * 60)) user_df["tag_score"] = user_df.apply(lambda x: compute_tag3_score(x), axis=1) return user_df except Exception as e: print(e) return pd.DataFrame(columns=columns) # CREATE TABLE `user_tag3_portrait` ( # `id` int(11) NOT NULL AUTO_INCREMENT, # `date` text NOT NULL, # `cl_id` text NOT NULL, # `first_solutions` text NOT NULL, # `second_solutions` text NOT NULL, # `first_demands` text NOT NULL, # `second_demands` text NOT NULL, # `first_positions` text NOT NULL, # `second_positions` text NOT NULL, # `projects` text NOT NULL, # PRIMARY KEY(`id`) # ) def write_user_portrait(cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions, projects): try: today = datetime.date.today() oneday = datetime.timedelta(days=1) yesterday = today - oneday sql = """insert into user_tag3_portrait values(null, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')""".format( yesterday, cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions, projects) db, cursor = get_jerry_test() cursor.execute(sql) db.commit() db.close() cursor.close() except Exception as e: print("write db error") print(e) def write_user_portrait_doris(cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions, projects): try: today = datetime.date.today() oneday = datetime.timedelta(days=1) yesterday = today - oneday sql = """insert into user_tag3_portrait values(null, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')""".format( yesterday, cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions, projects) db, cursor = get_doris_prod() cursor.execute(sql) db.commit() db.close() cursor.close() except Exception as e: print("write db error") print(e) # CREATE TABLE `user_tag3_event_portrait` ( # `id` int(11) NOT NULL AUTO_INCREMENT, # `date` text NOT NULL, # `cl_id` text NOT NULL, # `first_solutions` text NOT NULL, # `second_solutions` text NOT NULL, # `first_demands` text NOT NULL, # `second_demands` text NOT NULL, # `first_positions` text NOT NULL, # `second_positions` text NOT NULL, # `projects` text NOT NULL, # `event_cn` text NOT NULL, # PRIMARY KEY(`id`) # ) def write_user_portrait_by_event(cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions, projects, event): try: today = datetime.date.today() oneday = datetime.timedelta(days=1) yesterday = today - oneday sql = """insert into user_tag3_event_portrait values(null, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')""".format( yesterday, cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions, projects, event) db, cursor = get_jerry_test() cursor.execute(sql) db.commit() db.close() cursor.close() except Exception as e: print("write db error") print(e) def get_redis_client(): return redis.StrictRedis.from_url('redis://:ReDis!GmTx*0aN9@172.16.40.173:6379') def get_user_portrait_tag3_from_redis(device_id, limit_score=0): def items_gt_score(d): new_d = dict(sorted(d.items(), key=lambda x: x[1], reverse=True)) res = {tag: float(score) for tag, score in new_d.items() if float(score) >= limit_score} return list(res.keys()) portrait_key = "doris:user_portrait:tag3:device_id:" + str(device_id) redis_client = get_redis_client() if redis_client.exists(portrait_key): user_portrait = json.loads(redis_client.get(portrait_key).decode("utf-8")) first_demands = items_gt_score(user_portrait.get("first_demands", {})) second_demands = items_gt_score(user_portrait.get("second_demands", {})) first_solutions = items_gt_score(user_portrait.get("first_solutions", {})) second_solutions = items_gt_score(user_portrait.get("second_solutions", {})) first_positions = items_gt_score(user_portrait.get("first_positions", {})) second_positions = items_gt_score(user_portrait.get("second_positions", {})) projects = items_gt_score(user_portrait.get("projects", {})) anecdote_tags = items_gt_score(user_portrait.get("anecdote_tags", {})) return { "first_demands": first_demands, "second_demands": second_demands, "first_solutions": first_solutions, "second_solutions": second_solutions, "first_positions": first_positions, "second_positions": second_positions, "projects": projects, "anecdote_tags": anecdote_tags } return {} def get_user_portrait_tag3_with_score(device_id): portrait_key = "doris:user_portrait:tag3:device_id:" + str(device_id) redis_client = get_redis_client() if redis_client.exists(portrait_key): return json.loads(redis_client.get(portrait_key).decode("utf-8")) return {}