# -*- coding: utf-8 -*-
"""
Created on Thu Feb  1 17:16:16 2018

Change check in the morning to be checking data that's fetched 3 days ago.

@author: hanye
"""

from elasticsearch import Elasticsearch
#from es_hy_sign_in import es
import smtplib
from email.message import EmailMessage
import datetime
import sys

def videonumber_alert(days_from_running, test_model=False, f_log=None):
    """Check per-platform video counts for one fetch date and email an alert.

    For every index / stats type / platform listed in ``video_num_criteria``
    the function looks up a ``video_num`` figure in Elasticsearch: first in
    the maintenance index, and, when that returns no hits, by counting
    documents directly in the production data index. An alert text is then
    assembled per index and mailed through ``smtplib``; progress and results
    are printed to the log stream.

    NOTE(review): part of the function body is missing from this copy of the
    file (see the marker further down). The step that compares ``video_num``
    with the criteria and fills ``alert_msg`` is not visible, nor are the
    assignments of ``stats_type_dict``, ``email_msg_body``, ``email_subj``,
    ``sender`` and ``csm_mail_service``. Restore them from the original
    before running this code.

    Parameters
    ----------
    days_from_running
        Number of days subtracted from the current local time to obtain the
        fetch date that is checked.
    test_model
        When True, the recipient lists are replaced by a single address per
        index.
    f_log
        When None, a per-month log file under a hard-coded directory is
        opened in append mode. NOTE(review): any other value is discarded
        and replaced by ``sys.stdout``; the object passed in is never
        written to.

    Returns
    -------
    No ``return`` statement is visible, so the function returns None.
    ``sys.exit(1)`` is called if ``stats_type`` is neither ``'observed'``
    nor ``'new_released'``.
    """
    # Numeric criteria per index -> stats type -> platform. The message text
    # further down calls these "alert_criteria"; the comparison itself is in
    # the lost part of the body.
    video_num_criteria={
        'short-video': {
            'observed': {
                'miaopai': 400000,
                'toutiao': 900000,
                'new_tudou': 900000,
                'kwai': 3e6,
                '腾讯视频': 15e4,
                '新华社': 300,
                '人民日报': 300,
                '腾讯新闻': 45e3,
                'haokan': 600000
            },
            'new_released': {
                'miaopai': 30000,
                'toutiao': 50000,
                'new_tudou': 50000,
                'kwai': 1e5,
                '腾讯视频': 5e3,
                '新华社': 10,
                '人民日报': 10,
                '腾讯新闻': 1.5e3,
                'haokan': 30000,
            }
        },
        # Disabled criteria for the live-video index, kept for reference.
#        'live-video': {
#            'observed': {
#                '斗鱼直播': 30e4,
#                '花椒': 270e3,
#                '一直播': 60e3,
#                #'映客': 100000,
#                '新华社': 600,
#                '央视新闻+': 300,
#                '人民日报': 90,
#                '头条直播': 3000,
#                '腾讯新闻直播': 900,
#            },
#            'new_released': {
#                '斗鱼直播': 4e4,
#                '花椒': 9000,
#                '一直播': 2000,
#                '新华社': 20,
#                '央视新闻+': 10, # fluctuates between 0 and 20
#                '人民日报': 1, # fluctuates between 0 and 10
#                '头条直播': 100, # ranges from 0 to 200
#                '腾讯新闻直播': 30,
#            }
#        }
    }

    # Recipients per index; used as the 'to' header of the alert mail.
    email_group={
        'short-video': [
            'hanye@csm.com.cn',
            'zhouyujiang@csm.com.cn',
            'zhangtianli@csm.com.cn',
            "litao@csm.com.cn",
            "gengdi@csm.com.cn",
            "luojia@csm.com.cn",
            "zhangminghui@csm.com.cn",
        ],
        'live-video': [
            'hanye@csm.com.cn',
            'zhouyujiang@csm.com.cn',
            'yangjinshu@csm.com.cn',
            'xinyue@csm.com.cn',
            'zhangtianli@csm.com.cn',
        ],
    }
    # Test mode: replace both recipient lists with a single address.
    if test_model==True:
        email_group={
            'short-video': [
                'hanye@csm.com.cn',
            ],
            'live-video': [
                'hanye@csm.com.cn',
            ],
        }

    # Footer appended to every non-empty alert body.
    email_msg_suffix=('\n\n\n'
                      +'-'*80+'\n'
                      +'这是自动发送的邮件,可以不用回复。\n'
                      +'This is an automatically sent message. You do NOT need to reply.\n')

    es=Elasticsearch(hosts='192.168.17.11', port=9200)

    index_short_video='maintainance-short-video'
    doc_type_short_video='daily'
    index_live_video='maintainance-live-video'
    doc_type_live_video='daily'
    # For each logical index: 'index'/'doc_type' name the maintenance index
    # queried first, 'data_index'/'data_doc_type' the index counted directly
    # when the maintenance index has no hit.
    idx_dict={
        'short-video': {
            'index': index_short_video,
            'doc_type': doc_type_short_video,
            'data_index': 'short-video-production',
            'data_doc_type': 'daily-url',
        },
        'live-video': {
            'index': index_live_video,
            'doc_type': doc_type_live_video,
            'data_index': 'live-video-production',
            'data_doc_type': 'daily-url',
        }
    }

    # Despite its name, `today` is the date being checked: the current local
    # time moved back by days_from_running days.
    today=datetime.datetime.now()-datetime.timedelta(days=days_from_running)
    ## test
    #today=datetime.datetime(2018,1,12)

    if f_log==None:
        # One log file per month of the checked date, opened for appending.
        path='/home/hanye/project_data/Python/Projects/proj-short-videos/maintenance/log/'
        log_fn='email_alert_for_%s_log' % datetime.datetime.strftime(today, '%b-%Y')
        f_log=open(path+log_fn, 'a')
    else:
        # NOTE(review): the caller-supplied f_log is overwritten here, so a
        # passed-in stream is never used; confirm whether that is intended.
        f_log=sys.stdout

    print('*'*80, file=f_log)
    print('log timestamp ', datetime.datetime.now(), file=f_log)
    print('Checking video number for fetch_date', today.isoformat()[:10], file=f_log)

    fetch_year=today.year
    fetch_month=today.month
    fetch_day=today.day

    # alert_msg[idx][stats_type] is a list. The code after the gap reads
    # dict entries from it with the keys 'platform', 'fetch_date',
    # 'video_num', 'alert_criteria', 'short_perct' and 'chk_source'; the
    # code that appends those entries is not visible.
    alert_msg={}
    for idx in video_num_criteria:
        alert_msg[idx]={}
        for stats_type in ['observed', 'new_released']:
            alert_msg[idx][stats_type]=[]
            for platform in video_num_criteria[idx][stats_type]:
                # Exact-match filters on the checked date, the platform and
                # the stats type in the maintenance index.
                search_body={
                    "query": {
                        "bool": {
                            "filter": [
                                {"term": {"fetch_year": fetch_year}},
                                {"term": {"fetch_month": fetch_month}},
                                {"term": {"fetch_day": fetch_day}},
                                {"term": {"platform.keyword": platform}},
                                {"term": {"stats_type.keyword": stats_type}},
                            ]
                        }
                    }
                }
                search_resp=es.search(index=idx_dict[idx]['index'],
                                      doc_type=idx_dict[idx]['doc_type'],
                                      body=search_body,
                                      request_timeout=100)
                # NOTE(review): hits.total is compared with an int directly,
                # which assumes the server returns a plain number here;
                # confirm for the Elasticsearch version in use.
                if search_resp['hits']['total']>0:
                    # Only the first hit is read.
                    video_num=search_resp['hits']['hits'][0]['_source']['video_num']
                    fetch_date=search_resp['hits']['hits'][0]['_source']['fetch_date']
                    # fetch_date is divided by 1e3 before fromtimestamp,
                    # i.e. it is treated as epoch milliseconds.
                    fetch_date_str=datetime.datetime.fromtimestamp(fetch_date/1e3).isoformat()[:10]
                    chk_source=idx_dict[idx]['index']
                else:
                    # if no hits in maintainance index, search directly in data index
                    # Window [start of checked day, start of next day) in
                    # epoch milliseconds, built from a naive local datetime.
                    fetch_time_start_T=datetime.datetime(today.year, today.month, today.day)
                    fetch_time_start=int(fetch_time_start_T.timestamp()*1e3)
                    fetch_time_end=int((fetch_time_start_T+datetime.timedelta(days=1)).timestamp()*1e3)
                    if stats_type=='observed':
                        # Count documents whose fetch_time is in the window.
                        search_data_idx={
                            "query": {
                                "bool": {
                                    "filter": [
                                        {"term": {"platform.keyword": platform}},
                                        {"range": {"fetch_time": {"gte": fetch_time_start, "lt": fetch_time_end}}}
                                    ]
                                }
                            },
                            "size": 0,
                        }
                    elif stats_type=='new_released':
                        # Count by release_time instead, with the window
                        # start moved back by a further 24 hours.
                        fetch_time_start_ts_enlarge=int(fetch_time_start-24*3600*1e3)
                        search_data_idx={
                            "query": {
                                "bool": {
                                    "filter": [
                                        {"term": {"platform.keyword": platform}},
                                        {"range": {"release_time": {"gte": fetch_time_start_ts_enlarge, "lt": fetch_time_end}}},
                                    ]
                                }
                            },
                            "size": 0,
                        }
                    else:
                        # Not reachable while the stats_type loop iterates
                        # over the two literal values handled above.
                        print('Error value with stats_type: %s, system exit' % stats_type, file=f_log)
                        sys.exit(1)
                    # "size": 0 requests no documents; only the total count
                    # is read from the response.
                    search_data_idx_resp=es.search(index=idx_dict[idx]['data_index'],
                                                   doc_type=idx_dict[idx]['data_doc_type'],
                                                   body=search_data_idx,
                                                   request_timeout=100)
                    video_num=search_data_idx_resp['hits']['total']
                    fetch_date_str=today.isoformat()[:10]
                    chk_source=idx_dict[idx]['data_index']

                # NOTE(review): SOURCE TEXT IS MISSING HERE. The statement
                # below is the fused remainder of two different statements:
                # it begins with `if video_num` and ends with `0:`, and
                # everything in between was lost before review (it looks
                # like the text from a `<` up to a later `>` was stripped;
                # confirm against the original file). `video_num0` is not a
                # real name. The lost span presumably held the comparison of
                # video_num with video_num_criteria, the code appending to
                # alert_msg, the assignments of stats_type_dict,
                # email_msg_body, email_subj, sender and csm_mail_service,
                # and at least one loop header. The nesting of every
                # statement from here to the end of the function is
                # therefore a best guess, not the original layout.
                if video_num0:
                    email_msg_body+=('%s %s 预警:\n' % (idx, stats_type_dict[stats_type]))
                    # One line per alert entry recorded for this index and
                    # stats type.
                    for ml in alert_msg[idx][stats_type]:
                        msgline='平台 %s 抓取 %s 视频条目数:%d,低于预警值(%d) %.2f%% \n' % (ml['platform'],
                                                                              ml['fetch_date'],
                                                                              ml['video_num'],
                                                                              ml['alert_criteria'],
                                                                              ml['short_perct'])
                        email_msg_body+=msgline
                    # The source index name is taken from the first entry
                    # only.
                    email_msg_body+='\nchecking data source index name: %s\n\n\n' % alert_msg[idx][stats_type][0]['chk_source']
        if email_msg_body!='':
            # A non-empty body means there is something to report: append
            # the footer, log the text and mail it.
            email_msg_body+=email_msg_suffix
            print('email_msg_body:\n', email_msg_body, file=f_log)
            email_msg=EmailMessage()
            email_msg.set_content(email_msg_body)
            email_msg['Subject']=email_subj
            email_msg['From']=sender
            email_msg['to']=email_group[idx]
            try:
                server=smtplib.SMTP(host=csm_mail_service)
                server.send_message(email_msg)
                server.quit()
                print('Successfully sent email to %s for %s' % (email_group[idx], idx),
                      datetime.datetime.now(), file=f_log)
                print('email_msg:\n', email_msg, file=f_log)
            except:
                # NOTE(review): the bare except catches every exception
                # raised in the try block (including failures in
                # send_message and in the log prints) and reports all of
                # them as a connection failure, without the error details.
                print('Failed to connect email server.', datetime.datetime.now(), file=f_log)
        else:
            print('All platforms is ok for %s on fetch_date %s, %s' % (idx, today.isoformat()[:10], datetime.datetime.now()), file=f_log)
            print('Alert criteria:\n', video_num_criteria[idx], file=f_log)
        print('\n\n', file=f_log)
    # NOTE(review): when f_log was rebound to sys.stdout above, this closes
    # sys.stdout.
    f_log.close()