# coding=utf-8
"""Daily search-keyword statistics report.

Pulls search keywords from the MySQL ``doris_prod`` database, counts the
online content (answers, tractates, diaries) matching each keyword in
Elasticsearch, writes the result to an .xls file and mails it out.
"""
import datetime
import logging
import smtplib
import traceback
import zipfile
from email.mime.application import MIMEApplication
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formataddr

import pymysql
import xlwt
from elasticsearch import Elasticsearch

from maintenance.func_send_email_with_file import send_file_email

# Two-node ES cluster; the client distributes requests across both hosts.
es = Elasticsearch([
    {'host': '172.16.31.17', 'port': 9200},
    {'host': '172.16.31.11', 'port': 9200},
])

# Default attachment path.
# NOTE(review): the __main__ block below writes "近2周数据统计结果.xls" to the
# working directory, but the mail historically attached this fixed path —
# confirm which file is actually meant to go out.
DEFAULT_ATTACHMENT = "/srv/apps/crawler/crawler_sys/utils/近一月数据统计结果.xls"


def send_email_tome(file_path=DEFAULT_ATTACHMENT):
    """Mail *file_path* as an attachment to the stats recipients.

    :param file_path: path of the .xls file to attach; defaults to the
        historical fixed path so existing callers keep working.

    Failures are printed and swallowed: a broken mail-out must not kill
    the stats job itself.
    """
    try:
        date = datetime.datetime.now().date() - datetime.timedelta(days=1)
        content = 'hi all:附件为' + str(date) + '的搜索词数据统计结果以及近一周的数据统计结果,请查收!'
        send_file_email(
            "", "",
            email_group=["<duanyingrong@igengmei.com>", "<litao@igengmei.com>"],
            title_str=content,
            email_msg_body_str=content,
            file=file_path,
        )
    except Exception as e:
        # Best-effort delivery: log the error and carry on.
        print(e)


def get_es_word(word):
    """Count online content matching *word* in the three content indices.

    :param word: search keyword (matched with ``match_phrase`` using the
        ``gm_default_index`` analyzer).
    :returns: tuple ``(answer_count, tractate_count, diary_count)`` —
        note the order differs from the spreadsheet column order, which
        is diary / answer / tractate.
    """
    # Answers: match in title/desc/answer, online, content_level >= 3,
    # body at least 30 chars long.
    results = es.search(
        index='gm-dbmw-answer-read', doc_type='answer', timeout='10s', size=0,
        body={
            "query": {
                "bool": {
                    "minimum_should_match": 1,
                    "should": [
                        {"match_phrase": {"title": {"query": word, "analyzer": "gm_default_index"}}},
                        {"match_phrase": {"desc": {"query": word, "analyzer": "gm_default_index"}}},
                        {"match_phrase": {"answer": {"query": word, "analyzer": "gm_default_index"}}},
                    ],
                    "must": [
                        {"term": {"is_online": True}},
                        {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                        {"range": {"content_length": {"gte": 30}}},
                    ],
                }
            },
        })
    answer_content_num = results["hits"]["total"]

    # Tractates (posts): match in content or tag-name fields.
    results = es.search(
        index='gm-dbmw-tractate-read', doc_type='tractate', timeout='10s', size=0,
        body={
            "query": {
                "bool": {
                    "minimum_should_match": 1,
                    "should": [
                        {"match_phrase": {"content": {"query": word, "analyzer": "gm_default_index"}}},
                        {"match_phrase": {"tractate_tag_name": {"query": word, "analyzer": "gm_default_index"}}},
                        {"match_phrase": {"tractate_tag_name_content": {"query": word, "analyzer": "gm_default_index"}}},
                    ],
                    "must": [
                        {"term": {"is_online": True}},
                        {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                    ],
                }
            }
        })
    tractate_content_num = results["hits"]["total"]

    # Diaries: match in tags/answer/service.name, online, not sunk, with
    # cover + before/after photos, content_level >= 3, not a duplicate
    # (content_simi_bol_show == 0).
    results = es.search(
        index='gm-dbmw-diary-read', doc_type='diary', timeout='10s', size=0,
        body={
            "query": {
                "bool": {
                    "minimum_should_match": 1,
                    "should": [
                        {"match_phrase": {"tags": {"query": word, "analyzer": "gm_default_index"}}},
                        {"match_phrase": {"answer": {"query": word, "analyzer": "gm_default_index"}}},
                        {"match_phrase": {"service.name": {"query": word, "analyzer": "gm_default_index"}}},
                    ],
                    "must": [
                        {"term": {"is_online": True}},
                        {"term": {"has_cover": True}},
                        {"term": {"is_sink": False}},
                        {"term": {"has_after_cover": True}},
                        {"term": {"has_before_cover": True}},
                        {"range": {"content_level": {"gte": "3"}}},
                        {"term": {"content_simi_bol_show": 0}},
                    ],
                }
            },
        })
    diary_content_num = results["hits"]["total"]

    return answer_content_num, tractate_content_num, diary_content_num


class WritrExcel(object):
    """Thin helper around xlwt for dumping row tuples to an .xls file."""

    def set_style(self, name, height, bold=False):
        """Build an ``xlwt.XFStyle`` with the given font name/height/bold."""
        style = xlwt.XFStyle()
        font = xlwt.Font()
        font.name = name
        font.bold = bold
        font.color_index = 4
        font.height = height
        style.font = font
        return style

    def write_excel(self, path, rows):
        """Write *rows* (sequence of cell sequences) to *path*, sheet 'Sheet1'.

        Only the first 65530 rows are written: the legacy .xls format
        caps a sheet at 65536 rows.
        """
        workbook = xlwt.Workbook(encoding='utf-8')
        data_sheet = workbook.add_sheet('Sheet1')
        # Create the style once, outside the loop.
        default = self.set_style('Times New Roman', 220, True)
        k = 0
        for row in rows[:65530]:
            for i in range(len(row)):
                try:
                    data_sheet.write(k, i, row[i], default)
                except Exception:
                    # Report the offending column before re-raising.
                    print(i)
                    raise
            k += 1
        workbook.save(path)
        print("写入文件成功,共" + str(k) + "行数据")


if __name__ == "__main__":
    tag_names_list_week = []
    all_data_week = []

    db_zhengxing_eagle = pymysql.connect(
        host="172.16.30.136", port=3306, user="doris", password="o5gbA27hXHHm",
        db="doris_prod", charset='utf8',
        cursorclass=pymysql.cursors.DictCursor)
    try:
        zhengxing_cursor = db_zhengxing_eagle.cursor()

        date = datetime.datetime.now().date() - datetime.timedelta(days=15)
        # Parameterized query (PEP 249 %s placeholders) instead of string
        # concatenation — avoids quoting bugs and SQL injection.
        sql = ('select keywords,sum(sorted) as nums,sum(uv) as uvs '
               'from api_search_words where is_delete = 0 and create_time >= %s '
               'group by keywords order by nums desc')
        print(sql)
        zhengxing_cursor.execute("set names 'UTF8'")
        zhengxing_cursor.execute(sql, (str(date),))
        data = zhengxing_cursor.fetchall()
    finally:
        # Always release the connection, even if the query fails.
        db_zhengxing_eagle.close()

    tup_title = ("关键词", "搜索次数", "uv", "日记数量", "回答数量", "帖子数量")
    for name in data:
        word = name.get("keywords", None)
        sorteds = name.get("nums", 0)
        uv = name.get("uvs", 0)
        answer_content_num, tractate_content_num, diary_content_num = get_es_word(word)
        # Column order matches tup_title: diary, answer, tractate.
        tag_names_list_week.append(
            [word, sorteds, uv, diary_content_num, answer_content_num, tractate_content_num])

    all_data_week.append(tup_title)
    for item in tag_names_list_week:
        all_data_week.append(tuple(item))

    path = "近2周数据统计结果.xls"
    WritrExcel().write_excel(path, tuple(all_data_week))
    print(u'创建demo.xls文件成功')
    send_email_tome()