"""Merge crawled GengMei JSON-lines dumps into a gzip CSV and e-mail it.

When run as a script, every existing input file is read line by line as
JSON, the decoded records are written to ``gengmei_service.csv.gz`` and the
archive is mailed as an attachment by :func:`send_email_tome`.
"""
import datetime
import json
import logging
import os
import re
import smtplib
import socket
import sys
import time
import traceback
import zipfile
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formataddr
from socket import timeout
from urllib import error

import pandas as pd
import requests
from lxml import etree
# Exception classes from requests.exceptions
from requests.exceptions import ReadTimeout, HTTPError, RequestException, ConnectionError
from requests.packages.urllib3.exceptions import ReadTimeoutError
from retrying import retry

logger = logging.getLogger(__name__)

# Strips anything that looks like an HTML/XML tag.
_TAG_RE = re.compile(r"<[^>]+>", re.S)

# (pattern, replacement) pairs applied in order to insert a newline after
# each sentence boundary.  Raw strings keep the exact same patterns as before
# while avoiding the invalid-escape-sequence warnings that the non-raw
# literals trigger on recent Python versions.
_SENTENCE_BREAK_RULES = (
    # single-character sentence terminators
    (re.compile(r'([;。!?\?])([^”’])'), r"\1\n\2"),
    # English ellipsis (six dots)
    (re.compile(r'(\.{6})([^”’])'), r"\1\n\2"),
    # Chinese ellipsis (two "…" characters)
    (re.compile(r'(\…{2})([^”’])'), r"\1\n\2"),
    # terminator followed by a closing quote: break after the quote
    (re.compile(r'([;。!?\?][”’])([^,。!?\?])'), r'\1\n\2'),
)


def send_email_tome():
    """E-mail ``gengmei_service.csv.gz`` as an attachment over SMTP-SSL.

    The sender, recipient, subject and body are fixed.  The function never
    raises: SMTP failures are printed, and any other failure (for example a
    missing attachment file) is printed and logged with its traceback.
    """
    try:
        from_addrs = 'lixiaofang@igengmei.com'
        # SECURITY(review): credential is hard-coded in source; it should be
        # moved to configuration / a secret store and rotated.
        password = 'EzJzSRyEG4Jibuy9'
        toaddrs = "lixiaofang@igengmei.com"

        content = '爬取更美热搜前100的词召回的商品,内容详见表格'
        text_apart = MIMEText(content, 'plain', "utf-8")

        zip_file_week = 'gengmei_service.csv.gz'
        # Read the attachment inside a context manager so the file handle is
        # closed even if building the MIME part fails.
        with open(zip_file_week, 'rb') as attachment:
            zip_apart_week = MIMEApplication(attachment.read())
        zip_apart_week.add_header('Content-Disposition', 'attachment', filename=zip_file_week)

        m = MIMEMultipart()
        m.attach(text_apart)
        m.attach(zip_apart_week)
        m['From'] = formataddr(("李小芳", from_addrs))
        m["To"] = formataddr(("李小芳", toaddrs))
        m['Subject'] = '新氧商品信息'

        try:
            # The context manager issues QUIT and closes the socket on both
            # the success and the failure path.
            with smtplib.SMTP_SSL('smtp.exmail.qq.com', 465, timeout=1200) as server:
                server.login(from_addrs, password)
                server.sendmail(from_addrs, [toaddrs], m.as_string())
                print('success')
        except smtplib.SMTPException as e:
            print('error', e)
    except Exception as e:
        print(str(e))
        logger.error("catch exception,main:%s", traceback.format_exc())


def get_keynote_sentence(content):
    """Strip markup from *content* and put each sentence on its own line.

    Tags matching ``<...>`` are removed, then a newline is inserted after
    sentence terminators, six-dot and double-"…" ellipses, and after a
    closing quote that directly follows a terminator.  Trailing whitespace
    is stripped from the result.

    Returns the processed text as a ``str``.  If anything raises while
    processing (e.g. *content* is not a string), the traceback is logged and
    an empty list is returned; that return value is kept as-is for backward
    compatibility with existing callers.
    """
    try:
        # Round-trip through UTF-16 so that lone surrogate pairs in the input
        # are combined into the characters they encode.
        text = content.encode('utf-16', 'surrogatepass').decode('utf-16')
        para = _TAG_RE.sub("", text)
        for pattern, replacement in _SENTENCE_BREAK_RULES:
            para = pattern.sub(replacement, para)
        # drop any surplus newline/whitespace at the end of the paragraph
        return para.rstrip()
    except Exception:
        logger.error("catch exception,logins:%s", traceback.format_exc())
        return []


def _load_json_lines(path):
    """Return the JSON records stored one per line in the file at *path*.

    Blank lines are ignored.  Lines that are not valid JSON are skipped, and
    the number skipped is logged once per file rather than dropped silently.
    """
    records = []
    skipped = 0
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except ValueError:  # json.JSONDecodeError is a ValueError
                skipped += 1
    if skipped:
        logger.warning("skipped %d malformed line(s) in %s", skipped, path)
    return records


# Disabled city-normalisation helper, kept for reference.
# all_no_city = []
#
# def update_city_name(hospital_name, city_name):
#     if hospital_name[:2] in ["北京", "上海", "广州", "深圳", "杭州", "成都", "重庆", "南京", "武汉", "长沙", "郑州", "西安"]:
#         return hospital_name[:2]
#     if "北京" in hospital_name or "清华" in hospital_name:
#         return "北京"
#     if "杭州" in hospital_name:
#         return "杭州"
#     if "西安" in hospital_name:
#         return "西安"
#     if "深圳" in hospital_name:
#         return "深圳"
#     if "重庆" in hospital_name:
#         return "重庆"
#     if "南京" in hospital_name:
#         return "南京"
#     if "武汉" in hospital_name:
#         return "武汉"
#     if "成都" in hospital_name or "四川" in hospital_name:
#         return "成都"
#     if "长沙" in hospital_name or "湖南" in hospital_name:
#         return "长沙"
#     if "上海" in hospital_name:
#         return "上海"
#     if "广州" in hospital_name:
#         return "广州"
#     if "郑州" in hospital_name or "河南" in hospital_name:
#         return "郑州"
#     else:
#         all_no_city.append(hospital_name)
#         return city_name
#


if __name__ == '__main__':
    begin = time.time()
    all_data = []
    # city_list = ["北京", "上海", "广州", "深圳", "杭州", "成都", "重庆", "南京", "武汉", "长沙", "郑州", "西安"]
    # Despite the name, the entries are the input file names to merge.
    city_list = ["gengmei_save_data_2021-07-27.txt", "gengmei_save_data_2021-07-28.txt",
                 "gengmei_save_data_2021-07-29.txt", "gengmei_save_data_2021-07-30.txt"]
    for city_name in city_list:
        # file_name = "gengmei_save_data_2021-07-22" + city_name + ".txt"
        if os.path.exists(city_name):
            print(city_name)
            all_data.extend(_load_json_lines(city_name))

    if all_data:
        res = pd.DataFrame(all_data)
        res.to_csv("gengmei_service.csv.gz", compression='gzip', index=False, encoding="gb18030")
        # NOTE(review): the original indentation was lost; assumed the e-mail
        # is only sent when a fresh CSV was just written — confirm.
        send_email_tome()

    print(time.time() - begin)
    print("end")