Commit 4be5032f authored by 李小芳

add

parent e0268805
...@@ -270,8 +270,6 @@ class SoYongSpider(object): ...@@ -270,8 +270,6 @@ class SoYongSpider(object):
self.page_num = 11 self.page_num = 11
self.file_name = file_name self.file_name = file_name
self.have_get_service_info = self.get_have_spider_keywords() self.have_get_service_info = self.get_have_spider_keywords()
# self.get_data_file = open(file_name, "a+", encoding="utf-8")
# self.read_data_file = open(self.file_name, "r", encoding="utf-8")
def get_have_spider_keywords(self): def get_have_spider_keywords(self):
have_get_service_info = {} have_get_service_info = {}
...@@ -336,6 +334,7 @@ def main(city_tags): ...@@ -336,6 +334,7 @@ def main(city_tags):
nowday = datetime.datetime.now() nowday = datetime.datetime.now()
today = str(nowday).split()[0] today = str(nowday).split()[0]
file_name = "save_data_" + str(today) + city_tags + ".txt" file_name = "save_data_" + str(today) + city_tags + ".txt"
while (True): while (True):
spider_obj = SoYongSpider(file_name) spider_obj = SoYongSpider(file_name)
flat = spider_obj.run(city_tags=city_tags) flat = spider_obj.run(city_tags=city_tags)
...@@ -343,15 +342,6 @@ def main(city_tags): ...@@ -343,15 +342,6 @@ def main(city_tags):
if flat == True: if flat == True:
break break
all_data = []
open_file = open(file_name, "r", encoding="utf-8")
for item in open_file.readlines():
all_data.append(json.loads(item))
res = pd.DataFrame(all_data)
res.to_csv("result1.csv", encoding="gb18030")
send_email_tome()
open_file.close()
print(time.time() - begin) print(time.time() - begin)
print("end") print("end")
......
import json
import logging
import smtplib
import socket
import time
import traceback
import datetime
import os
import sys
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formataddr
from urllib import error
import requests
# 导入requests.exceptions模块中的三种异常类
from requests.exceptions import ReadTimeout, HTTPError, RequestException, ConnectionError
from requests.packages.urllib3.exceptions import ReadTimeoutError
from socket import timeout
from retrying import retry
import pandas as pd
import requests
from lxml import etree
# Module-level logger named after this module; send_email_tome reports
# unexpected failures through it.
logger = logging.getLogger(__name__)
def send_email_tome():
    """Email ``result1.csv`` from the working directory as an attachment.

    Builds a multipart message (a plain-text body plus the CSV file) and
    sends it through ``smtp.exmail.qq.com`` over SSL on port 465, from and
    to the same mailbox.

    The call is best-effort: SMTP protocol errors are printed, any other
    failure (missing attachment, network error, ...) is printed and logged
    with its traceback, and the function never raises.

    The SMTP password is taken from the ``SPIDER_MAIL_PASSWORD`` environment
    variable when it is set; otherwise the legacy built-in value is used.

    Returns:
        None
    """
    try:
        from_addrs = 'lixiaofang@igengmei.com'
        # SECURITY: a credential committed to source control should be
        # rotated and removed. The literal is kept only as a fallback so
        # existing deployments keep working; prefer the environment variable.
        password = os.environ.get('SPIDER_MAIL_PASSWORD', 'EzJzSRyEG4Jibuy9')
        toaddrs = "lixiaofang@igengmei.com"

        content = '爬取新氧热搜前100的词召回的商品,内容详见表格'
        text_apart = MIMEText(content, 'plain', "utf-8")

        zip_file_week = 'result1.csv'
        # Read the attachment inside a context manager so the file handle is
        # closed even if reading fails part-way.
        with open(zip_file_week, 'rb') as attachment:
            zip_apart_week = MIMEApplication(attachment.read())
        zip_apart_week.add_header('Content-Disposition', 'attachment', filename=zip_file_week)

        m = MIMEMultipart()
        m.attach(text_apart)
        m.attach(zip_apart_week)
        m['From'] = formataddr(("李小芳", from_addrs))
        m["To"] = formataddr(("李小芳", toaddrs))
        m['Subject'] = '新氧商品信息'

        try:
            # The context manager issues QUIT and closes the socket on every
            # exit path, including a failed login or a rejected message.
            with smtplib.SMTP_SSL('smtp.exmail.qq.com', 465) as server:
                server.login(from_addrs, password)
                server.sendmail(from_addrs, [toaddrs], m.as_string())
                print('success')
        except smtplib.SMTPException as e:
            print('error', e)
    except Exception as e:
        # Top-level boundary for a best-effort notification: report the
        # failure and return instead of crashing the caller.
        print(str(e))
        logger.error("catch exception,main:%s", traceback.format_exc())
def load_saved_records(path):
    """Load the JSON-lines records stored in one per-city save file.

    Args:
        path: Path of a UTF-8 text file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.
        An empty list is returned when the file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if not os.path.exists(path):
        return []
    # The context manager closes the file even when a line fails to decode.
    with open(path, "r", encoding="utf-8") as saved:
        # Blank lines (for example a trailing newline) carry no record and
        # would make json.loads raise, so they are skipped.
        return [json.loads(line) for line in saved if line.strip()]


if __name__ == '__main__':
    # Merge today's per-city save files into one CSV and email it.
    begin = time.time()
    nowday = datetime.datetime.now()
    # str(datetime) is "YYYY-MM-DD HH:MM:SS.ffffff"; keep only the date part.
    today = str(nowday).split()[0]
    city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"]
    all_data = []
    for city_tags in city_list:
        file_name = "save_data_" + str(today) + city_tags + ".txt"
        # Cities whose save file is absent contribute no rows.
        all_data.extend(load_saved_records(file_name))
    res = pd.DataFrame(all_data)
    # gb18030 is the encoding the original export used for the CSV.
    res.to_csv("result1.csv", encoding="gb18030")
    send_email_tome()
    print(time.time() - begin)
    print("end")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment