Commit e440244e authored by 李小芳

add

parent 43d2037c
...@@ -64,7 +64,6 @@ def send_email_tome(): ...@@ -64,7 +64,6 @@ def send_email_tome():
class CrawlerMain(object): class CrawlerMain(object):
def __init__(self, city_id=-1): def __init__(self, city_id=-1):
self.headers = { self.headers = {
"cookie": "__order_time__=2021-07-16 15:22:00; msg_time=2021-07-16 15:22:00; back_order_time=2021-07-16 15:22:00; complain_time=2021-07-16 15:22:00; uuid=2E2206C5-B5CD-18F9-8B76-D5FE0D078395; __usersign__=1626341221780983876; _ga=GA1.2.2084074278.1626341224; smidV2=20210715174222a8c0fc7fc96128d6b9c09abf5787b250008f7cb10a6f61380; _gid=GA1.2.2004598599.1626602683; PHPSESSID=ace2ec3e62b7d5a8f7021c3c85e0bb00; __postion__=a%3A4%3A%7Bs%3A6%3A%22cityId%22%3Bi%3A0%3Bs%3A8%3A%22cityName%22%3Bs%3A0%3A%22%22%3Bs%3A8%3A%22cityCode%22%3Bi%3A0%3Bs%3A3%3A%22jwd%22%3Bi%3A0%3B%7D; Hm_lvt_b366fbb5465f5a86e1cc2871552e1fdb=1626341224,1626510298,1626602683,1626675657; _gat=1; cityId={}; Hm_lpvt_b366fbb5465f5a86e1cc2871552e1fdb=1626675685".format( "cookie": "__order_time__=2021-07-16 15:22:00; msg_time=2021-07-16 15:22:00; back_order_time=2021-07-16 15:22:00; complain_time=2021-07-16 15:22:00; uuid=2E2206C5-B5CD-18F9-8B76-D5FE0D078395; __usersign__=1626341221780983876; _ga=GA1.2.2084074278.1626341224; smidV2=20210715174222a8c0fc7fc96128d6b9c09abf5787b250008f7cb10a6f61380; _gid=GA1.2.2004598599.1626602683; PHPSESSID=ace2ec3e62b7d5a8f7021c3c85e0bb00; __postion__=a%3A4%3A%7Bs%3A6%3A%22cityId%22%3Bi%3A0%3Bs%3A8%3A%22cityName%22%3Bs%3A0%3A%22%22%3Bs%3A8%3A%22cityCode%22%3Bi%3A0%3Bs%3A3%3A%22jwd%22%3Bi%3A0%3B%7D; Hm_lvt_b366fbb5465f5a86e1cc2871552e1fdb=1626341224,1626510298,1626602683,1626675657; _gat=1; cityId={}; Hm_lpvt_b366fbb5465f5a86e1cc2871552e1fdb=1626675685".format(
city_id), city_id),
...@@ -88,13 +87,11 @@ class CrawlerMain(object): ...@@ -88,13 +87,11 @@ class CrawlerMain(object):
@retry(stop_max_attempt_number=5, wait_fixed=1000) @retry(stop_max_attempt_number=5, wait_fixed=1000)
def get_service_base_info_list(self, pid): def get_service_base_info_list(self, pid):
url = "https://y.soyoung.com/cp{}".format(pid) url = "https://y.soyoung.com/cp{}".format(pid)
try: try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10) requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
# 打印状态码 # 打印状态码
page_obj = etree.HTML(requests_res.text) page_obj = etree.HTML(requests_res.text)
return page_obj, url return page_obj, url
except ReadTimeout: except ReadTimeout:
print('timeout') print('timeout')
return None, None return None, None
...@@ -144,26 +141,37 @@ class CrawlerMain(object): ...@@ -144,26 +141,37 @@ class CrawlerMain(object):
print("ConnectionError") print("ConnectionError")
return None return None
def get_services_list(self, res_json, query="", city_name="", city_id=-1):
    """Turn one page of search results into export records for a single city.

    Products are read from ``res_json["responseData"]["arr_product"]`` and
    processed in order. Each product whose ``district_2`` equals ``city_id``
    becomes one record; the first product from a different city stops the
    scan, because the caller uses that as the signal to stop paging.

    Args:
        res_json: Decoded search response. ``None`` or a payload without a
            usable ``responseData`` mapping is treated as an empty page.
            NOTE(review): the request helper preceding this method appears
            to return ``None`` after a ConnectionError - confirm that an
            empty page (rather than an exception) is the desired outcome.
        query: Search keyword, stored in the record under ``query词``.
        city_name: Human-readable city name, stored under ``城市``.
        city_id: Numeric id (int or numeric string) of the city being crawled.

    Returns:
        A ``(records, ended)`` tuple. ``records`` is a list of dicts whose
        keys are ordered by their position in ``self.title``. ``ended`` is
        True when a product from another city was encountered.

    Raises:
        ValueError: If a record key is absent from ``self.title``
            (raised by ``list.index`` while ordering the keys).
    """
    page_service_pids = []
    reached_other_city = False

    # `or {}` / `or []` also covers payloads where the field is null or an
    # empty list instead of a mapping.
    response_data = (res_json or {}).get("responseData") or {}
    products = response_data.get("arr_product") or []

    for service in products:
        wanted_city = int(city_id)
        try:
            product_city = int(service.get("district_2"))
        except (TypeError, ValueError):
            # A product without a usable district is not evidence that the
            # listing has moved on to another city, so skip it and keep going.
            continue

        if product_city != wanted_city:
            reached_other_city = True
            break

        pid = service.get("pid")
        # Detail-page fields first; the search-result fields below overwrite
        # any key the detail scrape also produced.
        service_info = self.get_service_base_info(pid)
        service_info.update({
            '美购id': service.get("spu_id"),
            'sku原价': service.get("price_origin"),
            'sku活动价': service.get("price_online"),
            '机构等级': service.get("avg_score"),
            '美购名称': service.get("title"),
            '销量': service.get("order_cnt"),
            'skuid': pid,
            '医生名': service.get("doctor_name"),
            '医院名称': service.get("hospital_name"),
            'query词': query,
            '城市': city_name,
            '平台': "新氧",
            '链接': "https://y.soyoung.com/cp{}".format(pid),
        })
        # Emit keys in the column order defined by self.title.
        ordered_items = sorted(service_info.items(), key=lambda item: self.title.index(item[0]))
        page_service_pids.append(dict(ordered_items))

    return page_service_pids, reached_other_city
def get_service_base_info(self, pid): def get_service_base_info(self, pid):
service_info = dict() service_info = dict()
...@@ -173,17 +181,16 @@ class CrawlerMain(object): ...@@ -173,17 +181,16 @@ class CrawlerMain(object):
"/html[1]/body[1]/div[@class='page-content']" "/html[1]/body[1]/div[@class='page-content']"
"/div[@class='w1000']/div[@class='detail-wrap']/div[@class='width-control']/div" "/div[@class='w1000']/div[@class='detail-wrap']/div[@class='width-control']/div"
) )
service_info['链接'] = url
for base_info in res_json: for base_info in res_json:
if "basic-info" in base_info.xpath("div/@class"): if "basic-info" in base_info.xpath("div/@class"):
service_info["美购名称"] = str(base_info.xpath("div/h1/text()")[0].strip()) # service_info["美购名称"] = str(base_info.xpath("div/h1/text()")[0].strip())
service_info["sku活动价"] = base_info.xpath("div/div[@class='base-price']/em/text()")[0].strip() # 980 # service_info["sku活动价"] = base_info.xpath("div/div[@class='base-price']/em/text()")[0].strip() # 980
service_info["sku原价"] = base_info.xpath("div/div[@class='base-price']/del/text()")[0].strip() # 1980 # service_info["sku原价"] = base_info.xpath("div/div[@class='base-price']/del/text()")[
service_info["销量"] = base_info.xpath("div/div[@class='base-relation']/div[3]/em/text()")[ # 0].strip() # 1980
0].strip() # 110 # service_info["销量"] = base_info.xpath("div/div[@class='base-relation']/div[3]/em/text()")[
service_info["机构等级"] = base_info.xpath("div/div[@class='base-relation']/div[1]/text()")[ # 0].strip() # 110
0].strip() # 110 # service_info["机构等级"] = base_info.xpath("div/div[@class='base-relation']/div[1]/text()")[
# 0].strip() # 110
service_info['可领取预约金优惠券'] = [] service_info['可领取预约金优惠券'] = []
service_info['可用尾款券'] = [] service_info['可用尾款券'] = []
...@@ -235,7 +242,6 @@ class SoYongSpider(object): ...@@ -235,7 +242,6 @@ class SoYongSpider(object):
def __init__(self, file_name): def __init__(self, file_name):
self.cityIdMapping = {'北京': '1', '上海': '9', '重庆': '22', '广州市': '289', '深圳市': '291', '郑州市': '240', '武汉市': '258', self.cityIdMapping = {'北京': '1', '上海': '9', '重庆': '22', '广州市': '289', '深圳市': '291', '郑州市': '240', '武汉市': '258',
'长沙市': '275', '南京市': '162', '成都市': '385', '西安市': '438', '杭州市': '175'} '长沙市': '275', '南京市': '162', '成都市': '385', '西安市': '438', '杭州市': '175'}
self.keywords = ['瘦脸针', '双眼皮', '光子嫩肤', '吸脂', '水光针', '玻尿酸', '热玛吉', '脱毛', '瘦腿针', '鼻综合', '瘦肩针', '下颌角', '线雕', '超声刀', self.keywords = ['瘦脸针', '双眼皮', '光子嫩肤', '吸脂', '水光针', '玻尿酸', '热玛吉', '脱毛', '瘦腿针', '鼻综合', '瘦肩针', '下颌角', '线雕', '超声刀',
'美白针', '美白针',
'眼综合', '隆鼻', '菲洛嘉', '下巴', '热拉提', '点阵激光', '面部吸脂', '开内眼角', '嗨体', '牙齿矫正', '皮秒', '超皮秒', '植发', '眼综合', '隆鼻', '菲洛嘉', '下巴', '热拉提', '点阵激光', '面部吸脂', '开内眼角', '嗨体', '牙齿矫正', '皮秒', '超皮秒', '植发',
...@@ -249,9 +255,9 @@ class SoYongSpider(object): ...@@ -249,9 +255,9 @@ class SoYongSpider(object):
'开眼角', '开眼角',
'海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼'] '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼']
self.test_keywords = ['瘦脸针', '双眼皮'] self.test_keywords = ['瘦脸针', '双眼皮']
self.city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"] self.city_list = ["南京市", "北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"]
self.test_city_list = ["北京", "上海"] self.test_city_list = ["北京", "上海"]
self.page_num = 11 self.page_num = 500
self.file_name = file_name self.file_name = file_name
self.have_get_service_info = self.get_have_spider_keywords() self.have_get_service_info = self.get_have_spider_keywords()
# self.get_data_file = open(file_name, "a+", encoding="utf-8") # self.get_data_file = open(file_name, "a+", encoding="utf-8")
...@@ -298,12 +304,18 @@ class SoYongSpider(object): ...@@ -298,12 +304,18 @@ class SoYongSpider(object):
print(city_name, ",", city_id, ",", keyword, ",", page) print(city_name, ",", city_id, ",", keyword, ",", page)
resJson = crawler_xinyang.get_search_service_info_list(query=keyword, page=page, resJson = crawler_xinyang.get_search_service_info_list(query=keyword, page=page,
city_id=city_id) city_id=city_id)
for data in crawler_xinyang.get_services_list(res_json=resJson, query=keyword, service_info_list, current_end_flat = crawler_xinyang.get_services_list(res_json=resJson,
city_name=city_name): query=keyword,
city_name=city_name,
city_id=city_id)
for data in service_info_list:
get_data_file.write(json.dumps(data)) get_data_file.write(json.dumps(data))
get_data_file.write("\n") get_data_file.write("\n")
if current_end_flat == True:
break
else: else:
pass pass
time.sleep(1) time.sleep(1)
get_data_file.close() get_data_file.close()
......
...@@ -65,7 +65,6 @@ def send_email_tome(): ...@@ -65,7 +65,6 @@ def send_email_tome():
class CrawlerMain(object): class CrawlerMain(object):
def __init__(self, city_id=-1): def __init__(self, city_id=-1):
self.headers = { self.headers = {
"cookie": "__order_time__=2021-07-16 15:22:00; msg_time=2021-07-16 15:22:00; back_order_time=2021-07-16 15:22:00; complain_time=2021-07-16 15:22:00; uuid=2E2206C5-B5CD-18F9-8B76-D5FE0D078395; __usersign__=1626341221780983876; _ga=GA1.2.2084074278.1626341224; smidV2=20210715174222a8c0fc7fc96128d6b9c09abf5787b250008f7cb10a6f61380; _gid=GA1.2.2004598599.1626602683; PHPSESSID=ace2ec3e62b7d5a8f7021c3c85e0bb00; __postion__=a%3A4%3A%7Bs%3A6%3A%22cityId%22%3Bi%3A0%3Bs%3A8%3A%22cityName%22%3Bs%3A0%3A%22%22%3Bs%3A8%3A%22cityCode%22%3Bi%3A0%3Bs%3A3%3A%22jwd%22%3Bi%3A0%3B%7D; Hm_lvt_b366fbb5465f5a86e1cc2871552e1fdb=1626341224,1626510298,1626602683,1626675657; _gat=1; cityId={}; Hm_lpvt_b366fbb5465f5a86e1cc2871552e1fdb=1626675685".format( "cookie": "__order_time__=2021-07-16 15:22:00; msg_time=2021-07-16 15:22:00; back_order_time=2021-07-16 15:22:00; complain_time=2021-07-16 15:22:00; uuid=2E2206C5-B5CD-18F9-8B76-D5FE0D078395; __usersign__=1626341221780983876; _ga=GA1.2.2084074278.1626341224; smidV2=20210715174222a8c0fc7fc96128d6b9c09abf5787b250008f7cb10a6f61380; _gid=GA1.2.2004598599.1626602683; PHPSESSID=ace2ec3e62b7d5a8f7021c3c85e0bb00; __postion__=a%3A4%3A%7Bs%3A6%3A%22cityId%22%3Bi%3A0%3Bs%3A8%3A%22cityName%22%3Bs%3A0%3A%22%22%3Bs%3A8%3A%22cityCode%22%3Bi%3A0%3Bs%3A3%3A%22jwd%22%3Bi%3A0%3B%7D; Hm_lvt_b366fbb5465f5a86e1cc2871552e1fdb=1626341224,1626510298,1626602683,1626675657; _gat=1; cityId={}; Hm_lpvt_b366fbb5465f5a86e1cc2871552e1fdb=1626675685".format(
city_id), city_id),
...@@ -89,7 +88,6 @@ class CrawlerMain(object): ...@@ -89,7 +88,6 @@ class CrawlerMain(object):
@retry(stop_max_attempt_number=5, wait_fixed=1000) @retry(stop_max_attempt_number=5, wait_fixed=1000)
def get_service_base_info_list(self, pid): def get_service_base_info_list(self, pid):
url = "https://y.soyoung.com/cp{}".format(pid) url = "https://y.soyoung.com/cp{}".format(pid)
page_obj =None
try: try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10) requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
# 打印状态码 # 打印状态码
...@@ -97,23 +95,23 @@ class CrawlerMain(object): ...@@ -97,23 +95,23 @@ class CrawlerMain(object):
return page_obj, url return page_obj, url
except ReadTimeout: except ReadTimeout:
print('timeout') print('timeout')
return None, url return None, None
except HTTPError: except HTTPError:
print('httperror') print('httperror')
return None, url return None, None
# 请求异常 # 请求异常
except RequestException: except RequestException:
print('reqerror') print('reqerror')
return None, url return None, None
except socket.timeout: except socket.timeout:
print(socket.timeout) print(socket.timeout)
return None, url return None, None
except ReadTimeoutError: except ReadTimeoutError:
print("ReadTimeoutError") print("ReadTimeoutError")
return None, url return None, None
except ConnectionError: except ConnectionError:
print("ConnectionError") print("ConnectionError")
return None, url return None, None
def get_search_service_info_list(self, page=1, city_id=-1, query=""): def get_search_service_info_list(self, page=1, city_id=-1, query=""):
url = "https://www.soyoung.com/searchNew/product?" \ url = "https://www.soyoung.com/searchNew/product?" \
...@@ -144,27 +142,37 @@ class CrawlerMain(object): ...@@ -144,27 +142,37 @@ class CrawlerMain(object):
print("ConnectionError") print("ConnectionError")
return None return None
def get_services_list(self, res_json, query="", city_name="", city_id=-1):
    """Convert a page of search hits into ordered records for one city.

    Reads the product list at ``res_json["responseData"]["arr_product"]``
    and walks it in order. A product is kept when its ``district_2`` equals
    ``city_id``; the first product belonging to another city ends the walk
    and sets the returned flag so the caller can stop requesting pages.

    Args:
        res_json: Decoded search response. ``None`` or a payload lacking a
            usable ``responseData`` mapping yields an empty result.
            NOTE(review): ``get_search_service_info_list`` appears to return
            ``None`` after a ConnectionError - confirm that silently treating
            this as an empty page is acceptable for the retry logic.
        query: Search keyword, written to the record as ``query词``.
        city_name: Display name of the city, written as ``城市``.
        city_id: Numeric id (int or numeric string) of the target city.

    Returns:
        ``(records, ended)``: ``records`` is a list of dicts with keys
        ordered by their index in ``self.title``; ``ended`` is True once a
        product from a different city has been seen.

    Raises:
        ValueError: If a record key is not listed in ``self.title``
            (raised by ``list.index`` during key ordering).
    """
    page_service_pids = []
    other_city_seen = False

    # Tolerate null / empty-list placeholders where a mapping is expected.
    payload = (res_json or {}).get("responseData") or {}
    products = payload.get("arr_product") or []

    for service in products:
        target_city = int(city_id)
        try:
            current_city = int(service.get("district_2"))
        except (TypeError, ValueError):
            # No parseable district: cannot tell which city this belongs to,
            # so drop the item without ending pagination.
            continue

        if current_city != target_city:
            other_city_seen = True
            break

        pid = service.get("pid")
        hospital_name = service.get("hospital_name")
        # Start from the detail-page scrape, then overlay search-result fields.
        service_info = self.get_service_base_info(pid)
        service_info['美购id'] = service.get("spu_id")
        service_info['sku原价'] = service.get("price_origin")
        service_info['sku活动价'] = service.get("price_online")
        service_info['机构等级'] = service.get("avg_score")
        service_info['美购名称'] = service.get("title")
        service_info['销量'] = service.get("order_cnt")
        service_info['skuid'] = pid
        service_info['医生名'] = service.get("doctor_name")
        service_info['医院名称'] = hospital_name
        print(hospital_name)
        service_info['query词'] = query
        service_info['城市'] = city_name
        service_info['平台'] = "新氧"
        service_info['链接'] = "https://y.soyoung.com/cp{}".format(pid)
        # Order the columns as declared in self.title before storing.
        ordered = sorted(service_info.items(), key=lambda pair: self.title.index(pair[0]))
        page_service_pids.append(dict(ordered))

    return page_service_pids, other_city_seen
def get_service_base_info(self, pid): def get_service_base_info(self, pid):
service_info = dict() service_info = dict()
...@@ -174,18 +182,16 @@ class CrawlerMain(object): ...@@ -174,18 +182,16 @@ class CrawlerMain(object):
"/html[1]/body[1]/div[@class='page-content']" "/html[1]/body[1]/div[@class='page-content']"
"/div[@class='w1000']/div[@class='detail-wrap']/div[@class='width-control']/div" "/div[@class='w1000']/div[@class='detail-wrap']/div[@class='width-control']/div"
) )
service_info['链接'] = url
for base_info in res_json: for base_info in res_json:
if "basic-info" in base_info.xpath("div/@class"): if "basic-info" in base_info.xpath("div/@class"):
service_info["美购名称"] = str(base_info.xpath("div/h1/text()")[0].strip()) # service_info["美购名称"] = str(base_info.xpath("div/h1/text()")[0].strip())
service_info["sku活动价"] = base_info.xpath("div/div[@class='base-price']/em/text()")[0].strip() # 980 # service_info["sku活动价"] = base_info.xpath("div/div[@class='base-price']/em/text()")[0].strip() # 980
service_info["sku原价"] = base_info.xpath("div/div[@class='base-price']/del/text()")[ # service_info["sku原价"] = base_info.xpath("div/div[@class='base-price']/del/text()")[
0].strip() # 1980 # 0].strip() # 1980
service_info["销量"] = base_info.xpath("div/div[@class='base-relation']/div[3]/em/text()")[ # service_info["销量"] = base_info.xpath("div/div[@class='base-relation']/div[3]/em/text()")[
0].strip() # 110 # 0].strip() # 110
service_info["机构等级"] = base_info.xpath("div/div[@class='base-relation']/div[1]/text()")[ # service_info["机构等级"] = base_info.xpath("div/div[@class='base-relation']/div[1]/text()")[
0].strip() # 110 # 0].strip() # 110
service_info['可领取预约金优惠券'] = [] service_info['可领取预约金优惠券'] = []
service_info['可用尾款券'] = [] service_info['可用尾款券'] = []
...@@ -237,7 +243,6 @@ class SoYongSpider(object): ...@@ -237,7 +243,6 @@ class SoYongSpider(object):
def __init__(self, file_name): def __init__(self, file_name):
self.cityIdMapping = {'北京': '1', '上海': '9', '重庆': '22', '广州市': '289', '深圳市': '291', '郑州市': '240', '武汉市': '258', self.cityIdMapping = {'北京': '1', '上海': '9', '重庆': '22', '广州市': '289', '深圳市': '291', '郑州市': '240', '武汉市': '258',
'长沙市': '275', '南京市': '162', '成都市': '385', '西安市': '438', '杭州市': '175'} '长沙市': '275', '南京市': '162', '成都市': '385', '西安市': '438', '杭州市': '175'}
self.keywords = ['瘦脸针', '双眼皮', '光子嫩肤', '吸脂', '水光针', '玻尿酸', '热玛吉', '脱毛', '瘦腿针', '鼻综合', '瘦肩针', '下颌角', '线雕', '超声刀', self.keywords = ['瘦脸针', '双眼皮', '光子嫩肤', '吸脂', '水光针', '玻尿酸', '热玛吉', '脱毛', '瘦腿针', '鼻综合', '瘦肩针', '下颌角', '线雕', '超声刀',
'美白针', '美白针',
'眼综合', '隆鼻', '菲洛嘉', '下巴', '热拉提', '点阵激光', '面部吸脂', '开内眼角', '嗨体', '牙齿矫正', '皮秒', '超皮秒', '植发', '眼综合', '隆鼻', '菲洛嘉', '下巴', '热拉提', '点阵激光', '面部吸脂', '开内眼角', '嗨体', '牙齿矫正', '皮秒', '超皮秒', '植发',
...@@ -251,11 +256,13 @@ class SoYongSpider(object): ...@@ -251,11 +256,13 @@ class SoYongSpider(object):
'开眼角', '开眼角',
'海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼'] '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼']
self.test_keywords = ['瘦脸针', '双眼皮'] self.test_keywords = ['瘦脸针', '双眼皮']
self.city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"] self.city_list = ["南京市", "北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"]
self.test_city_list = ["北京", "上海"] self.test_city_list = ["北京", "上海"]
self.page_num = 11 self.page_num = 500
self.file_name = file_name self.file_name = file_name
self.have_get_service_info = self.get_have_spider_keywords() self.have_get_service_info = self.get_have_spider_keywords()
# self.get_data_file = open(file_name, "a+", encoding="utf-8")
# self.read_data_file = open(self.file_name, "r", encoding="utf-8")
def get_have_spider_keywords(self): def get_have_spider_keywords(self):
have_get_service_info = {} have_get_service_info = {}
...@@ -299,12 +306,18 @@ class SoYongSpider(object): ...@@ -299,12 +306,18 @@ class SoYongSpider(object):
print(city_name, ",", city_id, ",", keyword, ",", page) print(city_name, ",", city_id, ",", keyword, ",", page)
resJson = crawler_xinyang.get_search_service_info_list(query=keyword, page=page, resJson = crawler_xinyang.get_search_service_info_list(query=keyword, page=page,
city_id=city_id) city_id=city_id)
for data in crawler_xinyang.get_services_list(res_json=resJson, query=keyword, service_info_list, current_end_flat = crawler_xinyang.get_services_list(res_json=resJson,
city_name=city_name): query=keyword,
city_name=city_name,
city_id=city_id)
for data in service_info_list:
get_data_file.write(json.dumps(data)) get_data_file.write(json.dumps(data))
get_data_file.write("\n") get_data_file.write("\n")
if current_end_flat == True:
break
else: else:
pass pass
time.sleep(1) time.sleep(1)
get_data_file.close() get_data_file.close()
...@@ -315,15 +328,15 @@ class SoYongSpider(object): ...@@ -315,15 +328,15 @@ class SoYongSpider(object):
return False return False
def main(city_tags): def main(city_tag):
begin = time.time() begin = time.time()
nowday = datetime.datetime.now() nowday = datetime.datetime.now()
today = str(nowday).split()[0] today = str(nowday).split()[0]
file_name = "save_data_" + str(today) + city_tags + ".txt" file_name = "save_data_" + str(today) + city_tag + ".txt"
while (True): while (True):
spider_obj = SoYongSpider(file_name) spider_obj = SoYongSpider(file_name)
flat = spider_obj.run(city_tags=city_tags) flat = spider_obj.run(city_tags=city_tag)
print("flat:", flat) print("flat:", flat)
if flat == True: if flat == True:
break break
...@@ -334,7 +347,4 @@ def main(city_tags): ...@@ -334,7 +347,4 @@ def main(city_tags):
if __name__ == "__main__": if __name__ == "__main__":
args = sys.argv[1] args = sys.argv[1]
main(city_tags=args) main(city_tag=args)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment