Commit ee5a96d0 authored by 李小芳

add

parent fa09f776
@@ -291,20 +291,11 @@ class SoYongSpider(object):
    def run(self, city_tags):
        get_data_file = open(self.file_name, "a+", encoding="utf-8")
        get_lasted_data = []
        self.city_list = [city_tags]
        for city_name in self.city_list:  # popular cities
            city_id = self.cityIdMapping[city_name]
            crawler_xinyang = CrawlerMain(city_id=city_id)
            # print(city_name, self.city_list.index(city_name), len(self.city_list) - 1)
            if self.city_list.index(city_name) == len(self.city_list) - 1:
                get_lasted_data.append(city_name)
            for keyword in self.keywords:  # popular keywords
                # print(keyword, self.keywords.index(keyword), len(self.keywords) - 1)
                if self.keywords.index(keyword) == len(self.keywords) - 1 and len(get_lasted_data) == 1:
                    get_lasted_data.append(keyword)
                for page in range(1, self.page_num):  # only the first 100 results
                    word = str(keyword + city_name)
                    if word not in self.have_get_service_info.keys() or self.have_get_service_info[word] < 10:
@@ -326,11 +317,7 @@ class SoYongSpider(object):
                        time.sleep(1)
        get_data_file.close()
        print("get_lasted_data:", get_lasted_data)
        if len(get_lasted_data) == 2:
            return True
        else:
            return False
def main(city_tag):
@@ -339,12 +326,8 @@ def main(city_tag):
    today = str(nowday).split()[0]
    file_name = "save_data_" + str(today) + city_tag + ".txt"
    while (True):
        spider_obj = SoYongSpider(file_name)
        flat = spider_obj.run(city_tags=city_tag)
        print("flat:", flat)
        if flat == True:
            break
    spider_obj = SoYongSpider(file_name)
    spider_obj.run(city_tags=city_tag)
    print(time.time() - begin)
    print("end")