Commit ee5a96d0 authored by 李小芳

add

parent fa09f776
...@@ -291,20 +291,11 @@ class SoYongSpider(object): ...@@ -291,20 +291,11 @@ class SoYongSpider(object):
def run(self, city_tags): def run(self, city_tags):
get_data_file = open(self.file_name, "a+", encoding="utf-8") get_data_file = open(self.file_name, "a+", encoding="utf-8")
get_lasted_data = []
self.city_list = [city_tags] self.city_list = [city_tags]
for city_name in self.city_list: # 热门城市 for city_name in self.city_list: # 热门城市
city_id = self.cityIdMapping[city_name] city_id = self.cityIdMapping[city_name]
crawler_xinyang = CrawlerMain(city_id=city_id) crawler_xinyang = CrawlerMain(city_id=city_id)
# print(city_name, self.city_list.index(city_name), len(self.city_list) - 1)
if self.city_list.index(city_name) == len(self.city_list) - 1:
get_lasted_data.append(city_name)
for keyword in self.keywords: # 热门词 for keyword in self.keywords: # 热门词
# print(keyword, self.keywords.index(keyword), len(self.keywords) - 1)
if self.keywords.index(keyword) == len(self.keywords) - 1 and len(get_lasted_data) == 1:
get_lasted_data.append(keyword)
for page in range(1, self.page_num): # 筛选前100个 for page in range(1, self.page_num): # 筛选前100个
word = str(keyword + city_name) word = str(keyword + city_name)
if word not in self.have_get_service_info.keys() or self.have_get_service_info[word] < 10: if word not in self.have_get_service_info.keys() or self.have_get_service_info[word] < 10:
...@@ -326,11 +317,7 @@ class SoYongSpider(object): ...@@ -326,11 +317,7 @@ class SoYongSpider(object):
time.sleep(1) time.sleep(1)
get_data_file.close() get_data_file.close()
print("get_lasted_data:", get_lasted_data)
if len(get_lasted_data) == 2:
return True
else:
return False
def main(city_tag): def main(city_tag):
...@@ -339,12 +326,8 @@ def main(city_tag): ...@@ -339,12 +326,8 @@ def main(city_tag):
today = str(nowday).split()[0] today = str(nowday).split()[0]
file_name = "save_data_" + str(today) + city_tag + ".txt" file_name = "save_data_" + str(today) + city_tag + ".txt"
while (True): spider_obj = SoYongSpider(file_name)
spider_obj = SoYongSpider(file_name) spider_obj.run(city_tags=city_tag)
flat = spider_obj.run(city_tags=city_tag)
print("flat:", flat)
if flat == True:
break
print(time.time() - begin) print(time.time() - begin)
print("end") print("end")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment