Commit 6394ec7a authored by 李小芳

add

parent faa5455c
...@@ -174,7 +174,8 @@ class CrawlerMain(object): ...@@ -174,7 +174,8 @@ class CrawlerMain(object):
service_info['平台'] = "新氧" service_info['平台'] = "新氧"
service_info['链接'] = "https://y.soyoung.com/cp{}".format(pid) service_info['链接'] = "https://y.soyoung.com/cp{}".format(pid)
sort_service_info = sorted(service_info.items(), key=lambda x: self.title.index(x[0]), reverse=False) sort_service_info = sorted(service_info.items(), key=lambda x: self.title.index(x[0]),
reverse=False)
page_service_pids.append(dict(sort_service_info)) page_service_pids.append(dict(sort_service_info))
else: else:
current_end_flat = True current_end_flat = True
...@@ -305,9 +306,6 @@ class SoYongSpider(object): ...@@ -305,9 +306,6 @@ class SoYongSpider(object):
get_lasted_data.append(keyword) get_lasted_data.append(keyword)
for page in range(1, self.page_num): # 筛选前100个 for page in range(1, self.page_num): # 筛选前100个
if self.page_num == page + 1 and len(get_lasted_data) == 2:
get_lasted_data.append(page)
word = str(keyword + city_name) word = str(keyword + city_name)
if word not in self.have_get_service_info.keys() or self.have_get_service_info[word] < 10: if word not in self.have_get_service_info.keys() or self.have_get_service_info[word] < 10:
print(city_name, ",", city_id, ",", keyword, ",", page) print(city_name, ",", city_id, ",", keyword, ",", page)
...@@ -321,6 +319,7 @@ class SoYongSpider(object): ...@@ -321,6 +319,7 @@ class SoYongSpider(object):
get_data_file.write(json.dumps(data)) get_data_file.write(json.dumps(data))
get_data_file.write("\n") get_data_file.write("\n")
if current_end_flat == True: if current_end_flat == True:
get_lasted_data.append(page)
break break
else: else:
pass pass
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment