Commit 6394ec7a authored by 李小芳

add

parent faa5455c
......@@ -174,7 +174,8 @@ class CrawlerMain(object):
service_info['平台'] = "新氧"
service_info['链接'] = "https://y.soyoung.com/cp{}".format(pid)
sort_service_info = sorted(service_info.items(), key=lambda x: self.title.index(x[0]), reverse=False)
sort_service_info = sorted(service_info.items(), key=lambda x: self.title.index(x[0]),
reverse=False)
page_service_pids.append(dict(sort_service_info))
else:
current_end_flat = True
......@@ -305,9 +306,6 @@ class SoYongSpider(object):
get_lasted_data.append(keyword)
for page in range(1, self.page_num): # 筛选前100个
if self.page_num == page + 1 and len(get_lasted_data) == 2:
get_lasted_data.append(page)
word = str(keyword + city_name)
if word not in self.have_get_service_info.keys() or self.have_get_service_info[word] < 10:
print(city_name, ",", city_id, ",", keyword, ",", page)
......@@ -321,6 +319,7 @@ class SoYongSpider(object):
get_data_file.write(json.dumps(data))
get_data_file.write("\n")
if current_end_flat == True:
get_lasted_data.append(page)
break
else:
pass
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment