Commit f160221e authored by litaolemo

update

parent 071ea91d
@@ -457,7 +457,7 @@ class Crawler_toutiao():
                     output_to_es_raw=False,
                     output_to_es_register=False,
                     es_index=None,
-                    doc_type=None, proxies_num=1):
+                    doc_type=None, proxies_num=3):
         self.search_page_old(keyword, search_pages_max=search_pages_max, output_to_es_raw=output_to_es_raw,
                              output_to_es_register=output_to_es_register,
                              es_index=es_index,
@@ -48,8 +48,12 @@ class Crawler_zhihu():
         self.video_data['platform'] = self.platform
         # remove fields that crawled data don't have
         pop_key_Lst = ['channel', 'describe', 'isOriginal', "repost_count", "video_id"]
-        with open('./zhihu.js', 'r', encoding='utf-8') as f:
-            js = f.read()
+        try:
+            with open('./zhihu.js', 'r', encoding='utf-8') as f:
+                js = f.read()
+        except:
+            with open('/srv/apps/crawler/crawler_sys/site_crawler/zhihu.js', 'r', encoding='utf-8') as f:
+                js = f.read()
         self.exec_js = execjs.compile(js)
         for popk in pop_key_Lst:
             self.video_data.pop(popk)
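The added block falls back to the absolute deployment path when the relative './zhihu.js' lookup fails, which only works if the process is started from the module's own directory. Below is a minimal sketch, not part of the commit, of the same fallback resolved relative to the module file instead; it assumes PyExecJS ('execjs') is installed and that zhihu.js sits next to the module, and the load_zhihu_js helper name is hypothetical.

# Sketch only (not from the commit): resolve zhihu.js relative to this module,
# then fall back to the deployed path used in the diff above.
import execjs
from pathlib import Path

def load_zhihu_js():
    candidates = [
        Path(__file__).resolve().parent / 'zhihu.js',                   # alongside the module
        Path('/srv/apps/crawler/crawler_sys/site_crawler/zhihu.js'),    # deployed location
    ]
    for path in candidates:
        if path.exists():
            # execjs.compile() returns a context whose JS functions can be called from Python
            return execjs.compile(path.read_text(encoding='utf-8'))
    raise FileNotFoundError('zhihu.js not found in any candidate location')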