Commit f160221e authored by litaolemo

update

parent 071ea91d
......@@ -457,7 +457,7 @@ class Crawler_toutiao():
output_to_es_raw=False,
output_to_es_register=False,
es_index=None,
doc_type=None, proxies_num=1):
doc_type=None, proxies_num=3):
self.search_page_old(keyword, search_pages_max=search_pages_max, output_to_es_raw=output_to_es_raw,
output_to_es_register=output_to_es_register,
es_index=es_index,
......
......@@ -48,8 +48,12 @@ class Crawler_zhihu():
self.video_data['platform'] = self.platform
# remove fields that crawled data don't have
pop_key_Lst = ['channel', 'describe', 'isOriginal', "repost_count", "video_id"]
with open('./zhihu.js', 'r', encoding='utf-8') as f:
js = f.read()
try:
with open('./zhihu.js', 'r', encoding='utf-8') as f:
js = f.read()
except:
with open('/srv/apps/crawler/crawler_sys/site_crawler/zhihu.js', 'r', encoding='utf-8') as f:
js = f.read()
self.exec_js = execjs.compile(js)
for popk in pop_key_Lst:
self.video_data.pop(popk)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment