Commit 071ea91d authored by litaolemo

update

parent 1f2ea02a
...@@ -16,7 +16,7 @@ PARSER = argparse.ArgumentParser(description='video platform search page crawler ...@@ -16,7 +16,7 @@ PARSER = argparse.ArgumentParser(description='video platform search page crawler
# '/crawler_sys/framework/config' # '/crawler_sys/framework/config'
# '/search_keywords.ini'), # '/search_keywords.ini'),
# help=('config file absolute path')) # help=('config file absolute path'))
PARSER.add_argument('-p', '--platform', default=["toutiao","weibo", "zhihu"], action='append', PARSER.add_argument('-p', '--platform', default=[], action='append',
help=('legal platform name is required')) help=('legal platform name is required'))
PARSER.add_argument('-k', '--key_word_platform', default=[], action='append', PARSER.add_argument('-k', '--key_word_platform', default=[], action='append',
help=('key_word_legal platform name is required')) help=('key_word_legal platform name is required'))
...@@ -26,13 +26,13 @@ PARSER.add_argument('-g', '--output_to_es_register', default=True, ...@@ -26,13 +26,13 @@ PARSER.add_argument('-g', '--output_to_es_register', default=True,
help=('output to es register')) help=('output to es register'))
PARSER.add_argument('-n', '--maxpage', default=20, PARSER.add_argument('-n', '--maxpage', default=20,
help=('maxpage')) help=('maxpage'))
PARSER.add_argument('-px', '--proxies_num', default=3,
help=('proxies_num'))
ARGS = PARSER.parse_args() ARGS = PARSER.parse_args()
if ARGS.platform != []:
PLATFORM_LIST = ARGS.platform
# for platform in PLATFORM_LIST: # for platform in PLATFORM_LIST:
# if platform not in legal_platform_name: # if platform not in legal_platform_name:
# print("%s is not a legal platform name, " # print("%s is not a legal platform name, "
...@@ -42,7 +42,8 @@ if ARGS.platform != []: ...@@ -42,7 +42,8 @@ if ARGS.platform != []:
OUTPUT_TO_ES_RAW = ARGS.output_to_es_raw OUTPUT_TO_ES_RAW = ARGS.output_to_es_raw
OUTPUT_TO_ES_REGISTER = ARGS.output_to_es_register OUTPUT_TO_ES_REGISTER = ARGS.output_to_es_register
PLATFORM_LIST = ARGS.platform
proxies_num = ARGS.proxies_num
# #
# def func_search_keywordlist(platform): # def func_search_keywordlist(platform):
# search_body = {"query": {"bool": {"filter": []}}} # search_body = {"query": {"bool": {"filter": []}}}
...@@ -141,7 +142,7 @@ for platform in PLATFORM_LIST: ...@@ -141,7 +142,7 @@ for platform in PLATFORM_LIST:
search_pages_max=search_pages, search_pages_max=search_pages,
output_to_es_raw=OUTPUT_TO_ES_RAW, output_to_es_raw=OUTPUT_TO_ES_RAW,
output_to_es_register=OUTPUT_TO_ES_REGISTER, output_to_es_register=OUTPUT_TO_ES_REGISTER,
es_index=ES_INDEX,) es_index=ES_INDEX,proxies_num=proxies_num)
except Exception as e: except Exception as e:
print(e) print(e)
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment