Commit 61a6c050 authored by litaolemo

update

parent 4bf6b606
...@@ -54,7 +54,7 @@ parser.add_argument('-f', '--output_file_path', default='', type=str, ...@@ -54,7 +54,7 @@ parser.add_argument('-f', '--output_file_path', default='', type=str,
help=('Specify output file path, default None.')) help=('Specify output file path, default None.'))
parser.add_argument('-r', '--push_to_redis', default="True", type=str, parser.add_argument('-r', '--push_to_redis', default="True", type=str,
help=('Write urls to redis or not, default to True')) help=('Write urls to redis or not, default to True'))
parser.add_argument('-w', '--output_to_es_raw', default='True', type=str, parser.add_argument('-w', '--output_to_es_raw', default='False', type=str,
help=('Write data into es or not, default to True')) help=('Write data into es or not, default to True'))
parser.add_argument('-index', '--es_index', default='crawler-data-raw', type=str, parser.add_argument('-index', '--es_index', default='crawler-data-raw', type=str,
help=('assign a es_index to write into, default to crawler-data-raw')) help=('assign a es_index to write into, default to crawler-data-raw'))
...@@ -204,6 +204,7 @@ def single_thead(processe,name): ...@@ -204,6 +204,7 @@ def single_thead(processe,name):
platform=platform, platform=platform,
output_to_file=output_to_file, output_to_file=output_to_file,
filepath=None, filepath=None,
push_to_redis=push_to_redis,
output_to_es_raw=output_to_es_raw, output_to_es_raw=output_to_es_raw,
es_index=es_index, es_index=es_index,
output_to_es_register=output_to_es_register) output_to_es_register=output_to_es_register)
...@@ -219,6 +220,7 @@ def single_thead(processe,name): ...@@ -219,6 +220,7 @@ def single_thead(processe,name):
output_result(result_Lst=data_list, output_result(result_Lst=data_list,
platform=platform, platform=platform,
output_to_file=output_to_file, output_to_file=output_to_file,
push_to_redis=push_to_redis,
filepath=None, filepath=None,
output_to_es_raw=output_to_es_raw, output_to_es_raw=output_to_es_raw,
es_index=es_index, es_index=es_index,
......
...@@ -174,6 +174,7 @@ class Crawler_weibo(): ...@@ -174,6 +174,7 @@ class Crawler_weibo():
"releaserUrl": "https://www.weibo.com/u/%s" % releaser_id, "releaserUrl": "https://www.weibo.com/u/%s" % releaser_id,
"releaser_id_str": "weibo_%s" % releaser_id, "releaser_id_str": "weibo_%s" % releaser_id,
"img_list":self.get_img(mblog), "img_list":self.get_img(mblog),
"platform":"weibo",
# "doc_id":doc_id # "doc_id":doc_id
} }
res_dic["doc_id"] = cal_doc_id(platform="weibo", url=one["scheme"], data_dict=res_dic, res_dic["doc_id"] = cal_doc_id(platform="weibo", url=one["scheme"], data_dict=res_dic,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment