Commit 7e478d56 authored by litaolemo's avatar litaolemo

update

parent f056f8f7
......@@ -71,8 +71,9 @@ parser.add_argument('-name', '--name', default="crawler01", type=str,
help=('this computer name'))
args = parser.parse_args()
# Shared Redis clients for the crawler scheduler.
# NOTE(review): the rds_1 assignment on the next line is immediately
# overwritten by the reassignment three lines down (this looks like diff
# residue where both the old and new line survived) — the old-host client
# is created and then discarded; confirm the first line can be deleted.
# NOTE(review): a plaintext Redis password is embedded in source — it
# should be moved to an environment variable or secret store and rotated.
rds_1 = redis.StrictRedis(host='172.16.40.164', port=6379, db=19, decode_responses=True, password='ReDis!GmTx*0aN12')
# rds_1 = redis.StrictRedis(host='172.16.40.164', port=6379, db=19, decode_responses=True, password='ReDis!GmTx*0aN12')
# db 19 holds crawled-item hashes ("weibo"/"error" keys elsewhere in this
# file); db 17's role is not visible here — TODO confirm against callers.
# decode_responses=True makes both clients return str instead of bytes.
rds_1 = redis.StrictRedis(host='172.18.51.10', port=6379, db=19, decode_responses=True)
rds_17 = redis.StrictRedis(host='172.18.51.10', port=6379, db=17, decode_responses=True)
platform_crawler_reg = {
'toutiao': crawler_toutiao.Crawler_toutiao,
'toutiao_article': toutiao_article.Crawler_toutiao_article,
......@@ -209,42 +210,43 @@ def single_thead(processe,name):
if video_time:
if start_time < video_time:
if video_time < end_time:
data_list.append(single_data)
rds_1.hset("weibo", key=single_data["doc_id"], value=json.dumps(single_data))
# data_list.append(single_data)
else:
count_false += 1
if count_false > allow*3:
break
else:
data_list.append(single_data)
if len(data_list) >= 100:
output_result(result_Lst=data_list,
platform=platform,
output_to_file=output_to_file,
filepath=None,
push_to_redis=push_to_redis,
output_to_es_raw=output_to_es_raw,
es_index=es_index,
output_to_es_register=output_to_es_register)
print(len(data_list))
data_list.clear()
rds_1.hset("weibo", key=single_data["doc_id"], value=json.dumps(single_data))
# if len(data_list) >= 100:
# output_result(result_Lst=data_list,
# platform=platform,
# output_to_file=output_to_file,
# filepath=None,
# push_to_redis=push_to_redis,
# output_to_es_raw=output_to_es_raw,
# es_index=es_index,
# output_to_es_register=output_to_es_register)
# print(len(data_list))
# data_list.clear()
print("processe"+ str(processe) + " " +threading.current_thread().name + " down " + platform + str(count))
if not count_has:
releaser_body["mssage"] = "爬取失败,请检查账号"
rds_1.hset("error",releaser_body["platform"] + "/" +releaser_body["releaserUrl"],json.dumps(releaser_body))
if data_list != []:
output_result(result_Lst=data_list,
platform=platform,
output_to_file=output_to_file,
push_to_redis=push_to_redis,
filepath=None,
output_to_es_raw=output_to_es_raw,
es_index=es_index,
output_to_es_register=output_to_es_register,
expire=86400)
print(len(data_list))
data_list.clear()
# if data_list != []:
# output_result(result_Lst=data_list,
# platform=platform,
# output_to_file=output_to_file,
# push_to_redis=push_to_redis,
# filepath=None,
# output_to_es_raw=output_to_es_raw,
# es_index=es_index,
# output_to_es_register=output_to_es_register,
# expire=86400)
# print(len(data_list))
# data_list.clear()
except Exception as e:
print(e)
......
......@@ -7,4 +7,4 @@ ps aux|grep update_data_in_target_releasers_multi_process_by_date_from_redis|gre
ps aux|grep cal_ni_and_put_to_backend|grep -v grep|cut -c 9-15|xargs kill -15
current_date=`date -d "-1 day" "+%Y%m%d"`
python /srv/apps/crawler_old/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process_by_date_from_redis.py > /data/log/crawler/write_task_${current_date}_.log &
python /srv/apps/crawler_old/crawler/crawler_sys/scheduler/cal_ni_and_put_to_backend.py > /data/log/crawler/.log &
\ No newline at end of file
#python /srv/apps/crawler_old/crawler/crawler_sys/scheduler/cal_ni_and_put_to_backend.py > /data/log/crawler/.log &
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment