Commit 7e478d56 authored by litaolemo

update

parent f056f8f7
...@@ -71,8 +71,9 @@ parser.add_argument('-name', '--name', default="crawler01", type=str, ...@@ -71,8 +71,9 @@ parser.add_argument('-name', '--name', default="crawler01", type=str,
help=('this computer name')) help=('this computer name'))
args = parser.parse_args() args = parser.parse_args()
rds_1 = redis.StrictRedis(host='172.16.40.164', port=6379, db=19, decode_responses=True, password='ReDis!GmTx*0aN12') # rds_1 = redis.StrictRedis(host='172.16.40.164', port=6379, db=19, decode_responses=True, password='ReDis!GmTx*0aN12')
rds_1 = redis.StrictRedis(host='172.18.51.10', port=6379, db=19, decode_responses=True)
rds_17 = redis.StrictRedis(host='172.18.51.10', port=6379, db=17, decode_responses=True)
platform_crawler_reg = { platform_crawler_reg = {
'toutiao': crawler_toutiao.Crawler_toutiao, 'toutiao': crawler_toutiao.Crawler_toutiao,
'toutiao_article': toutiao_article.Crawler_toutiao_article, 'toutiao_article': toutiao_article.Crawler_toutiao_article,
...@@ -209,42 +210,43 @@ def single_thead(processe,name): ...@@ -209,42 +210,43 @@ def single_thead(processe,name):
if video_time: if video_time:
if start_time < video_time: if start_time < video_time:
if video_time < end_time: if video_time < end_time:
data_list.append(single_data) rds_1.hset("weibo", key=single_data["doc_id"], value=json.dumps(single_data))
# data_list.append(single_data)
else: else:
count_false += 1 count_false += 1
if count_false > allow*3: if count_false > allow*3:
break break
else: else:
data_list.append(single_data) rds_1.hset("weibo", key=single_data["doc_id"], value=json.dumps(single_data))
if len(data_list) >= 100: # if len(data_list) >= 100:
output_result(result_Lst=data_list, # output_result(result_Lst=data_list,
platform=platform, # platform=platform,
output_to_file=output_to_file, # output_to_file=output_to_file,
filepath=None, # filepath=None,
push_to_redis=push_to_redis, # push_to_redis=push_to_redis,
output_to_es_raw=output_to_es_raw, # output_to_es_raw=output_to_es_raw,
es_index=es_index, # es_index=es_index,
output_to_es_register=output_to_es_register) # output_to_es_register=output_to_es_register)
print(len(data_list)) # print(len(data_list))
data_list.clear() # data_list.clear()
print("processe"+ str(processe) + " " +threading.current_thread().name + " down " + platform + str(count)) print("processe"+ str(processe) + " " +threading.current_thread().name + " down " + platform + str(count))
if not count_has: if not count_has:
releaser_body["mssage"] = "爬取失败,请检查账号" releaser_body["mssage"] = "爬取失败,请检查账号"
rds_1.hset("error",releaser_body["platform"] + "/" +releaser_body["releaserUrl"],json.dumps(releaser_body)) rds_1.hset("error",releaser_body["platform"] + "/" +releaser_body["releaserUrl"],json.dumps(releaser_body))
if data_list != []: # if data_list != []:
output_result(result_Lst=data_list, # output_result(result_Lst=data_list,
platform=platform, # platform=platform,
output_to_file=output_to_file, # output_to_file=output_to_file,
push_to_redis=push_to_redis, # push_to_redis=push_to_redis,
filepath=None, # filepath=None,
output_to_es_raw=output_to_es_raw, # output_to_es_raw=output_to_es_raw,
es_index=es_index, # es_index=es_index,
output_to_es_register=output_to_es_register, # output_to_es_register=output_to_es_register,
expire=86400) # expire=86400)
print(len(data_list)) # print(len(data_list))
data_list.clear() # data_list.clear()
except Exception as e: except Exception as e:
print(e) print(e)
......
...@@ -7,4 +7,4 @@ ps aux|grep update_data_in_target_releasers_multi_process_by_date_from_redis|gre ...@@ -7,4 +7,4 @@ ps aux|grep update_data_in_target_releasers_multi_process_by_date_from_redis|gre
ps aux|grep cal_ni_and_put_to_backend|grep -v grep|cut -c 9-15|xargs kill -15 ps aux|grep cal_ni_and_put_to_backend|grep -v grep|cut -c 9-15|xargs kill -15
current_date=`date -d "-1 day" "+%Y%m%d"` current_date=`date -d "-1 day" "+%Y%m%d"`
python /srv/apps/crawler_old/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process_by_date_from_redis.py > /data/log/crawler/write_task_${current_date}_.log & python /srv/apps/crawler_old/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process_by_date_from_redis.py > /data/log/crawler/write_task_${current_date}_.log &
python /srv/apps/crawler_old/crawler/crawler_sys/scheduler/cal_ni_and_put_to_backend.py > /data/log/crawler/.log & #python /srv/apps/crawler_old/crawler/crawler_sys/scheduler/cal_ni_and_put_to_backend.py > /data/log/crawler/.log &
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment