Commit 61e83a10 authored by litaolemo's avatar litaolemo

update

parent ec314915
...@@ -112,11 +112,11 @@ class push_rule(object): ...@@ -112,11 +112,11 @@ class push_rule(object):
def scan_from_redis(push_rule_class_list) -> Dict: def scan_from_redis(push_rule_class_list) -> Dict:
# len_id_list = rds.llen("doc_id") # len_id_list = rds.llen("doc_id")
set_name = "exists_doc_id_set_%s" % datetime.datetime.now().strftime("%Y-%m-%d")
rds.sadd(set_name, "test")
rds.expire(set_name, 259200)
out_ts = datetime.datetime.now().timestamp() * 1e3 - 86400000
while True: while True:
set_name = "exists_doc_id_set_%s" % datetime.datetime.now().strftime("%Y-%m-%d")
rds.sadd(set_name, "test")
rds.expire(set_name, 259200)
out_ts = datetime.datetime.now().timestamp() * 1e3 - 86400000
doc_id = rds.lpop("doc_id") doc_id = rds.lpop("doc_id")
if doc_id: if doc_id:
res = rds.llen(doc_id) res = rds.llen(doc_id)
......
{"license":{"uid":"03492e71-21ea-4f52-9fdd-b9dbe21cc8da","type":"basic","issue_date_in_millis":1571961600000,"expiry_date_in_millis":1603670399999,"max_nodes":100,"issued_to":"YuJiang Zhou (csm)","issuer":"Web Form","signature":"AAAAAwAAAA1+zC+3Nc7L82v4y8hvAAABmC9ZN0hjZDBGYnVyRXpCOW5Bb3FjZDAxOWpSbTVoMVZwUzRxVk1PSmkxaktJRVl5MUYvUWh3bHZVUTllbXNPbzBUemtnbWpBbmlWRmRZb25KNFlBR2x0TXc2K2p1Y1VtMG1UQU9TRGZVSGRwaEJGUjE3bXd3LzRqZ05iLzRteWFNekdxRGpIYlFwYkJiNUs0U1hTVlJKNVlXekMrSlVUdFIvV0FNeWdOYnlESDc3MWhlY3hSQmdKSjJ2ZTcvYlBFOHhPQlV3ZHdDQ0tHcG5uOElCaDJ4K1hob29xSG85N0kvTWV3THhlQk9NL01VMFRjNDZpZEVXeUtUMXIyMlIveFpJUkk2WUdveEZaME9XWitGUi9WNTZVQW1FMG1DenhZU0ZmeXlZakVEMjZFT2NvOWxpZGlqVmlHNC8rWVVUYzMwRGVySHpIdURzKzFiRDl4TmM1TUp2VTBOUlJZUlAyV0ZVL2kvVk10L0NsbXNFYVZwT3NSU082dFNNa2prQ0ZsclZ4NTltbU1CVE5lR09Bck93V2J1Y3c9PQAAAQCO/MqlprtquUMe/M6sXV7TdP8yFjvFAkIi7yMSQemhy3ORqgjk+jFLu0LtKtD051cy6PjKGP8qvbrYQTFIIU0PiMW5dVfHbGA75EbXOExhW1tSyiKvFNBb0ewCXdQVL+CwQFxtJ5oRmgzyKlYXxuS3gyb2fNgbRTnM6anLExA8WiJJTpAZ77xiHlN/rXSk9+VqdpEtSHai6/2KtgF+ENFMIgOcX5yXB3tWdMq2R6toPtEk1Mdg82XR5e1NVFWnBxARqym0rikaarkdARrliQpzoVZGsFUlgL27hGoRNXEdKydsE3aBla5yUoiwHqQeMc/cfsLnMdp71Tg08XfwnGNA","start_date_in_millis":1571961600000}}
\ No newline at end of file
SHELL=/bin/bash
PATH=/sbin:/bin:/usr/sbin:/usr/bin
MAILTO=hanye
HOME=/
# For details see man 4 crontabs
# Example of job definition:
# .---------------- minute (0 - 59)
# | .------------- hour (0 - 23)
# | | .---------- day of month (1 - 31)
# | | | .------- month (1 - 12) OR jan,feb,mar,apr ...
# | | | | .---- day of week (0 - 6) (Sunday=0 or 7) OR sun,mon,tue,wed,thu,fri,sat
# | | | | |
# * * * * * user-name command to be executed
# 1. Update video data in the target-releasers index daily (00:00)
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p toutiao -n 10 -s 10
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p 腾讯视频 -n 30 -s 10
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p iqiyi -n 30 -s 10
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p new_tudou -n 10 -s 10
# 2. Write crawled data into the short-video-production index daily (05:00)
0 05 * * * hanye python3 /home/hanye/crawlers/tasks/update_DU_ATU_from_crawler_raw.py
# 3. Generate Redis URL batches for iqiyi and 腾讯视频 (daily at 10:00 and 18:00)
0 18 * * * hanye python3 /home/hanye/crawlers/crawler_sys/scheduler/generate_redis_url_batch.py -p iqiyi -p 腾讯视频 -b 02 -d 30
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/scheduler/generate_redis_url_batch.py -p iqiyi -p 腾讯视频 -b 02 -d 30
# 4. Scrape the Redis URL lists (daily at 03:00); the youku entry in this group runs the list-page scraper at the same hour
0 03 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_redis_urls.py -p 腾讯视频 -b 02
0 03 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_redis_urls.py -p iqiyi -b 02
0 03 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_list_pages.py -p youku -n 30
# 5. Scrape list pages (daily at 10:00; toutiao also runs at 07:00 and 15:00)
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_list_pages_multi_process.py -p iqiyi -n 30
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_list_pages_multi_process.py -p youku -n 30
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_list_pages_multi_process.py -p 腾讯视频 -n 30
0 7,10,15 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_list_pages_multi_process.py -p toutiao -n 160 -s 10
# Update haokan target releasers (daily at 00:00, 08:00, 12:00 and 18:00)
0 0,8,12,18 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/update_data_in_target_releasers_single_thread.py -p haokan -n 20
# Fetch Tencent News (腾讯新闻) search pages (daily at 00:00, 04:00, 08:00, 10:00, 14:00, 16:00 and 20:00)
0 0,4,8,10,14,16,20 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/search_page_single_process.py -p 腾讯新闻
# High-frequency releasers: the crawler runs on the hour at 00:00 and then hourly from 07:00 to 22:00 (not every hour); currently haokan only
0 0,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/high_fre_releasers.py -p haokan
# High-frequency releasers: run at 00:00, 13:00 (1 pm) and 18:00 (6 pm)
0 0,13,18 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p toutiao -fre 3 -n 20 -s 15
0 0,13,18 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p 腾讯视频 -fre 3 -n 20 -s 15
0 0,13,18 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p new_tudou -fre 3 -n 20 -s 15
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment