def scan_from_redis(push_rule_class_list) -> Dict: def scan_from_redis(push_rule_class_list) -> Dict:
# len_id_list = rds.llen("doc_id") # len_id_list = rds.llen("doc_id")
set_name = "exists_doc_id_set_%s" %"%Y-%m-%d")
rds.sadd(set_name, "test")
rds.expire(set_name, 259200)
out_ts = * 1e3 - 86400000
while True: while True:
set_name = "exists_doc_id_set_%s" %"%Y-%m-%d")
rds.sadd(set_name, "test")
rds.expire(set_name, 259200)
out_ts = * 1e3 - 86400000
doc_id = rds.lpop("doc_id") doc_id = rds.lpop("doc_id")
if doc_id: if doc_id:
res = rds.llen(doc_id) res = rds.llen(doc_id)
# For details see man 4 crontabs
# Example of job definition:
# .---------------- minute (0 - 59)
# | .------------- hour (0 - 23)
# | | .---------- day of month (1 - 31)
# | | | .------- month (1 - 12) OR jan,feb,mar,apr ...
# | | | | .---- day of week (0 - 6) (Sunday=0 or 7) OR sun,mon,tue,wed,thu,fri,sat
# | | | | |
# * * * * * user-name command to be executed
# 1 update video data in target release index daily
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p toutiao -n 10 -s 10
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p 腾讯视频 -n 30 -s 10
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p iqiyi -n 30 -s 10
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p new_tudou -n 10 -s 10
# 2 write crawled data into short-video-production index daily
0 05 * * * hanye python3 /home/hanye/crawlers/tasks/
# 3 create redis url batch
0 18 * * * hanye python3 /home/hanye/crawlers/crawler_sys/scheduler/ -p iqiyi -p 腾讯视频 -b 02 -d 30
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/scheduler/ -p iqiyi -p 腾讯视频 -b 02 -d 30
# 4 scrape redis url list
0 03 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p 腾讯视频 -b 02
0 03 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p iqiyi -b 02
0 03 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p youku -n 30
# 5 scrape list pages
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p iqiyi -n 30
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p youku -n 30
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p 腾讯视频 -n 30
0 7,10,15 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p toutiao -n 160 -s 10
# update haokan target releaser
0 0,8,12,18 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/ -p haokan -n 20
# get tencent news search page
0 0,4,8,10,14,16,20 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/ -p 腾讯新闻
# high frequency releasers: crawler executes on the hour at 00:00 and hourly from 07:00 through 22:00, only for haokan currently
0 0,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/ -p haokan
# high frequency releasers: execute at midnight, 1 pm and 6 pm
0 0,13,18 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/ -p toutiao -fre 3 -n 20 -s 15
0 0,13,18 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/ -p 腾讯视频 -fre 3 -n 20 -s 15
0 0,13,18 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/ -p new_tudou -fre 3 -n 20 -s 15
