Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
C
crawler
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
backend
crawler
Commits
61e83a10
Commit
61e83a10
authored
4 years ago
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
ec314915
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
1 addition
and
56 deletions
+1
-56
cal_ni_and_put_to_backend.py
crawler_sys/scheduler/cal_ni_and_put_to_backend.py
+1
-1
yujiang-zhou-03492e71-21ea-4f52-9fdd-b9dbe21cc8da-v5.json
...yujiang-zhou-03492e71-21ea-4f52-9fdd-b9dbe21cc8da-v5.json
+0
-2
crontab
tasks/crontab/192.168.18.11/etc/crontab
+0
-53
No files found.
crawler_sys/scheduler/cal_ni_and_put_to_backend.py
View file @
61e83a10
...
...
@@ -112,11 +112,11 @@ class push_rule(object):
def
scan_from_redis
(
push_rule_class_list
)
->
Dict
:
# len_id_list = rds.llen("doc_id")
while
True
:
set_name
=
"exists_doc_id_set_
%
s"
%
datetime
.
datetime
.
now
()
.
strftime
(
"
%
Y-
%
m-
%
d"
)
rds
.
sadd
(
set_name
,
"test"
)
rds
.
expire
(
set_name
,
259200
)
out_ts
=
datetime
.
datetime
.
now
()
.
timestamp
()
*
1e3
-
86400000
while
True
:
doc_id
=
rds
.
lpop
(
"doc_id"
)
if
doc_id
:
res
=
rds
.
llen
(
doc_id
)
...
...
This diff is collapsed.
Click to expand it.
maintenance/EsLicense/yujiang-zhou-03492e71-21ea-4f52-9fdd-b9dbe21cc8da-v5.json
deleted
100644 → 0
View file @
ec314915
{
"license"
:{
"uid"
:
"03492e71-21ea-4f52-9fdd-b9dbe21cc8da"
,
"type"
:
"basic"
,
"issue_date_in_millis"
:
1571961600000
,
"expiry_date_in_millis"
:
1603670399999
,
"max_nodes"
:
100
,
"issued_to"
:
"YuJiang Zhou (csm)"
,
"issuer"
:
"Web Form"
,
"signature"
:
"AAAAAwAAAA1+zC+3Nc7L82v4y8hvAAABmC9ZN0hjZDBGYnVyRXpCOW5Bb3FjZDAxOWpSbTVoMVZwUzRxVk1PSmkxaktJRVl5MUYvUWh3bHZVUTllbXNPbzBUemtnbWpBbmlWRmRZb25KNFlBR2x0TXc2K2p1Y1VtMG1UQU9TRGZVSGRwaEJGUjE3bXd3LzRqZ05iLzRteWFNekdxRGpIYlFwYkJiNUs0U1hTVlJKNVlXekMrSlVUdFIvV0FNeWdOYnlESDc3MWhlY3hSQmdKSjJ2ZTcvYlBFOHhPQlV3ZHdDQ0tHcG5uOElCaDJ4K1hob29xSG85N0kvTWV3THhlQk9NL01VMFRjNDZpZEVXeUtUMXIyMlIveFpJUkk2WUdveEZaME9XWitGUi9WNTZVQW1FMG1DenhZU0ZmeXlZakVEMjZFT2NvOWxpZGlqVmlHNC8rWVVUYzMwRGVySHpIdURzKzFiRDl4TmM1TUp2VTBOUlJZUlAyV0ZVL2kvVk10L0NsbXNFYVZwT3NSU082dFNNa2prQ0ZsclZ4NTltbU1CVE5lR09Bck93V2J1Y3c9PQAAAQCO/MqlprtquUMe/M6sXV7TdP8yFjvFAkIi7yMSQemhy3ORqgjk+jFLu0LtKtD051cy6PjKGP8qvbrYQTFIIU0PiMW5dVfHbGA75EbXOExhW1tSyiKvFNBb0ewCXdQVL+CwQFxtJ5oRmgzyKlYXxuS3gyb2fNgbRTnM6anLExA8WiJJTpAZ77xiHlN/rXSk9+VqdpEtSHai6/2KtgF+ENFMIgOcX5yXB3tWdMq2R6toPtEk1Mdg82XR5e1NVFWnBxARqym0rikaarkdARrliQpzoVZGsFUlgL27hGoRNXEdKydsE3aBla5yUoiwHqQeMc/cfsLnMdp71Tg08XfwnGNA"
,
"start_date_in_millis"
:
1571961600000
}}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
tasks/crontab/192.168.18.11/etc/crontab
deleted
100644 → 0
View file @
ec314915
SHELL=/bin/bash
PATH=/sbin:/bin:/usr/sbin:/usr/bin
MAILTO=hanye
HOME=/
# For details see man 4 crontabs
# Example of job definition:
# .---------------- minute (0 - 59)
# | .------------- hour (0 - 23)
# | | .---------- day of month (1 - 31)
# | | | .------- month (1 - 12) OR jan,feb,mar,apr ...
# | | | | .---- day of week (0 - 6) (Sunday=0 or 7) OR sun,mon,tue,wed,thu,fri,sat
# | | | | |
# * * * * * user-name command to be executed
# 1 update video data in target release index daily
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p toutiao -n 10 -s 10
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p 腾讯视频 -n 30 -s 10
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p iqiyi -n 30 -s 10
0 0 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p new_tudou -n 10 -s 10
# 2 write crawled data into short-video-production index daily
0 05 * * * hanye python3 /home/hanye/crawlers/tasks/update_DU_ATU_from_crawler_raw.py
# 3 create redis url batch
0 18 * * * hanye python3 /home/hanye/crawlers/crawler_sys/scheduler/generate_redis_url_batch.py -p iqiyi -p 腾讯视频 -b 02 -d 30
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/scheduler/generate_redis_url_batch.py -p iqiyi -p 腾讯视频 -b 02 -d 30
# 4 scrape redis url list
0 03 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_redis_urls.py -p 腾讯视频 -b 02
0 03 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_redis_urls.py -p iqiyi -b 02
0 03 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_list_pages.py -p youku -n 30
# 5 scrape list pages
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_list_pages_multi_process.py -p iqiyi -n 30
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_list_pages_multi_process.py -p youku -n 30
0 10 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_list_pages_multi_process.py -p 腾讯视频 -n 30
0 7,10,15 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/scrap_list_pages_multi_process.py -p toutiao -n 160 -s 10
# update haokan target releaser
0 0,8,12,18 * * * hanye python3 /home/hanye/crawlers/crawler_sys/framework/update_data_in_target_releasers_single_thread.py -p haokan -n 20
# get tencent news search page
0 0,4,8,10,14,16,20 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/search_page_single_process.py -p 腾讯新闻
# high-frequency releasers: crawler runs at 00:00 and then hourly from 07:00 to 22:00; only for haokan currently
0 0,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/high_fre_releasers.py -p haokan
# high-frequency releasers: execute at midnight, 1 pm and 6 pm
0 0,13,18 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p toutiao -fre 3 -n 20 -s 15
0 0,13,18 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p 腾讯视频 -fre 3 -n 20 -s 15
0 0,13,18 * * * hanye python3 /home/hanye/crawlersNew/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process.py -p new_tudou -fre 3 -n 20 -s 15
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment