Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
C
crawler
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
backend
crawler
Commits
4dfed1b9
Commit
4dfed1b9
authored
Jul 21, 2020
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
d83f79a7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
26 additions
and
35 deletions
+26
-35
video_fields_std.py
crawler_sys/framework/video_fields_std.py
+1
-1
func_get_proxy_form_kuaidaili.py
crawler_sys/proxy_pool/func_get_proxy_form_kuaidaili.py
+10
-10
crawler_toutiao.py
crawler_sys/site_crawler_test/crawler_toutiao.py
+0
-0
craw_data_and_write_into_weekly_index_remove_duplication.py
...aw_data_and_write_into_weekly_index_remove_duplication.py
+1
-1
func_cal_doc_id.py
write_data_into_es/func_cal_doc_id.py
+11
-20
func_calculate_txxw_video_id.py
write_data_into_es/func_calculate_txxw_video_id.py
+1
-1
get_qingbo_reback_data_from_ftp.py
write_data_into_es/get_qingbo_reback_data_from_ftp.py
+2
-2
No files found.
crawler_sys/framework/video_fields_std.py
View file @
4dfed1b9
...
...
@@ -8,7 +8,7 @@ Created on Mon Feb 26 17:57:38 2018
class
Std_fields_video
:
def
__init__
(
self
,
data_provider
=
None
):
if
data_provider
==
None
:
data_provider
=
'
BDD
'
data_provider
=
'
gengmei
'
self
.
video_data
=
{
'platform'
:
None
,
'channel'
:
None
,
...
...
crawler_sys/proxy_pool/func_get_proxy_form_kuaidaili.py
View file @
4dfed1b9
...
...
@@ -13,16 +13,16 @@ import kdl,requests
from
redis.sentinel
import
Sentinel
sentinel
=
Sentinel
([(
'192.168.17.65'
,
26379
),
(
'192.168.17.66'
,
26379
),
(
'192.168.17.67'
,
26379
)
],
socket_timeout
=
0.5
)
# 查看master节点
master
=
sentinel
.
discover_master
(
'ida_redis_master'
)
# 查看slave 节点
slave
=
sentinel
.
discover_slaves
(
'ida_redis_master'
)
# 连接数据库
rds
=
sentinel
.
master_for
(
'ida_redis_master'
,
socket_timeout
=
0.5
,
db
=
7
,
decode_responses
=
True
)
#
sentinel = Sentinel([('192.168.17.65', 26379),
#
('192.168.17.66', 26379),
#
('192.168.17.67', 26379)
#
], socket_timeout=0.5)
#
#
查看master节点
#
master = sentinel.discover_master('ida_redis_master')
#
#
查看slave 节点
#
slave = sentinel.discover_slaves('ida_redis_master')
#
#
连接数据库
#
rds = sentinel.master_for('ida_redis_master', socket_timeout=0.5, db=7, decode_responses=True)
# rds = redis.StrictRedis(host='192.168.17.60', port=6378, db=7, decode_responses=True)
def
get_proxy_from_redis
():
try
:
...
...
crawler_sys/site_crawler_test/crawler_toutiao.py
View file @
4dfed1b9
This diff is collapsed.
Click to expand it.
crawler_sys/tools/craw_data_and_write_into_weekly_index_remove_duplication.py
View file @
4dfed1b9
...
...
@@ -15,7 +15,7 @@ from elasticsearch.helpers import scan
from
func_find_week_num
import
find_week_belongs_to
from
crawler.crawler_sys.framework.platform_crawler_register
import
get_crawler
from
crawler.crawler_sys.utils
import
trans_format
from
func_cal_doc_id
import
cal_doc_id
from
write_data_into_es.
func_cal_doc_id
import
cal_doc_id
hosts
=
'192.168.17.11'
port
=
80
...
...
write_data_into_es/func_cal_doc_id.py
View file @
4dfed1b9
...
...
@@ -6,26 +6,17 @@ Created on Wed Jun 20 09:19:12 2018
"""
import
hashlib
try
:
from
write_data_into_es.func_calculate_toutiao_video_id
import
calculate_toutiao_video_id
from
write_data_into_es.func_calculate_newTudou_video_id
import
calculate_newTudou_video_id
from
write_data_into_es.func_calculate_v_qq_video_id
import
calculate_v_qq_video_id
#from func_calculate_kwai_video_id_by_data import calculate_kwai_video_id_by_data
from
write_data_into_es.func_calculate_kwai_video_id_by_url
import
calculate_kwai_video_id_by_data_by_url
from
write_data_into_es.func_calculate_txxw_video_id
import
calculate_txxw_video_id
from
write_data_into_es.func_calculate_wangyi_news_id
import
calculate_wangyi_news_id
from
write_data_into_es.func_calculate_douyin_id
import
calculate_douyin_id
from
write_data_into_es.func_calculate_haokan_video_id
import
calculate_haokan_id
except
:
from
write_data_into_es_new.func_calculate_toutiao_video_id
import
calculate_toutiao_video_id
from
write_data_into_es_new.func_calculate_newTudou_video_id
import
calculate_newTudou_video_id
from
write_data_into_es_new.func_calculate_v_qq_video_id
import
calculate_v_qq_video_id
# from func_calculate_kwai_video_id_by_data import calculate_kwai_video_id_by_data
from
write_data_into_es_new.func_calculate_kwai_video_id_by_url
import
calculate_kwai_video_id_by_data_by_url
from
write_data_into_es_new.func_calculate_txxw_video_id
import
calculate_txxw_video_id
from
write_data_into_es_new.func_calculate_wangyi_news_id
import
calculate_wangyi_news_id
from
write_data_into_es_new.func_calculate_douyin_id
import
calculate_douyin_id
from
write_data_into_es_new.func_calculate_haokan_video_id
import
calculate_haokan_id
from
write_data_into_es.func_calculate_toutiao_video_id
import
calculate_toutiao_video_id
from
write_data_into_es.func_calculate_newTudou_video_id
import
calculate_newTudou_video_id
from
write_data_into_es.func_calculate_v_qq_video_id
import
calculate_v_qq_video_id
#from func_calculate_kwai_video_id_by_data import calculate_kwai_video_id_by_data
from
write_data_into_es.func_calculate_kwai_video_id_by_url
import
calculate_kwai_video_id_by_data_by_url
from
write_data_into_es.func_calculate_txxw_video_id
import
calculate_txxw_video_id
from
write_data_into_es.func_calculate_wangyi_news_id
import
calculate_wangyi_news_id
from
write_data_into_es.func_calculate_douyin_id
import
calculate_douyin_id
from
write_data_into_es.func_calculate_haokan_video_id
import
calculate_haokan_id
def
vid_cal_func
(
platform
):
vid_cal_func_dict
=
{
...
...
write_data_into_es/func_calculate_txxw_video_id.py
View file @
4dfed1b9
from
func_get_releaser_id
import
get_releaser_id
from
write_data_into_es.
func_get_releaser_id
import
get_releaser_id
def
calculate_txxw_video_id
(
data_dict
):
try
:
...
...
write_data_into_es/get_qingbo_reback_data_from_ftp.py
View file @
4dfed1b9
...
...
@@ -7,11 +7,11 @@ from elasticsearch import Elasticsearch
import
json
,
copy
from
write_data_into_es.func_get_releaser_id
import
get_releaser_id
from
write_data_into_es.func_cal_doc_id
import
cal_doc_id
from
func_transfer_from_ftp
import
transfer_from_ftp
from
write_data_into_es.
func_transfer_from_ftp
import
transfer_from_ftp
import
logging
from
urllib.parse
import
parse_qs
,
urlparse
from
elasticsearch.helpers
import
scan
from
ReleaserMeta
import
ReleaseMeta
from
write_data_into_es.
ReleaserMeta
import
ReleaseMeta
hosts
=
'192.168.17.11'
port
=
80
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment