Commit 4d36fce6 authored by litaolemo

update

parent 62991f3f
......@@ -11,8 +11,7 @@ from elasticsearch.helpers import scan
#rds=redis.StrictRedis(host='192.168.17.26',port=6379,db=0)
es_framework = Elasticsearch(hosts='192.168.17.11', port=80,
http_auth=('crawler', 'XBcasfo8dgfs'))
es_framework = Elasticsearch(hosts='172.16.32.37', port=9200)
index_target_releaser = 'target_releasers'
doc_type_target_releaser = 'doc'
......@@ -38,7 +37,8 @@ def bulk_write_target_releasers(dict_Lst,
if write_counter%1000==0 or write_counter==len(dict_Lst):
print('Writing into es %d/%d' % (write_counter, len(dict_Lst)))
if bulk_write_body!='':
es_framework.bulk(body=bulk_write_body, request_timeout=100)
es_framework.bulk(index=index_target_releaser,body=bulk_write_body, request_timeout=100)
def get_releaserUrls_from_es(platform,
releaser=None,
......@@ -57,7 +57,6 @@ def get_releaserUrls_from_es(platform,
search_body['query']['bool']['filter'].append(frequency_dict)
# print(target_index,doc_type_target_releaser,search_body)
search_resp=es_framework.search(index=target_index,
doc_type=doc_type_target_releaser,
body=search_body,
size=0,
request_timeout=100)
......@@ -67,7 +66,6 @@ def get_releaserUrls_from_es(platform,
print('Got %d releaserUrls for platform %s.' % (total_hit, platform))
scan_resp = scan(client=es_framework, query=search_body,
index=target_index,
doc_type=doc_type_target_releaser,
request_timeout=200)
for line in scan_resp:
try:
......
......@@ -16,7 +16,7 @@ Data in es will be update when run this program once.
"""
from crawler.crawler_sys.site_crawler_by_redis import (crawler_toutiao, crawler_v_qq, crawler_tudou, crawler_haokan,
crawler_tencent_news,
crawler_wangyi_news, crawler_kwai, crawler_douyin,toutiao_article)
crawler_wangyi_news, crawler_kwai, crawler_douyin,toutiao_article,crawler_weibo)
import sys
from crawler.crawler_sys.utils.output_results import output_result
import argparse, copy, datetime, time
......@@ -27,18 +27,18 @@ from concurrent.futures import ProcessPoolExecutor
import threading
from redis.sentinel import Sentinel
sentinel = Sentinel([('192.168.17.65', 26379),
('192.168.17.66', 26379),
('192.168.17.67', 26379)
], socket_timeout=1)
# 查看master节点
master = sentinel.discover_master('ida_redis_master')
# 查看slave 节点
slave = sentinel.discover_slaves('ida_redis_master')
# 连接数据库
rds_1 = sentinel.master_for('ida_redis_master', socket_timeout=1, db=1, decode_responses=True)
# sentinel = Sentinel([('192.168.17.65', 26379),
# ('192.168.17.66', 26379),
# ('192.168.17.67', 26379)
# ], socket_timeout=1)
# # 查看master节点
# master = sentinel.discover_master('ida_redis_master')
# # 查看slave 节点
# slave = sentinel.discover_slaves('ida_redis_master')
# # 连接数据库
# rds_1 = sentinel.master_for('ida_redis_master', socket_timeout=1, db=1, decode_responses=True)
# rds_1 = redis.StrictRedis(host='192.168.17.60', port=6379, db=1, decode_responses=True)
rds_1 = redis.StrictRedis(host='154.8.190.251', port=6379, db=19, decode_responses=True)
parser = argparse.ArgumentParser(description='Specify a platform name.')
parser.add_argument('-n', '--max_page', default=30, type=int,
......@@ -83,7 +83,8 @@ platform_crawler_reg = {
# 'Mango': crawler_mango,
'抖音': crawler_douyin.Crawler_douyin,
"网易新闻": crawler_wangyi_news.Crawler_wangyi_news,
"kwai": crawler_kwai.Crawler_kwai
"kwai": crawler_kwai.Crawler_kwai,
"weibo": crawler_weibo.Crawler_weibo
}
......
......@@ -23,17 +23,18 @@ import redis,json
from redis.sentinel import Sentinel
sentinel = Sentinel([('192.168.17.65', 26379),
('192.168.17.66', 26379),
('192.168.17.67', 26379)
],socket_timeout=0.5)
# sentinel = Sentinel([('192.168.17.65', 26379),
# ('192.168.17.66', 26379),
# ('192.168.17.67', 26379)
# ],socket_timeout=0.5)
# 查看master节点
master = sentinel.discover_master('ida_redis_master')
# master = sentinel.discover_master('ida_redis_master')
# 查看slave 节点
slave = sentinel.discover_slaves('ida_redis_master')
# slave = sentinel.discover_slaves('ida_redis_master')
# 连接数据库
rds = sentinel.master_for('ida_redis_master', socket_timeout=0.5, db=1, decode_responses=True)
# rds = redis.StrictRedis(host='192.168.17.60', port=6379, db=1, decode_responses=True)
# rds = sentinel.master_for('ida_redis_master', socket_timeout=0.5, db=1, decode_responses=True)
rds = redis.StrictRedis(host='154.8.190.251', port=6379, db=19, decode_responses=True)
parser = argparse.ArgumentParser(description='Specify a platform name.')
parser.add_argument('-p', '--platform', default=[], action='append',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment