Commit 4dfed1b9 authored by litaolemo

update

parent d83f79a7
......@@ -8,7 +8,7 @@ Created on Mon Feb 26 17:57:38 2018
class Std_fields_video:
def __init__(self, data_provider=None):
if data_provider==None:
data_provider='BDD'
data_provider='gengmei'
self.video_data={
'platform': None,
'channel': None,
......
......@@ -13,16 +13,16 @@ import kdl,requests
from redis.sentinel import Sentinel
# Module-level Redis connection, resolved through Redis Sentinel.
# The Sentinel client is given three sentinel endpoints and a 0.5 s socket timeout.
sentinel = Sentinel([('192.168.17.65', 26379),
('192.168.17.66', 26379),
('192.168.17.67', 26379)
], socket_timeout=0.5)
# Look up the master node of the 'ida_redis_master' service
master = sentinel.discover_master('ida_redis_master')
# Look up the slave nodes of the same service
slave = sentinel.discover_slaves('ida_redis_master')
# Connect to the database: client for the current master, db 7, responses decoded to str
rds = sentinel.master_for('ida_redis_master', socket_timeout=0.5, db=7, decode_responses=True)
# The commented-out lines below repeat the Sentinel setup above, followed by a
# direct StrictRedis connection as an alternative.
# sentinel = Sentinel([('192.168.17.65', 26379),
# ('192.168.17.66', 26379),
# ('192.168.17.67', 26379)
# ], socket_timeout=0.5)
# # Look up the master node
# master = sentinel.discover_master('ida_redis_master')
# # Look up the slave nodes
# slave = sentinel.discover_slaves('ida_redis_master')
# # Connect to the database
# rds = sentinel.master_for('ida_redis_master', socket_timeout=0.5, db=7, decode_responses=True)
# rds = redis.StrictRedis(host='192.168.17.60', port=6378, db=7, decode_responses=True)
def get_proxy_from_redis():
try:
......
......@@ -22,7 +22,7 @@ import urllib
try:
from crawler_sys.framework.func_get_releaser_id import *
except:
from func_get_releaser_id import *
from write_data_into_es.func_get_releaser_id import *
import requests
from crawler.crawler_sys.proxy_pool.func_get_proxy_form_kuaidaili import get_proxy
from crawler.crawler_sys.framework.get_redirect_resp import get_redirected_resp
......@@ -32,8 +32,9 @@ from crawler.crawler_sys.site_crawler.toutiao_get_signature import getHoney
from crawler.crawler_sys.utils.output_results import output_result
from crawler.crawler_sys.utils.output_results import retry_get_url
from crawler.crawler_sys.utils.trans_duration_str_to_second import trans_duration
# from crawler.crawler_sys.utils import output_log
from crawler.crawler_sys.utils import output_log
from crawler.crawler_sys.utils.util_logging import logged
from write_data_into_es.func_cal_doc_id import cal_doc_id
class Crawler_toutiao():
......@@ -48,32 +49,32 @@ class Crawler_toutiao():
self.video_data = std_fields.video_data
self.video_data['platform'] = self.platform
# remove fields that crawled data don't have
pop_key_Lst = ['channel', 'describe', 'isOriginal', "repost_count", "video_id"]
pop_key_Lst = ['channel', 'describe', 'isOriginal', "video_id"]
for popk in pop_key_Lst:
self.video_data.pop(popk)
self.releaser_url_pattern = 'http://www.365yg.com/c/user/[RELEASER_ID]/'
self.list_page_url_dict = {'all_channel': (
'https://www.365yg.com/api/pc/feed/?max_behot_time=0'
'&category=video_new&utm_source=toutiao')}
'https://www.365yg.com/api/pc/feed/?max_behot_time=0'
'&category=video_new&utm_source=toutiao')}
self.legal_list_page_urls = []
self.legal_channels = []
self.api_list = [
"ic",
"is",
"api3-normal-c-hl",
"ib",
"api3-normal-c-lf",
"id",
"ie",
"api3-normal-c-lq",
"ii",
"io",
"it",
"iu",
"lf",
"lg",
"lh",
"ic",
"is",
"api3-normal-c-hl",
"ib",
"api3-normal-c-lf",
"id",
"ie",
"api3-normal-c-lq",
"ii",
"io",
"it",
"iu",
"lf",
"lg",
"lh",
]
for ch in self.list_page_url_dict:
list_page_url = self.list_page_url_dict[ch]
......@@ -112,18 +113,19 @@ class Crawler_toutiao():
# }
self.headers = {
"accept": "text/javascript, text/html, application/xml, text/xml, */*",
"accept-encoding": "gzip, deflate",
"accept-language": "zh,zh-CN;q=0.9",
"content-type": "application/x-www-form-urlencoded",
# "cookie": "gftoken=MjA4NTcyMDkyMXwxNTgyOTYxNjM3NjZ8fDAGBgYGBgY; SLARDAR_WEB_ID=9706fc8c-b8a6-4265-8a2e-e3f0739daaf2; UM_distinctid=1708fddb4c0466-04c756d28410e1-752c6c3c-51abc-1708fddb4c1790; CNZZDATA1274386066=608234173-1582960977-https%253A%252F%252Fwww.toutiao.com%252F%7C1582960977",
# "referer": "https://profile.zjurl.cn/rogue/ugc/profile/?user_id=50502346296&media_id=50502346296&request_source=1",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
"x-requested-with": "XMLHttpRequest",
"accept": "text/javascript, text/html, application/xml, text/xml, */*",
"accept-encoding": "gzip, deflate",
"accept-language": "zh,zh-CN;q=0.9",
"content-type": "application/x-www-form-urlencoded",
# "cookie": "gftoken=MjA4NTcyMDkyMXwxNTgyOTYxNjM3NjZ8fDAGBgYGBgY; SLARDAR_WEB_ID=9706fc8c-b8a6-4265-8a2e-e3f0739daaf2; UM_distinctid=1708fddb4c0466-04c756d28410e1-752c6c3c-51abc-1708fddb4c1790; CNZZDATA1274386066=608234173-1582960977-https%253A%252F%252Fwww.toutiao.com%252F%7C1582960977",
# "referer": "https://profile.zjurl.cn/rogue/ugc/profile/?user_id=50502346296&media_id=50502346296&request_source=1",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
"x-requested-with": "XMLHttpRequest",
}
# log_path = '/home/hanye/crawlersNew/crawler/crawler_log'
# current_day = str(datetime.datetime.now())[:10]
# info_log_file = log_path + '/all_' + current_day + '.log'
......@@ -214,62 +216,62 @@ class Crawler_toutiao():
print(url)
return None
headers = {
"Accept-Encoding": "gzip",
"X-SS-REQ-TICKET": str(int(datetime.datetime.now().timestamp()) * 1e3),
"sdk-version": "1",
# "Cookie": "qh[360]=1; install_id=85200129335; ttreq=1$e8e97b875965bf4af4b5dbaaba4d4a5ec3441e47; history=JM89SDpxGAfw5%2F%2Bo%2F7tEz15%2FZ0tbUEN7Q8FhEYQQIdJ2oNFBgpagCA7BIbUFNUT0NjSkRIvl2AveOdr2XEVUuDS0FFnQEETEo%2BOH5%2Fvj9%2F0WyqF4xphMZNLJeD6aSBmk15Tt4nTWSGUaEHR0e%2BG9aqGfPFOgOXrZ%2BtQBJVI6QXPA89R9dzs2QCqC6eil7H3eQhcFiJOXE4NLgDL9q7FscXLM78Qv62rk0GuiRN511vlNRZioEEArGesNaKhQXxBmHd1q7ic19JNcb90Cu1ELfdQz11KkY4Ob%2BWZYex%2BRPCfFK6uaO12GkJ%2FEN%2BtofMgAVEg8s0qbw2ehgkKiwToovMVNdJP4ai%2Fqvw4vjlLXFi%2BqefWmhTKpUvum%2FoR3VBIvYDrgeYT5YtpNksxJe6WeA3SReODW1diayV1cq%2FzDhf2%2FoqFMognaHwAAAP%2F%2F; odin_tt=8cd4f07f6dc385b01edd52312dd29fbe7fdbfa059194493779de3fe408b8836bb9265292bb9335bc976037dd93e5d131de7acf894a805930417b4d3be7f308e0",
# "X-Gorgon": "0300ddd08400675de6e75ad03849011c863306ddae2b0eb3cec4",
# "X-Khronos": str(int(datetime.datetime.now().timestamp())),
# "Host": "xgapi.snssdk.com",
"Connection": "Keep-Alive",
"Authorization": "HMAC-SHA1:2.0:1573091168911407306:bab42eac5b9e4a8eb25a91fc371ad533:WTfDrhnIsymHfmHCgG9YvRSu2YY=",
"User-Agent": "okhttp/3.10.0.1",
"X-Pods": "",
"Accept-Encoding": "gzip",
"X-SS-REQ-TICKET": str(int(datetime.datetime.now().timestamp()) * 1e3),
"sdk-version": "1",
# "Cookie": "qh[360]=1; install_id=85200129335; ttreq=1$e8e97b875965bf4af4b5dbaaba4d4a5ec3441e47; history=JM89SDpxGAfw5%2F%2Bo%2F7tEz15%2FZ0tbUEN7Q8FhEYQQIdJ2oNFBgpagCA7BIbUFNUT0NjSkRIvl2AveOdr2XEVUuDS0FFnQEETEo%2BOH5%2Fvj9%2F0WyqF4xphMZNLJeD6aSBmk15Tt4nTWSGUaEHR0e%2BG9aqGfPFOgOXrZ%2BtQBJVI6QXPA89R9dzs2QCqC6eil7H3eQhcFiJOXE4NLgDL9q7FscXLM78Qv62rk0GuiRN511vlNRZioEEArGesNaKhQXxBmHd1q7ic19JNcb90Cu1ELfdQz11KkY4Ob%2BWZYex%2BRPCfFK6uaO12GkJ%2FEN%2BtofMgAVEg8s0qbw2ehgkKiwToovMVNdJP4ai%2Fqvw4vjlLXFi%2BqefWmhTKpUvum%2FoR3VBIvYDrgeYT5YtpNksxJe6WeA3SReODW1diayV1cq%2FzDhf2%2FoqFMognaHwAAAP%2F%2F; odin_tt=8cd4f07f6dc385b01edd52312dd29fbe7fdbfa059194493779de3fe408b8836bb9265292bb9335bc976037dd93e5d131de7acf894a805930417b4d3be7f308e0",
# "X-Gorgon": "0300ddd08400675de6e75ad03849011c863306ddae2b0eb3cec4",
# "X-Khronos": str(int(datetime.datetime.now().timestamp())),
# "Host": "xgapi.snssdk.com",
"Connection": "Keep-Alive",
"Authorization": "HMAC-SHA1:2.0:1573091168911407306:bab42eac5b9e4a8eb25a91fc371ad533:WTfDrhnIsymHfmHCgG9YvRSu2YY=",
"User-Agent": "okhttp/3.10.0.1",
"X-Pods": "",
}
print(vid)
url_dic = {
"group_id": vid,
"item_id": vid,
"aggr_type": 0,
"context": 1,
"flags": 64,
# "iid": "77627602260",
# "device_id": random.randint(50000000000,59999999999),
"ac": "wifi",
"channel": "update",
"aid": "13",
"app_name": "news_article",
"version_code": "732",
"version_name": "7.3.2",
"device_platform": "android",
"ab_version": "830855,947965,942635,662176,665176,674051,643894,919834,649427,677130,710077,801968,707372,661900,668775,990369,739390,662099,668774,765190,976875,857803,952277,757281,679101,660830,759657,661781,648315",
"ab_group": "100168",
"ab_feature": "94563,102749",
"ssmix": "a",
# "device_type": "oppo R11s Plus",
# "device_brand": "OPPO",
"language": "zh",
"os_api": "23",
"os_version": "9.0.1",
# "uuid": "250129616283002",
# "openudid": "7313ae71df9e5367",
"manifest_version_code": "731",
"resolution": "810*1440",
"dpi": "270",
"update_version_code": "75410",
"_rticket": int(datetime.datetime.now().timestamp() * 1e3),
# "rom_version": "coloros__v417ir release-keys",
# "fp": "w2TZFzTqczmWFlwOLSU1J2xecSKO",
"tma_jssdk_version": "1.24.0.1",
# "pos": "5r_x8vP69Ono-fi_p6ysq7Opra2kr6ixv_H86fTp6Pn4v6eupLOkra6vpajg",
# "plugin": "0",
# "ts":int(datetime.datetime.now().timestamp()),
# "as":"ab7f9fce505d1d7dbe7f9f",
# "mas":"011993339399f959a359d379b98587814259a359d3997919d319b3"
"group_id": vid,
"item_id": vid,
"aggr_type": 0,
"context": 1,
"flags": 64,
# "iid": "77627602260",
# "device_id": random.randint(50000000000,59999999999),
"ac": "wifi",
"channel": "update",
"aid": "13",
"app_name": "news_article",
"version_code": "732",
"version_name": "7.3.2",
"device_platform": "android",
"ab_version": "830855,947965,942635,662176,665176,674051,643894,919834,649427,677130,710077,801968,707372,661900,668775,990369,739390,662099,668774,765190,976875,857803,952277,757281,679101,660830,759657,661781,648315",
"ab_group": "100168",
"ab_feature": "94563,102749",
"ssmix": "a",
# "device_type": "oppo R11s Plus",
# "device_brand": "OPPO",
"language": "zh",
"os_api": "23",
"os_version": "9.0.1",
# "uuid": "250129616283002",
# "openudid": "7313ae71df9e5367",
"manifest_version_code": "731",
"resolution": "810*1440",
"dpi": "270",
"update_version_code": "75410",
"_rticket": int(datetime.datetime.now().timestamp() * 1e3),
# "rom_version": "coloros__v417ir release-keys",
# "fp": "w2TZFzTqczmWFlwOLSU1J2xecSKO",
"tma_jssdk_version": "1.24.0.1",
# "pos": "5r_x8vP69Ono-fi_p6ysq7Opra2kr6ixv_H86fTp6Pn4v6eupLOkra6vpajg",
# "plugin": "0",
# "ts":int(datetime.datetime.now().timestamp()),
# "as":"ab7f9fce505d1d7dbe7f9f",
# "mas":"011993339399f959a359d379b98587814259a359d3997919d319b3"
}
url = 'http://xgapi.snssdk.com/video/app/article/information/v25/?%s' % (
urllib.parse.urlencode(url_dic))
urllib.parse.urlencode(url_dic))
# get_page = get_redirected_resp(url)
res = retry_get_url(url, headers=headers, timeout=5, proxies=1)
try:
......@@ -489,7 +491,7 @@ class Crawler_toutiao():
# self.loggererror.error('Got KeyError exception: %s at page %s'
# % (except_msg, releaserUrl))
print('Got KeyError exception: %s at page %s' % (
except_msg, releaserUrl))
except_msg, releaserUrl))
try:
print(duration_str)
except:
......@@ -698,14 +700,14 @@ class Crawler_toutiao():
def get_data_mediaid(self, releaserUrl, releaser_id):
headers = {
"Host": "m.toutiao.com",
"Connection": "keep-alive",
"Cache-Control": "max-age=0",
"Upgrade-Insecure-Requests": "1",
"User-Agent": self.random_useragent(),
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.9"
"Host": "m.toutiao.com",
"Connection": "keep-alive",
"Cache-Control": "max-age=0",
"Upgrade-Insecure-Requests": "1",
"User-Agent": self.random_useragent(),
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.9"
}
releaserUrl = "http://m.toutiao.com/profile/%s/#mid=%s" % (releaser_id, releaser_id)
time.sleep(1)
......@@ -765,6 +767,156 @@ class Crawler_toutiao():
return video_image_url
def get_web_article_info(self, article_id):
    """Fetch one article's metadata from the m.toutiao.com ``info`` endpoint.

    Sends a GET request for ``https://m.toutiao.com/i<article_id>/info/``
    through ``retry_get_url`` and renames the fields of the reply's ``data``
    object to the field names this crawler stores.

    Args:
        article_id: Article identifier; it is interpolated into both the
            request URL and the ``Referer`` header.

    Returns:
        dict: keys ``title``, ``high_quality_flag``, ``play_count``,
        ``comment_count``, ``repost_count``, ``favorite_count``,
        ``releaser_followers_count``, ``release_time`` and ``content``.
        A field that is absent from the reply is returned as ``None``
        instead of aborting the whole lookup.

    Raises:
        KeyError: if the JSON reply has no ``data`` entry.
        Exception: anything raised by ``retry_get_url`` or while decoding
            the reply as JSON is propagated unchanged.
    """

    def _int_or_none(value):
        # int(None) raises TypeError; report a missing field as None so the
        # remaining fields of the reply are still returned.
        return None if value is None else int(value)

    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh,zh-CN;q=0.9",
        "Connection": "keep-alive",
        "Host": "m.toutiao.com",
        "Referer": "https://m.toutiao.com/i%s" % article_id,
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1",
    }
    url = "https://m.toutiao.com/i{0}/info/?i={1}".format(article_id, article_id)
    requests_res = retry_get_url(url, headers=headers, proxies=0)
    data = requests_res.json()["data"]

    publish_time = data.get("publish_time")
    return {
        "title": data.get("title"),
        "high_quality_flag": _int_or_none(data.get("high_quality_flag")),
        # the endpoint's impression_count is what gets stored as play_count
        "play_count": _int_or_none(data.get("impression_count")),
        "comment_count": data.get("comment_count"),
        "repost_count": data.get("repost_count"),
        "favorite_count": data.get("digg_count"),
        "releaser_followers_count": data.get("follower_count"),
        # scaled by 1e3 -- presumably seconds -> milliseconds; TODO confirm
        "release_time": None if publish_time is None else int(publish_time * 1e3),
        "content": data.get("content"),
    }
def web_releaser_page_article(self, releaserUrl,
                              releaser_page_num_max=50000,
                              proxies_num=None, **kwargs):
    """Yield one record per article listed on a releaser's profile feed.

    Pages through the ``profile_article`` feed on ``profile.zjurl.cn``,
    starting at offset ``"0"`` and following the ``offset`` value returned
    by each page.  Every feed item is copied into a deep copy of
    ``self.video_data`` and then enriched, best effort, with the result of
    ``self.get_web_article_info``.

    The loop ends when the feed reports no more data, when
    ``releaser_page_num_max`` non-empty pages have been processed, or when
    too many consecutive requests / replies fail.

    Args:
        releaserUrl: URL of the releaser's page.  The releaser id is taken
            from it with ``self.find_releaser_id`` and the URL itself is
            stored in every record.
        releaser_page_num_max: Maximum number of non-empty pages to process.
        proxies_num: Forwarded to ``get_proxy``; when that returns a falsy
            value the request is sent without proxies.
        **kwargs: Accepted and ignored.

    Yields:
        dict: one record per feed item.
    """
    # Upper bound on consecutive request failures; without it a persistent
    # network or proxy error would make the loop spin forever.
    max_request_errors = 5

    releaser_id = self.find_releaser_id(releaserUrl)
    has_more = True
    count = 1
    count_false = 0
    request_errors = 0
    offset = "0"

    while has_more and count <= releaser_page_num_max:
        url = "https://profile.zjurl.cn/api/feed/profile/v2/?category=profile_article&visited_uid={0}&stream_api_version=82&request_source=1&offset={1}&user_id={2}".format(
            str(releaser_id), str(offset), str(releaser_id))
        try:
            proxies = get_proxy(proxies_num)
            if proxies:
                get_page = requests.get(url, headers=self.headers, proxies=proxies, timeout=10)
            else:
                get_page = requests.get(url, headers=self.headers, timeout=10)
        except Exception as e:
            request_errors += 1
            if request_errors >= max_request_errors:
                print("toutiao request failed %s times in a row for releaser %s: %s" % (
                    request_errors, releaser_id, e))
                break
            continue
        request_errors = 0
        print("get_page %s on page %s" % (releaser_id, count))

        try:
            page_dic = get_page.json()
        except ValueError:
            # body was not valid JSON
            page_dic = None
        if not isinstance(page_dic, dict):
            # undecodable or unexpectedly shaped reply: retry a few times
            count_false += 1
            if count_false >= 3:
                break
            continue
        if page_dic.get("message") != "success":
            count_false += 1
            if count_false < 3:
                continue
            print("unknow error")
            break

        data_list = page_dic.get('data')
        has_more = page_dic.get('has_more')
        offset = str(page_dic.get("offset"))
        if has_more is None:
            has_more = False

        if not data_list:
            print("toutiao no data in releaser %s page %s" % (releaser_id, count))
            count_false += 1
            # NOTE(review): the result is unused (the next iteration calls
            # get_proxy(proxies_num) again); kept in case the call has side
            # effects on the proxy pool -- confirm and remove.
            get_proxy(1)
            if count_false >= 5:
                has_more = False
                break
            continue

        count_false = 0
        count += 1
        for one_video in data_list:
            # each feed item carries its payload as a JSON string
            info_dic = json.loads(one_video["content"])
            video_dic = copy.deepcopy(self.video_data)
            video_dic['title'] = info_dic.get('title')
            video_dic['abstract'] = info_dic.get('abstract')
            video_dic['url'] = info_dic.get('share_url')
            video_dic['releaser'] = info_dic.get('source')
            video_dic['releaserUrl'] = releaserUrl
            release_time = info_dic.get('publish_time')
            # a missing publish_time must not abort the whole generator
            video_dic['release_time'] = (
                None if release_time is None else int(release_time * 1e3))
            video_dic['duration'] = info_dic.get('video_duration')
            video_dic['play_count'] = info_dic.get('read_count')
            # forward_info may be absent; treat that as "no forward count"
            forward_info = info_dic.get('forward_info') or {}
            video_dic['repost_count'] = forward_info.get('forward_count')
            video_dic['comment_count'] = info_dic.get('comment_count')
            video_dic['favorite_count'] = info_dic.get('digg_count')
            video_dic['article_id'] = info_dic.get('tag_id')
            video_dic['fetch_time'] = int(time.time() * 1e3)
            video_dic['releaser_id_str'] = "toutiao_%s" % releaser_id
            video_dic['video_img'] = self.get_video_image(info_dic)
            video_dic['id'] = cal_doc_id(video_dic["platform"], url=video_dic["url"],
                                         doc_id_type='all-time-url', data_dict=video_dic)
            try:
                article_info = self.get_web_article_info(info_dic.get('tag_id'))
                # only take fields the detail lookup actually provided, so a
                # missing value does not wipe out what the feed supplied
                video_dic.update(
                    {key: value for key, value in article_info.items()
                     if value is not None})
            except Exception as e:
                print("method get_web_article_info error %s" % e)
            yield video_dic
def App_releaser_page_video(self, releaserUrl,
output_to_file=False,
filepath=None,
......@@ -864,7 +1016,7 @@ class Crawler_toutiao():
# }
# url = "https://{0}.snssdk.com/api/feed/profile/v{1}/?category=profile_video&{2}".format(
# random.choice(self.api_list), random.randint(5, 10), urllib.parse.urlencode(url_dic))
#url = """https://{0}.snssdk.com/api/feed/profile/v{1}/?category=profile_video&visited_uid={2}&client_extra_params=&count=20&offset={3}&stream_api_version=88&category=profile_video&version_code=7.6.0&app_name=news_article&channel=App%20Store&resolution=1536*2048&aid=13&ab_feature=794528&ab_version=765192,857803,660830,1444046,1397712,1434498,662176,801968,1419045,668775,1462526,1190525,1489306,1493796,1439625,1469498,668779,1417599,662099,1477261,1484884,668774,1496422,1427395&ab_group=794528&pos=5pe9vb/x8v788cLx/On47unC7fLuv72nveaXvb29vb/ 8vLv fTz/On4y/zx6Pjuv72nveaXvb29vb29v/Hy8/r06ej5 L 9p72tsZe9vb29vb2/8fzp9Ono fi/vae9rZe9vb294Je9veCX4A==&update_version_code=76014&ac=WIFI&os_version=13.3.1&ssmix=a&device_platform=ipad&ab_client=a1,f2,f7,e1&device_type=iPad6,11""".format(random.choice(self.api_list), random.randint(1, 1),str(releaser_id),str(offset))
# url = """https://{0}.snssdk.com/api/feed/profile/v{1}/?category=profile_video&visited_uid={2}&client_extra_params=&count=20&offset={3}&stream_api_version=88&category=profile_video&version_code=7.6.0&app_name=news_article&channel=App%20Store&resolution=1536*2048&aid=13&ab_feature=794528&ab_version=765192,857803,660830,1444046,1397712,1434498,662176,801968,1419045,668775,1462526,1190525,1489306,1493796,1439625,1469498,668779,1417599,662099,1477261,1484884,668774,1496422,1427395&ab_group=794528&pos=5pe9vb/x8v788cLx/On47unC7fLuv72nveaXvb29vb/ 8vLv fTz/On4y/zx6Pjuv72nveaXvb29vb29v/Hy8/r06ej5 L 9p72tsZe9vb29vb2/8fzp9Ono fi/vae9rZe9vb294Je9veCX4A==&update_version_code=76014&ac=WIFI&os_version=13.3.1&ssmix=a&device_platform=ipad&ab_client=a1,f2,f7,e1&device_type=iPad6,11""".format(random.choice(self.api_list), random.randint(1, 1),str(releaser_id),str(offset))
# url = "https://{0}.snssdk.com/api/feed/profile/v{1}/?category=profile_video&visited_uid={2}&stream_api_version=47&count=20&offset={3}&ac=wifi&channel=wap_test_lite_1&aid=35&app_name=news_article_lite&version_code=715&version_name=7.1.5&device_platform=android&ab_version=668903,668905,668907,808414,772541,1378617,668908,668904,668906,1401332,1496418,928942&ab_client=a1,c2,e1,f2,g2,f7&ab_feature=z1&abflag=3&ssmix=a&device_type=OPPO R11&device_brand=OPPO&language=zh&os_api=22&os_version=5.1.1&manifest_version_code=715&resolution=900*1600&dpi=320&update_version_code=71504&sa_enable=0&fp=a_fake_fp&tma_jssdk_version=1.25.4.2&rom_version=coloros__r11-user 5.1.1 nmf26x 500200210 release-keys&plugin_state=30631999".format(
# random.choice(self.api_list), random.randint(5, 10), str(releaser_id), str(offset))
url = "https://profile.zjurl.cn/api/feed/profile/v1/?category=profile_video&visited_uid={0}&stream_api_version=82&request_source=1&offset={1}&user_id={2}".format(
......@@ -968,7 +1120,8 @@ class Crawler_toutiao():
count_false = 0
count_no_data = 0
offset = "0"
self.headers["referer"] = "https://profile.zjurl.cn/rogue/ugc/profile/?user_id=%s&request_source=1" % releaser_id
self.headers[
"referer"] = "https://profile.zjurl.cn/rogue/ugc/profile/?user_id=%s&request_source=1" % releaser_id
# vid = "AB5483CA-FCDC-42F1-AFB1-077A1%sDA" % random.randint(100000, 999999)
# ccid = "F153594D-1310-4984-A4C3-A679D4D%s" % random.randint(10000, 99999)
# openudid = "5d44f2ea1b74e3731b27e5ed8039ac29f%s" % random.randint(1000000, 9999999)
......@@ -1156,18 +1309,18 @@ class Crawler_toutiao():
video_url = info_dic.get("raw_data").get("origin_group").get('article_url')
elif info_dic.get("raw_data").get("comment_base"):
video_url = info_dic.get("raw_data").get("comment_base").get('share').get(
'share_url')
'share_url')
elif info_dic.get("raw_data").get("action"):
video_url = "https://m.toutiaoimg.cn/group/%s/" % info_dic.get("raw_data").get(
'group_id')
'group_id')
video_dic['video_id'] = info_dic.get("raw_data").get('group_id')
video_dic['play_count'] = info_dic.get("raw_data").get("action").get("play_count")
video_dic['repost_count'] = info_dic.get("raw_data").get("action").get(
"share_count")
"share_count")
video_dic['comment_count'] = info_dic.get("raw_data").get("action").get(
'comment_count')
'comment_count')
video_dic['favorite_count'] = info_dic.get("raw_data").get("action").get(
'digg_count')
'digg_count')
video_dic['duration'] = info_dic.get('raw_data').get('video').get("duration")
video_dic['title'] = info_dic.get('raw_data').get("title")
video_dic['releaser'] = info_dic.get('raw_data').get("user").get("info").get("name")
......@@ -1176,7 +1329,7 @@ class Crawler_toutiao():
video_dic['releaser_id_str'] = "toutiao_%s" % releaser_id
video_dic['fetch_time'] = int(datetime.datetime.now().timestamp() * 1e3)
video_dic['video_img'] = "http://p1-tt.bytecdn.cn/large/" + info_dic.get(
'raw_data').get('video').get("origin_cover").get("uri")
'raw_data').get('video').get("origin_cover").get("uri")
video_dic['release_time'] = int(info_dic.get("raw_data").get("create_time") * 1e3)
video_url = None
if video_url:
......@@ -1221,16 +1374,16 @@ class Crawler_toutiao():
data_count = 0
# print(as_cp_sign)
headers = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"accept-encoding": "gzip, deflate, br",
"accept-language": "zh-CN,zh;q=0.9",
"content-type": "application/x-www-form-urlencoded",
"x-requested-with": "XMLHttpRequest",
"Referer": "https://www.toutiao.com/c/user/%s/" % releaser_id,
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
# "cookie":'cookie: tt_webid=6673330506500982276; WEATHER_CITY=%E5%8C%97%E4%BA%AC; UM_distinctid=169c3156bb86b3-00d2e2a0ad50b2-7a1437-161398-169c3156bb9746; tt_webid=6673330506500982276; csrftoken=301d4862d95090ad520f8a54ae360b93; uuid="w:79cdae1ec41c48c9b9cd21255077f629"; CNZZDATA1259612802=281397494-1553752275-https%253A%252F%252Fwww.baidu.com%252F%7C1555306390',
"cache-control": "max-age=0",
"upgrade-insecure-requests": "1"
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"accept-encoding": "gzip, deflate, br",
"accept-language": "zh-CN,zh;q=0.9",
"content-type": "application/x-www-form-urlencoded",
"x-requested-with": "XMLHttpRequest",
"Referer": "https://www.toutiao.com/c/user/%s/" % releaser_id,
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
# "cookie":'cookie: tt_webid=6673330506500982276; WEATHER_CITY=%E5%8C%97%E4%BA%AC; UM_distinctid=169c3156bb86b3-00d2e2a0ad50b2-7a1437-161398-169c3156bb9746; tt_webid=6673330506500982276; csrftoken=301d4862d95090ad520f8a54ae360b93; uuid="w:79cdae1ec41c48c9b9cd21255077f629"; CNZZDATA1259612802=281397494-1553752275-https%253A%252F%252Fwww.baidu.com%252F%7C1555306390',
"cache-control": "max-age=0",
"upgrade-insecure-requests": "1"
}
user_page_url = "https://www.toutiao.com/c/user/%s/" % releaser_id
user_page = requests.get(user_page_url, headers=headers)
......@@ -1239,7 +1392,7 @@ class Crawler_toutiao():
# print(str(releaser_id)+str(max_behot_time))
# js_head = json.loads(get_js(str(releaser_id)+str(max_behot_time)))
get_as_cp_sign = requests.get(
"http://127.0.0.1:3000/?id=%s&max_behot_time=%s" % (releaser_id, max_behot_time))
"http://127.0.0.1:3000/?id=%s&max_behot_time=%s" % (releaser_id, max_behot_time))
as_cp_sign = get_as_cp_sign.json()
url_dic = {"page_type": "0",
"user_id": releaser_id,
......@@ -1434,12 +1587,12 @@ class Crawler_toutiao():
media_id = releaser_id
headers = {
"accept-encoding": "gzip, deflate, br",
"accept-language": "zh-CN,zh;q=0.9",
"cache-control": "max-age=0",
"upgrade-insecure-requests": "1",
"Referer": "http://m.toutiao.com/profile/%s/" % releaser_id,
"User-Agent": self.random_useragent(),
"accept-encoding": "gzip, deflate, br",
"accept-language": "zh-CN,zh;q=0.9",
"cache-control": "max-age=0",
"upgrade-insecure-requests": "1",
"Referer": "http://m.toutiao.com/profile/%s/" % releaser_id,
"User-Agent": self.random_useragent(),
}
while has_more == 1 and count <= releaser_page_num_max:
......@@ -1545,7 +1698,7 @@ class Crawler_toutiao():
es_index=None,
doc_type=None,
proxies_num=None):
for res in self.App_releaser_page_video(releaserUrl, output_to_file=output_to_file, filepath=filepath,
for res in self.web_releaser_page_article(releaserUrl, output_to_file=output_to_file, filepath=filepath,
releaser_page_num_max=releaser_page_num_max,
output_to_es_raw=output_to_es_raw,
output_to_es_register=output_to_es_register,
......@@ -1574,28 +1727,28 @@ class Crawler_toutiao():
@staticmethod
def random_useragent():
    """Return a browser User-Agent string chosen at random from a fixed pool.

    Returns:
        str: one of the User-Agent strings listed below.
    """
    # Each entry appears exactly once and is followed by a comma, so no two
    # adjacent literals are merged into a single string.
    agent_lis = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
        "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
        "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
    ]
    return random.choice(agent_lis)
......@@ -1676,8 +1829,9 @@ class Crawler_toutiao():
if __name__ == '__main__':
data_lis = ["https://www.toutiao.com/c/user/5839829632/#mid=5839829632",
'http://m.365yg.com/video/app/user/home/?to_user_id=52299115946&format=html',
data_lis = [
# "https://www.toutiao.com/c/user/5839829632/#mid=5839829632",
'http://m.365yg.com/video/app/user/home/?to_user_id=58914711545&format=html',
'http://m.365yg.com/video/app/user/home/?to_user_id=50002654647&format=html',
'http://m.365yg.com/video/app/user/home/?to_user_id=72306985675&format=html',
......@@ -1740,7 +1894,7 @@ if __name__ == '__main__':
# res = test.video_page("https://www.ixigua.com/i6701478014242259463/")
# print(res)
for url in data_lis:
test.releaser_page_by_time(1582272540000, 1582964230998 , url, output_to_es_raw=True,
test.releaser_page_by_time(1582272540000, 1595302556249, url, output_to_es_raw=True,
es_index='crawler-data-raw',
doc_type='doc', releaser_page_num_max=2,
proxies_num=0
......
......@@ -15,7 +15,7 @@ from elasticsearch.helpers import scan
from func_find_week_num import find_week_belongs_to
from crawler.crawler_sys.framework.platform_crawler_register import get_crawler
from crawler.crawler_sys.utils import trans_format
from func_cal_doc_id import cal_doc_id
from write_data_into_es.func_cal_doc_id import cal_doc_id
hosts = '192.168.17.11'
port = 80
......
......@@ -6,26 +6,17 @@ Created on Wed Jun 20 09:19:12 2018
"""
import hashlib
try:
from write_data_into_es.func_calculate_toutiao_video_id import calculate_toutiao_video_id
from write_data_into_es.func_calculate_newTudou_video_id import calculate_newTudou_video_id
from write_data_into_es.func_calculate_v_qq_video_id import calculate_v_qq_video_id
#from func_calculate_kwai_video_id_by_data import calculate_kwai_video_id_by_data
from write_data_into_es.func_calculate_kwai_video_id_by_url import calculate_kwai_video_id_by_data_by_url
from write_data_into_es.func_calculate_txxw_video_id import calculate_txxw_video_id
from write_data_into_es.func_calculate_wangyi_news_id import calculate_wangyi_news_id
from write_data_into_es.func_calculate_douyin_id import calculate_douyin_id
from write_data_into_es.func_calculate_haokan_video_id import calculate_haokan_id
except:
from write_data_into_es_new.func_calculate_toutiao_video_id import calculate_toutiao_video_id
from write_data_into_es_new.func_calculate_newTudou_video_id import calculate_newTudou_video_id
from write_data_into_es_new.func_calculate_v_qq_video_id import calculate_v_qq_video_id
# from func_calculate_kwai_video_id_by_data import calculate_kwai_video_id_by_data
from write_data_into_es_new.func_calculate_kwai_video_id_by_url import calculate_kwai_video_id_by_data_by_url
from write_data_into_es_new.func_calculate_txxw_video_id import calculate_txxw_video_id
from write_data_into_es_new.func_calculate_wangyi_news_id import calculate_wangyi_news_id
from write_data_into_es_new.func_calculate_douyin_id import calculate_douyin_id
from write_data_into_es_new.func_calculate_haokan_video_id import calculate_haokan_id
from write_data_into_es.func_calculate_toutiao_video_id import calculate_toutiao_video_id
from write_data_into_es.func_calculate_newTudou_video_id import calculate_newTudou_video_id
from write_data_into_es.func_calculate_v_qq_video_id import calculate_v_qq_video_id
#from func_calculate_kwai_video_id_by_data import calculate_kwai_video_id_by_data
from write_data_into_es.func_calculate_kwai_video_id_by_url import calculate_kwai_video_id_by_data_by_url
from write_data_into_es.func_calculate_txxw_video_id import calculate_txxw_video_id
from write_data_into_es.func_calculate_wangyi_news_id import calculate_wangyi_news_id
from write_data_into_es.func_calculate_douyin_id import calculate_douyin_id
from write_data_into_es.func_calculate_haokan_video_id import calculate_haokan_id
def vid_cal_func(platform):
vid_cal_func_dict = {
......
from func_get_releaser_id import get_releaser_id
from write_data_into_es.func_get_releaser_id import get_releaser_id
def calculate_txxw_video_id(data_dict):
try:
......
......@@ -7,11 +7,11 @@ from elasticsearch import Elasticsearch
import json, copy
from write_data_into_es.func_get_releaser_id import get_releaser_id
from write_data_into_es.func_cal_doc_id import cal_doc_id
from func_transfer_from_ftp import transfer_from_ftp
from write_data_into_es.func_transfer_from_ftp import transfer_from_ftp
import logging
from urllib.parse import parse_qs,urlparse
from elasticsearch.helpers import scan
from ReleaserMeta import ReleaseMeta
from write_data_into_es.ReleaserMeta import ReleaseMeta
hosts = '192.168.17.11'
port = 80
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment