# -*- coding:utf-8 -*-
# @Time : 2019/4/17 9:15
# @Author : litao
"""Crawler for Kuaishou ("kwai") releaser pages.

Provides :class:`Crawler_kwai`, which pages through a releaser's video feed
(mobile share API or PC GraphQL API) and looks up follower counts / avatars.
"""
import os
import re
import time
import copy
import random
import requests
import datetime
import json
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from crawler.crawler_sys.framework.video_fields_std import Std_fields_video
from crawler.crawler_sys.utils.output_results import output_result
from crawler.crawler_sys.utils.util_logging import logged
from fontTools.ttLib import *
try:
    from crawler_sys.framework.func_get_releaser_id import *
except ImportError:
    from func_get_releaser_id import *
from crawler.crawler_sys.proxy_pool.func_get_proxy_form_kuaidaili import get_proxy
# NOTE(review): this import is disabled although ``Login`` is still used by the
# class below; the class therefore resolves ``Login`` lazily from module globals.
# from crawler.crawler_sys.utils.func_verification_code import Login
from crawler.crawler_sys.utils.trans_str_play_count_to_int import trans_play_count

# GraphQL document sent to the PC feed endpoint (live.kuaishou.com/m_graphql).
_PUBLIC_FEEDS_QUERY = (
    "query publicFeedsQuery($principalId: String, $pcursor: String, $count: Int) {\n"
    " publicFeeds(principalId: $principalId, pcursor: $pcursor, count: $count) {\n"
    " pcursor\n live {\n user {\n id\n avatar\n name\n __typename\n }\n"
    " watchingCount\n poster\n coverUrl\n caption\n id\n"
    " playUrls {\n quality\n url\n __typename\n }\n quality\n"
    " gameInfo {\n category\n name\n pubgSurvival\n type\n kingHero\n __typename\n }\n"
    " hasRedPack\n liveGuess\n expTag\n __typename\n }\n"
    " list {\n id\n thumbnailUrl\n poster\n workType\n type\n useVideoPlayer\n"
    " imgUrls\n imgSizes\n magicFace\n musicName\n caption\n location\n liked\n"
    " onlyFollowerCanComment\n relativeHeight\n timestamp\n width\n height\n"
    " counts {\n displayView\n displayLike\n displayComment\n __typename\n }\n"
    " user {\n id\n eid\n name\n avatar\n __typename\n }\n"
    " expTag\n __typename\n }\n __typename\n }\n}\n"
)

# GraphQL document sent to the user-info endpoint (live.kuaishou.com/graphql).
_USER_INFO_QUERY = (
    "query userInfoQuery($principalId: String) {\n"
    " userInfo(principalId: $principalId) {\n"
    " id\n principalId\n kwaiId\n eid\n userId\n profile\n name\n description\n"
    " sex\n constellation\n cityName\n living\n watchingCount\n isNew\n privacy\n"
    " feeds {\n eid\n photoId\n thumbnailUrl\n timestamp\n __typename\n }\n"
    " verifiedStatus {\n verified\n description\n type\n new\n __typename\n }\n"
    " countsInfo {\n fan\n follow\n photo\n liked\n open\n playback\n private\n"
    " __typename\n }\n"
    " bannedStatus {\n banned\n defriend\n isolate\n socialBanned\n __typename\n }\n"
    " __typename\n }\n}\n"
)


class Crawler_kwai():
    """Crawl video lists and releaser metadata from Kuaishou."""

    def __init__(self, timeout=None, platform='kwai'):
        """Prepare the per-video template and the default request headers.

        Args:
            timeout: Request timeout in seconds; ``None`` selects 10.
            platform: Platform label written into every video record.
        """
        self.timeout = 10 if timeout is None else timeout
        self.platform = platform
        self.TotalVideo_num = None
        self.midstepurl = None
        std_fields = Std_fields_video()
        self.video_data = std_fields.video_data
        self.video_data['platform'] = self.platform
        unused_key_list = ['channel', 'describe', 'repost_count', 'isOriginal']
        for key in unused_key_list:
            # Tolerate a template that already lacks one of these keys.
            self.video_data.pop(key, None)
        self.first_page_headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Host": "live.kuaishou.com",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
        }
        # ``Login`` is only available when one of the imports provides it (its
        # explicit import is commented out). Resolve it lazily so that the feed
        # crawlers, which never use it, still work when it is missing.
        login_cls = globals().get("Login")
        self.loginObj = login_cls() if login_cls is not None else None
        self.get_cookies_and_front = (
            self.loginObj.get_cookies_and_front if self.loginObj is not None else None
        )

    @staticmethod
    def _post_json(url, **request_kwargs):
        """POST to *url* and return the decoded JSON body, or ``None`` on any failure."""
        try:
            return requests.post(url, **request_kwargs).json()
        except Exception as err:  # network error, bad proxy, or non-JSON body
            print("request to %s failed: %s" % (url, err))
            return None

    def get_cookies_and_font(self, releaserUrl):
        """Fetch cookies and the obfuscated-digit mapping for *releaserUrl*.

        Stores the two values returned by the login helper in
        ``self.cookie_dic`` and ``self.uni_code_dic``.

        Raises:
            RuntimeError: If no ``Login`` helper was available at construction.
        """
        if self.get_cookies_and_front is None:
            raise RuntimeError(
                "Login helper is not available; cannot fetch cookies and font mapping")
        self.cookie_dic, self.uni_code_dic = self.get_cookies_and_front(releaserUrl)

    def get_releaser_id(self, releaserUrl):
        """Return the releaser id for *releaserUrl* via the shared module-level helper."""
        return get_releaser_id(platform=self.platform, releaserUrl=releaserUrl)

    @staticmethod
    def re_cal_count(count_num):
        """Convert a count such as ``"1.2w"`` or ``"345"`` to an int.

        A trailing ``"w"`` multiplies the number by 10000.

        Returns:
            The integer count, or ``False`` when the value cannot be parsed.
        """
        if isinstance(count_num, int):
            return count_num
        if isinstance(count_num, str):
            try:
                if count_num.endswith("w"):
                    # round() avoids float truncation of exact decimal inputs.
                    return int(round(float(count_num[:-1]) * 10000))
                return int(count_num)
            except (ValueError, OverflowError):
                return False
        return False

    def unicode_to_num(self, uni_str):
        """Replace obfuscated glyph escapes in *uni_str* using ``self.uni_code_dic``.

        The string is unicode-escaped first, then every key of the mapping that
        occurs in it is replaced by the mapped value.
        """
        count_num = str(uni_str.encode("unicode_escape"))[2:-1]
        for code in self.uni_code_dic:
            if code in count_num:
                count_num = count_num.replace(code, str(self.uni_code_dic[code]))
        return count_num

    @staticmethod
    def get_video_image(data):
        """Placeholder: no cover image is extracted, so ``None`` is returned."""
        return None

    def get_web_url_cookies(self, releaserUrl):
        """Request *releaserUrl* and return its cookies as a dict with fixed extras added."""
        first_page = requests.get(releaserUrl, headers=self.first_page_headers,
                                  timeout=self.timeout)
        cookie = requests.utils.dict_from_cookiejar(first_page.cookies)
        cookie["pua5rv"] = "1"
        cookie["didv"] = "1582271776000"
        cookie["sid"] = "e12d2ec74ec7af3a24d{0}cd6".format(random.randint(10, 99))
        cookie.pop("kuaishou.live.bfb1s", 0)
        print(cookie)
        return cookie

    def releaser_page(self, releaserUrl, **kwargs):
        """Yield video records for a releaser (delegates to :meth:`releaser_page_web`)."""
        yield from self.releaser_page_web(releaserUrl, **kwargs)

    @staticmethod
    def _extract_photo_id(share_info):
        """Return the ``photoId`` value from a ``&``-separated string, or ``None``."""
        for param in (share_info or "").split("&"):
            if "photoId=" in param:
                return param.replace("photoId=", "")
        return None

    def _parse_web_feed(self, info_dic):
        """Build a video record from one mobile-API feed item; ``None`` if unusable."""
        video_dic = copy.deepcopy(self.video_data)
        try:
            author_eid = info_dic.get("userEid")
            photoid = self._extract_photo_id(info_dic.get('share_info'))
            if photoid is None:
                raise ValueError("photoId missing from share_info")
            video_dic['title'] = info_dic.get('caption')
            video_dic['video_id'] = photoid
            video_dic['url'] = "https://live.kuaishou.com/u/%s/%s" % (author_eid, photoid)
            video_dic['release_time'] = info_dic.get('timestamp')
            video_dic['releaser'] = info_dic.get("userName")
            video_dic['play_count'] = trans_play_count(info_dic.get("viewCount"))
            video_dic['comment_count'] = trans_play_count(info_dic.get("commentCount"))
            video_dic['favorite_count'] = trans_play_count(info_dic.get('likeCount'))
            video_dic['repost_count'] = trans_play_count(info_dic.get('forwardCount'))
            video_dic['fetch_time'] = int(time.time() * 1e3)
            try:
                video_dic['duration'] = int(info_dic.get("ext_params").get("video") / 1000)
            except (AttributeError, TypeError, ValueError):
                video_dic['duration'] = 0
                print("duration error")
            video_dic['releaser_id_str'] = "kwai_%s" % (author_eid)
            video_dic['releaserUrl'] = 'https://live.kuaishou.com/profile/%s' % author_eid
            video_dic['video_img'] = info_dic.get("coverUrls")[0].get("url")
        except Exception as e:
            print(e)
            return None
        if (video_dic['play_count'] is False or video_dic['comment_count'] is False
                or video_dic['favorite_count'] is False):
            print(info_dic)
            return None
        return video_dic

    def releaser_page_web(self, releaserUrl, output_to_file=False, filepath=None,
                          releaser_page_num_max=5000, output_to_es_raw=False,
                          es_index=None, doc_type=None, output_to_es_register=False,
                          push_to_redis=False, proxies_num=None, **kwargs):
        """Yield video records from the mobile share API (c.kuaishou.com).

        Paging stops after ``releaser_page_num_max`` pages, after 1000 pages, or
        when the cursor becomes ``"no_more"``. Failed or empty responses share a
        retry budget: after 3 the proxy is dropped, after 5 the crawl stops.

        Args:
            releaserUrl: Profile URL of the releaser.
            releaser_page_num_max: Maximum number of pages to fetch.
            proxies_num: Passed to ``get_proxy``; falsy means no proxy is used.

        The remaining parameters are accepted for interface compatibility and
        are not used by this method.
        """
        releaser_id = self.get_releaser_id(releaserUrl)
        releaserUrl = 'https://live.kuaishou.com/profile/%s' % releaser_id
        self.video_data['releaserUrl'] = releaserUrl
        headers = {
            "Accept": "application/json",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh,zh-CN;q=0.9",
            "Connection": "keep-alive",
            "Content-Type": "application/json; charset=UTF-8",
            "Cookie": "clientid=3; did=web_549cd4825914642449695ddccf5bfa99; client_key=65890b29; userId=%s; didv=1589785882000; sid=a94d55c86bbbccd28b8e2a8d" % random.randint(861446000, 861449800),
            "Host": "c.kuaishou.com",
            "kpf": "H5",
            "kpn": "KUAISHOU",
            "Origin": "https://c.kuaishou.com",
            "Referer": "https://c.kuaishou.com/fw/user/%s?fid=1535125321&cc=share_copylink&shareMethod=TOKEN&docId=0&kpn=KUAISHOU&subBiz=PROFILE&shareId=176513752168&docABKey=share_textid_profile&shareToken=X6btjdy2izGxVqQ_A&shareResourceType=PROFILE_OTHER&groupABKey=share_group_profile&groupName=&expTag=null&appType=21&shareObjectId=1478754458&shareUrlOpened=0" % releaser_id,
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1",
        }
        # NOTE: a previous revision also tried the mirror endpoint
        # https://kpfbeijing.m.chenzhongtech.com/rest/kd/feed/profile when the
        # primary one returned no feeds.
        api_url = "https://c.kuaishou.com/rest/kd/feed/profile"
        count = 1
        retry_time = 0
        pcursor = None
        proxies = get_proxy(proxies_num)
        while count <= releaser_page_num_max and count <= 1000 and pcursor != "no_more":
            request_kwargs = {
                "json": {"eid": releaser_id, "count": 18, "pcursor": pcursor},
                "headers": headers,
                "timeout": 10,
            }
            if proxies_num:
                request_kwargs["proxies"] = proxies
            page_dic = self._post_json(api_url, **request_kwargs)
            if page_dic is not None:
                time.sleep(random.randint(3, 4))
            data_list = page_dic.get("feeds") if isinstance(page_dic, dict) else None
            if not data_list:
                # Request failures count here too, so a dead network cannot
                # spin this loop forever.
                print("no more data at releaser: %s page: %s " % (releaser_id, count))
                proxies = get_proxy(proxies_num)
                retry_time += 1
                if retry_time > 3:
                    proxies_num = 0
                if retry_time > 5:
                    pcursor = "no_more"
                continue
            pcursor = page_dic.get("pcursor")
            print("get data at releaser: %s page: %s" % (releaser_id, count))
            count += 1
            for info_dic in data_list:
                video_dic = self._parse_web_feed(info_dic)
                if video_dic is not None:
                    yield video_dic

    def _parse_pc_feed(self, info_dic):
        """Build a video record from one PC GraphQL feed item; ``None`` if unusable."""
        video_dic = copy.deepcopy(self.video_data)
        try:
            user = info_dic.get('user')
            counts = info_dic.get('counts')
            author_eid = user.get("eid")
            video_dic['title'] = info_dic.get('caption')
            video_dic['url'] = "https://live.kuaishou.com/u/%s/%s" % (author_eid, info_dic.get('id'))
            video_dic['releaser'] = user.get("name")
            video_dic['release_time'] = info_dic.get('timestamp')
            video_dic['play_count'] = trans_play_count(counts.get("displayView"))
            video_dic['comment_count'] = trans_play_count(counts.get("displayComment"))
            video_dic['favorite_count'] = trans_play_count(counts.get("displayLike"))
            video_dic['video_id'] = info_dic.get('id')
            video_dic['fetch_time'] = int(time.time() * 1e3)
            video_dic['releaser_id_str'] = "kwai_%s" % (author_eid)
            video_dic['releaserUrl'] = 'https://live.kuaishou.com/profile/%s' % author_eid
            video_dic['video_img'] = self.get_video_image(info_dic)
        except Exception as err:
            print(err)
            return None
        if (video_dic['play_count'] is False or video_dic['comment_count'] is False
                or video_dic['favorite_count'] is False):
            print(info_dic)
            return None
        return video_dic

    def releaser_page_pc(self, releaserUrl, output_to_file=False, filepath=None,
                         releaser_page_num_max=10000, output_to_es_raw=False,
                         es_index=None, doc_type=None, output_to_es_register=False,
                         push_to_redis=False, proxies_num=None, **kwargs):
        """Yield video records from the PC GraphQL API (live.kuaishou.com/m_graphql).

        Paging stops after ``releaser_page_num_max`` pages, after 1000 pages, or
        when the cursor becomes ``"no_more"``; more than 3 failed or empty
        responses also end the crawl.

        Args:
            releaserUrl: Profile URL of the releaser (also sent as ``Referer``).
            releaser_page_num_max: Maximum number of pages to fetch.
            proxies_num: Passed to ``get_proxy``.

        The remaining parameters are accepted for interface compatibility and
        are not used by this method.
        """
        user_id = "153512{0}".format(random.randint(1000, 9000))
        proxies = get_proxy(proxies_num)
        headers = {
            "accept": "*/*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Connection": "keep-alive",
            "content-type": "application/json",
            "Cookie": "did=web_504e72386a69c6d6172f1457b591415c ;userId=%s" % (user_id),
            "Host": "live.kuaishou.com",
            "Origin": "https://live.kuaishou.com",
            "Referer": releaserUrl,
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"
        }
        api_url = 'https://live.kuaishou.com/m_graphql'
        count = 1
        retry_time = 0
        releaser_id = self.get_releaser_id(releaserUrl)
        pcursor = ""
        self.video_data['releaserUrl'] = releaserUrl
        while count <= releaser_page_num_max and count <= 1000 and pcursor != "no_more":
            time.sleep(random.randint(1, 2))
            url_dic = {"operationName": "publicFeedsQuery",
                       "variables": {"principalId": releaser_id, "pcursor": pcursor, "count": 100},
                       "query": _PUBLIC_FEEDS_QUERY}
            request_kwargs = {"headers": headers, "json": url_dic, "timeout": 5}
            if proxies:
                request_kwargs["proxies"] = proxies
            page_dic = self._post_json(api_url, **request_kwargs)
            try:
                public_feeds = page_dic["data"]["publicFeeds"]
                data_list = public_feeds.get("list")
            except (TypeError, KeyError, AttributeError):
                public_feeds, data_list = {}, None
            if not data_list:
                print("no more data at releaser: %s page: %s " % (releaser_id, count))
                proxies = get_proxy(proxies_num)
                retry_time += 1
                if retry_time > 3:
                    pcursor = "no_more"
                continue
            pcursor = public_feeds.get("pcursor")
            print("get data at releaser: %s page: %s" % (releaser_id, count))
            count += 1
            for info_dic in data_list:
                # The per-item author id stays inside the parser so that it
                # cannot overwrite the releaser_id used for the next page query.
                video_dic = self._parse_pc_feed(info_dic)
                if video_dic is not None:
                    yield video_dic

    def releaser_page_by_time(self, start_time, end_time, url, allow, **kwargs):
        """Yield records from :meth:`releaser_page` filtered by ``release_time``.

        Records with ``start_time < release_time < end_time`` are yielded.
        Records at or before ``start_time`` are still yielded, but once more
        than ``allow`` of them have been seen the iteration stops. Records with
        a falsy ``release_time`` are skipped. Only ``proxies_num`` is forwarded
        from ``kwargs``.
        """
        count_false = 0
        for res in self.releaser_page(url, proxies_num=kwargs.get("proxies_num")):
            video_time = res["release_time"]
            if not video_time:
                continue
            if start_time < video_time:
                if video_time < end_time:
                    yield res
                continue
            count_false += 1
            if count_false > allow:
                break
            yield res

    def get_releaser_follower_num(self, releaserUrl):
        """Return ``(follower_count, avatar_url)`` for a releaser.

        Makes up to five attempts, each with a fresh proxy and fresh cookies; a
        failed proxied request is retried once without a proxy.

        Returns:
            ``(None, None)`` when the releaser id cannot be resolved or every
            attempt fails.
        """
        releaser_id = self.get_releaser_id(releaserUrl)
        if not releaser_id:
            return None, None
        headers = {
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "content-type": "application/json",
            "Referer": releaserUrl,
            "Origin": "https://live.kuaishou.com",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Host": "live.kuaishou.com",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
        }
        post_url = 'https://live.kuaishou.com/graphql'
        post_dic = {"operationName": "userInfoQuery",
                    "variables": {"principalId": releaser_id},
                    "query": _USER_INFO_QUERY}
        max_attempts = 5
        for attempt in range(max_attempts):
            proxies = get_proxy(proxies_num=1)
            self.get_cookies_and_font(releaserUrl)
            res_dic = self._post_json(post_url, headers=headers, cookies=self.cookie_dic,
                                      json=post_dic, proxies=proxies, timeout=2)
            if res_dic is None:
                # Fall back to a direct request, bounded by the instance timeout.
                res_dic = self._post_json(post_url, headers=headers,
                                          cookies=self.cookie_dic, json=post_dic,
                                          timeout=self.timeout)
            print(res_dic)
            if isinstance(res_dic, dict) and res_dic.get("errors"):
                self.loginObj.delete_cookies(self.cookie_dic)
            try:
                releaser_follower_num_str = res_dic["data"]["userInfo"]["countsInfo"]["fan"]
                releaser_follower_num = self.re_cal_count(
                    self.unicode_to_num(releaser_follower_num_str))
                print(releaser_follower_num)
                releaser_img = self.get_releaser_image(data=res_dic)
                return releaser_follower_num, releaser_img
            except Exception:
                # Missing keys or an unusable digit mapping: try again; the
                # cookies are dropped after the final failed attempt.
                if attempt == max_attempts - 1:
                    self.loginObj.delete_cookies(self.cookie_dic)
        return None, None

    def get_releaser_image(self, releaserUrl=None, data=None):
        """Return the releaser's ``profile`` (avatar) value.

        Args:
            releaserUrl: When given, the user info is fetched from the GraphQL
                API and ``None`` is returned if the value cannot be read.
            data: An already-fetched user-info response, used when
                ``releaserUrl`` is falsy; missing keys raise ``KeyError``.

        Returns:
            The avatar value, or ``None`` when neither argument is provided.
        """
        if releaserUrl:
            self.get_cookies_and_font(releaserUrl)
            releaser_id = self.get_releaser_id(releaserUrl)
            post_dic = {"operationName": "userInfoQuery",
                        "variables": {"principalId": releaser_id},
                        "query": _USER_INFO_QUERY}
            res_dic = self._post_json('https://live.kuaishou.com/graphql',
                                      headers=self.first_page_headers,
                                      cookies=self.cookie_dic, json=post_dic,
                                      timeout=self.timeout)
            try:
                releaser_img = res_dic["data"]["userInfo"]["profile"]
            except (KeyError, TypeError):
                return None
            print(releaser_img)
            return releaser_img
        if data is None:
            return None
        releaser_img = data["data"]["userInfo"]["profile"]
        print(releaser_img)
        return releaser_img


if __name__ == '__main__':
    test = Crawler_kwai()
    url = 'https://live.kuaishou.com/profile/IIloveyoubaby'
    user_lis = [
        # "https://live.kuaishou.com/profile/3xx3vac2uctn2ak"
        "https://live.kuaishou.com/profile/3xn3bwab5q5pehc",
        "https://live.kuaishou.com/profile/3x25rpyd2v6qv86",
        "https://live.kuaishou.com/profile/3xziqtumtj6xzkm",
        "https://live.kuaishou.com/profile/3xugxze3x97r2w4",
        "https://live.kuaishou.com/profile/3xnhsx3wt8if74c",
        "https://live.kuaishou.com/profile/3xcdy8gzaqsy6zu",
        "https://live.kuaishou.com/profile/3xid2nndq4fk3t6",
        "https://live.kuaishou.com/profile/3xeqdvqjp553nvm",
        "https://live.kuaishou.com/profile/3xhyd98hf8bcxsi",
        "https://live.kuaishou.com/profile/3x9m4am579hrvfq",
        "https://live.kuaishou.com/profile/3xkkkfxxd48dei6",
        "https://live.kuaishou.com/profile/3xjqv4mspa4bw8i",
        "https://live.kuaishou.com/profile/3xgxy8yiqnqtkt4",
        "https://live.kuaishou.com/profile/3xxxsfnuie3ucfi",
        "https://live.kuaishou.com/profile/3x3t5epr3max95y",
        "https://live.kuaishou.com/profile/3xny3xcwghpi7yk",
        "https://live.kuaishou.com/profile/3xcm5jbmtyhezj4",
        "https://live.kuaishou.com/profile/3x7twx645iyapkm",
        "https://live.kuaishou.com/profile/3xzu7cr583vt5zg",
        "https://live.kuaishou.com/profile/3x83h23sbmjxiq4",
        "https://live.kuaishou.com/profile/3xijrqj69f8ckjk",
        "https://live.kuaishou.com/profile/3xv47uc6r7gq882",
        "https://live.kuaishou.com/profile/3xqv5w2kzccqk5y",
        "https://live.kuaishou.com/profile/3x5yh4d4wtfnvs6",
        "https://live.kuaishou.com/profile/3xwxakqi8whe9ju",
        "https://live.kuaishou.com/profile/3x58zdtbg9ecqxy",
        "https://live.kuaishou.com/profile/3xd6szsxerggfu4",
        "https://live.kuaishou.com/profile/3xzfhbh9q2afynk",
        "https://live.kuaishou.com/profile/3xgy42jat4kp8yk",
        "https://live.kuaishou.com/profile/3xq9ya2akj52f8a",
        "https://live.kuaishou.com/profile/3xvbgjz6vxdr4zq",
        "https://live.kuaishou.com/profile/3xaqiypqnwfwakg",
        "https://live.kuaishou.com/profile/3x57bdie5v8bag6",
        "https://live.kuaishou.com/profile/3x6e8hdhmtssdae",
        "https://live.kuaishou.com/profile/3xeszmbm3rphnn4",
        "https://live.kuaishou.com/profile/3x67fxqg6wd39tw",
    ]
    for u in user_lis:
        ttt = test.releaser_page_by_time(1577808000000, 1585632335193, u,
                                         output_to_es_raw=True,
                                         es_index='crawler-data-raw',
                                         doc_type='doc',
                                         releaser_page_num_max=2000,
                                         proxies_num=1, allow=10)
        for t in ttt:
            print(t)