Commit e8d6278c authored by litaolemo

update

parent 33c3ec0c
...@@ -49,16 +49,18 @@ class Crawler_douban(): ...@@ -49,16 +49,18 @@ class Crawler_douban():
for popk in pop_key_Lst: for popk in pop_key_Lst:
self.video_data.pop(popk) self.video_data.pop(popk)
self.sig_list = [ self.sig_list = [
"aOI2VYvkFvPfUngaeoz%2BNYQ7MQM%3D", # "aOI2VYvkFvPfUngaeoz%2BNYQ7MQM%3D",
"Glc52sbPO46I%2FR%2FOCjl%2BGwKo94I%3D", # "Glc52sbPO46I%2FR%2FOCjl%2BGwKo94I%3D",
"l9oVu%2FYau2UwMyhc5m8ldALp5eU%3D", # "l9oVu%2FYau2UwMyhc5m8ldALp5eU%3D",
"tL36trbi73v7Y057K10%2FQ9fdCiA%3D" # "tL36trbi73v7Y057K10%2FQ9fdCiA%3D",
"vu4h6fzkqrvpNxWOYee95RPPV04="
] ]
self.headers = { self.headers = {
"User-Agent": "api-client/1 com.douban.frodo/10.39.0(189) Android/23 product/cancro vendor/Netease model/Miui rom/android network/wifi platform/AndroidPad", "User-Agent": "api-client/1 com.douban.frodo/6.39.0(189) Android/23 product/oppo R11s Plus vendor/OPPO model/oppo R11s Plus rom/android network/wifi platform/AndroidPad",
"Host": "frodo.douban.com", "Host": "frodo.douban.com",
"Connection": "Keep-Alive", "Connection": "Keep-Alive",
"Accept-Encoding": "gzip", "Accept-Encoding": "gzip",
"Authorization": "Bearer ee99197a01a77702cbcb4c6e04f66506",
} }
def get_single_page(self,mid,proxies): def get_single_page(self,mid,proxies):
...@@ -113,26 +115,26 @@ class Crawler_douban(): ...@@ -113,26 +115,26 @@ class Crawler_douban():
"count": "20", "count": "20",
"sortby": "new", "sortby": "new",
"apple": "389276ed556d40cada2e208482b51cd7", "apple": "389276ed556d40cada2e208482b51cd7",
"icecream": "ffd8f7d71419a98e48819cbac587ebbd", "icecream": "7b92c1aa7b531d1500c6e4905de2ca76",
"mooncake": "0f607264fc6318a92b9e13c65db7cd3c", "mooncake": "0f607264fc6318a92b9e13c65db7cd3c",
"webview_ua": "Mozilla%2F5.0%20%28Linux%3B%20Android%2010.0.1%3B%20Miui%20Build%2FV417IR%3B%20wv%29%20AppleWebKit%2F537.36%20%28KHTML%2C%20like%20Gecko%29%20Version%2F4.0%20Chrome%2F52.0.2743.100%20Mobile%20Safari%2F537.36", "webview_ua": "Mozilla/5.0 (Linux; Android 6.0.1; oppo R11s Plus Build/V417IR; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.100 Mobile Safari/537.36",
"screen_width": "810", "screen_width": "1080",
"screen_height": "1440", "screen_height": "1920",
"sugar": "0", "sugar": "460000",
"longitude": "0", "longitude": "0.0",
"latitude": "0", "latitude": "0.0",
"os_rom": "android", "os_rom": "android",
"apikey": "0dad551ec0f84ed02907ff5c42e8ec70", "apikey": "0dad551ec0f84ed02907ff5c42e8ec70",
"channel": "Baidu_Market", "channel": "Baidu_Market",
"udid": "dc{0}e9f33c54b4bb579c49100b6f2cc0dc5cc".format(random.randint(10000,99999)), "udid": "dc18733e9f33c54b4bb579c49100b6f2cc0dc5cc",
"_sig": random.choice(self.sig_list), "_sig": random.choice(self.sig_list),
"_ts": None, "_ts": 1598337519,
} }
while page <= releaser_page_num_max and has_more: while page <= releaser_page_num_max and has_more:
url_dic["_ts"] = int(datetime.datetime.now().timestamp()) # url_dic["_ts"] = int(datetime.datetime.now().timestamp())
url_dic["start"] = str(page * 20) url_dic["start"] = str(page * 20)
url = "https://frodo.douban.com/api/v2/group/248952/topics?%s" % urllib.parse.urlencode(url_dic) url = "http://frodo.douban.com/api/v2/group/248952/topics?%s" % urllib.parse.urlencode(url_dic)
try: try:
if proxies_num: if proxies_num:
get_page = retry_get_url(url, headers=self.headers, timeout=self.timeout, proxies=proxies_num) get_page = retry_get_url(url, headers=self.headers, timeout=self.timeout, proxies=proxies_num)
...@@ -288,5 +290,5 @@ if __name__ == '__main__': ...@@ -288,5 +290,5 @@ if __name__ == '__main__':
for u in url_list: for u in url_list:
test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_register=True, test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_register=True,
es_index='crawler-data-raw', es_index='crawler-data-raw',
doc_type='doc', releaser_page_num_max=4000) doc_type='doc', releaser_page_num_max=4000,proxies_num=1)
# test.get_single_page(4524055937468233) # test.get_single_page(4524055937468233)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment