Commit 06770bba authored by litaolemo

update

parent 882a255e
...@@ -51,7 +51,7 @@ class CrawlerDouban(): ...@@ -51,7 +51,7 @@ class CrawlerDouban():
self.video_data.pop(popk) self.video_data.pop(popk)
self.headers = { self.headers = {
"User-Agent": "api-client/1 com.douban.frodo/6.42.2(194) Android/22 product/shamu vendor/OPPO model/OPPO R11 Plus rom/android network/wifi platform/mobile nd/1", "User-Agent": 'api-client/1 com.douban.frodo/6.42.2(194) Android/22 product/shamu vendor/OPPO model/OPPO R11 Plus rom/android network/wifi platform/mobile nd/1',
# "Host": "frodo.douban.com", # "Host": "frodo.douban.com",
# "Connection": "Keep-Alive", # "Connection": "Keep-Alive",
# "Accept-Encoding": "gzip", # "Accept-Encoding": "gzip",
...@@ -65,9 +65,10 @@ class CrawlerDouban(): ...@@ -65,9 +65,10 @@ class CrawlerDouban():
count_true = 0 count_true = 0
while count_true <= 5: while count_true <= 5:
try: try:
ts,sig = self.get_sig('/api/v2/group/topic/{0}'.format(mid))
count_true += 1 count_true += 1
url = "https://frodo.douban.com/api/v2/group/topic/{0}?event_source=search&os_rom=android&apikey=0dad551ec0f84ed02907ff5c42e8ec70&channel=Baidu_Market&udid=dc18733e9f33c54b4bb579c49100b6f2cc0dc5cc&_sig={1}&_ts=1598339497".format( url = "https://frodo.douban.com/api/v2/group/topic/{mid}?event_source=search&apikey=0dad551ec0f84ed02907ff5c42e8ec70&_sig={sig}&_ts={ts}".format(
mid, 'lgrIVA7Zvp7r0+WuOe4APb9EL0A=') mid=mid,ts=ts,sig=sig)
page_res = retry_get_url(url,headers=self.headers,proxies=proxies) page_res = retry_get_url(url,headers=self.headers,proxies=proxies)
page_json = page_res.json() page_json = page_res.json()
...@@ -108,19 +109,19 @@ class CrawlerDouban(): ...@@ -108,19 +109,19 @@ class CrawlerDouban():
sign = "bf7dddc7c9cfe6f7" sign = "bf7dddc7c9cfe6f7"
url_limit = url.split("?")[0].replace("http://frodo.douban.com",'') url_limit = url.split("?")[0].replace("http://frodo.douban.com",'')
url_limit = urllib.parse.quote(url_limit,safe='') url_limit = urllib.parse.quote(url_limit,safe='')
ts = str(int(datetime.datetime.now().timestamp())) # ts = str(int(datetime.datetime.now().timestamp()))
ts = '1600423244' ts = '1600650372'
url_str = 'GET&%s&%s' % (url_limit,ts) url_str = 'GET&%s&%s' % (url_limit,ts)
# print(url_str) # print(url_str)
sig_sha1 = hmac.new(sign.encode('utf-8'), url_str.encode('utf-8'), digestmod='SHA1') sig_sha1 = hmac.new(sign.encode('utf-8'), url_str.encode('utf-8'), digestmod='SHA1')
sig_sha1 = sig_sha1.hexdigest().upper() sig_sha1 = sig_sha1.hexdigest().upper()
bytes_arry = bytearray.fromhex(sig_sha1) # bytes_arry = bytearray.fromhex(sig_sha1)
# print([x for x in bytearray(sig.hexdigest(),'utf_8')]) # print([x for x in bytearray(bytes_arry,'utf_8')])
# print(bytearray(sig.hexdigest())) # print(bytearray(sig_sha1.hexdigest()))
# binData = ''.join(map(lambda x: chr(x % 256), data)) # print(''.join(map(lambda x: chr(x % 256), bytearray(sig_sha1.hexdigest()))))
sig = bytes.decode(base64.encodebytes(bytes_arry)).replace('\n','') sig = bytes.decode(base64.encodebytes(bytearray.fromhex(sig_sha1))).replace('\n','')
print(urllib.parse.quote(sig,safe='')) # print(urllib.parse.quote(sig,safe=''))
return ts, urllib.parse.quote(sig,safe='') return ts, sig
def gooseneck(self,releaserUrl,output_to_file=False, filepath=None, def gooseneck(self,releaserUrl,output_to_file=False, filepath=None,
output_to_es_raw=False, output_to_es_raw=False,
...@@ -133,8 +134,8 @@ class CrawlerDouban(): ...@@ -133,8 +134,8 @@ class CrawlerDouban():
has_more = True has_more = True
ts,sig = self.get_sig('/api/v2/group/248952/topics') ts,sig = self.get_sig('/api/v2/group/248952/topics')
url_dic = { url_dic = {
"start": None, # "start": None,
"count": "20", "count": "100",
"sortby": "new", "sortby": "new",
# "apple": "389276ed556d40cada2e208482b51cd7", # "apple": "389276ed556d40cada2e208482b51cd7",
# "icecream": "7b92c1aa7b531d1500c6e4905de2ca76", # "icecream": "7b92c1aa7b531d1500c6e4905de2ca76",
...@@ -155,7 +156,8 @@ class CrawlerDouban(): ...@@ -155,7 +156,8 @@ class CrawlerDouban():
} }
while page <= releaser_page_num_max and has_more: while page <= releaser_page_num_max and has_more:
# url_dic["_ts"] = int(datetime.datetime.now().timestamp()) # url_dic["_ts"] = int(datetime.datetime.now().timestamp())
url_dic["start"] = str(page * 20) if page:
url_dic["start"] = str(page * 20)
if "hot_tag" in releaserUrl: if "hot_tag" in releaserUrl:
url_dic["sortby"] = "hot" url_dic["sortby"] = "hot"
elif "new_tag" in releaserUrl: elif "new_tag" in releaserUrl:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment