Commit 06770bba authored by litaolemo

update

parent 882a255e
@@ -51,7 +51,7 @@ class CrawlerDouban():
             self.video_data.pop(popk)
         self.headers = {
-            "User-Agent": "api-client/1 com.douban.frodo/6.42.2(194) Android/22 product/shamu vendor/OPPO model/OPPO R11 Plus rom/android network/wifi platform/mobile nd/1",
+            "User-Agent": 'api-client/1 com.douban.frodo/6.42.2(194) Android/22 product/shamu vendor/OPPO model/OPPO R11 Plus rom/android network/wifi platform/mobile nd/1',
             # "Host": "frodo.douban.com",
             # "Connection": "Keep-Alive",
             # "Accept-Encoding": "gzip",
@@ -65,9 +65,10 @@ class CrawlerDouban():
        count_true = 0
        while count_true <= 5:
            try:
+                ts,sig = self.get_sig('/api/v2/group/topic/{0}'.format(mid))
                count_true += 1
-                url = "https://frodo.douban.com/api/v2/group/topic/{0}?event_source=search&os_rom=android&apikey=0dad551ec0f84ed02907ff5c42e8ec70&channel=Baidu_Market&udid=dc18733e9f33c54b4bb579c49100b6f2cc0dc5cc&_sig={1}&_ts=1598339497".format(
-                    mid, 'lgrIVA7Zvp7r0+WuOe4APb9EL0A=')
+                url = "https://frodo.douban.com/api/v2/group/topic/{mid}?event_source=search&apikey=0dad551ec0f84ed02907ff5c42e8ec70&_sig={sig}&_ts={ts}".format(
+                    mid=mid,ts=ts,sig=sig)
                page_res = retry_get_url(url,headers=self.headers,proxies=proxies)
                page_json = page_res.json()
@@ -108,19 +109,19 @@ class CrawlerDouban():
        sign = "bf7dddc7c9cfe6f7"
        url_limit = url.split("?")[0].replace("http://frodo.douban.com",'')
        url_limit = urllib.parse.quote(url_limit,safe='')
-        ts = str(int(datetime.datetime.now().timestamp()))
-        ts = '1600423244'
+        # ts = str(int(datetime.datetime.now().timestamp()))
+        ts = '1600650372'
        url_str = 'GET&%s&%s' % (url_limit,ts)
        # print(url_str)
        sig_sha1 = hmac.new(sign.encode('utf-8'), url_str.encode('utf-8'), digestmod='SHA1')
        sig_sha1 = sig_sha1.hexdigest().upper()
-        bytes_arry = bytearray.fromhex(sig_sha1)
-        # print([x for x in bytearray(sig.hexdigest(),'utf_8')])
-        # print(bytearray(sig.hexdigest()))
-        # binData = ''.join(map(lambda x: chr(x % 256), data))
-        sig = bytes.decode(base64.encodebytes(bytes_arry)).replace('\n','')
-        print(urllib.parse.quote(sig,safe=''))
-        return ts, urllib.parse.quote(sig,safe='')
+        # bytes_arry = bytearray.fromhex(sig_sha1)
+        # print([x for x in bytearray(bytes_arry,'utf_8')])
+        # print(bytearray(sig_sha1.hexdigest()))
+        # print(''.join(map(lambda x: chr(x % 256), bytearray(sig_sha1.hexdigest()))))
+        sig = bytes.decode(base64.encodebytes(bytearray.fromhex(sig_sha1))).replace('\n','')
+        # print(urllib.parse.quote(sig,safe=''))
+        return ts, sig

    def gooseneck(self,releaserUrl,output_to_file=False, filepath=None,
                  output_to_es_raw=False,
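For reference, the signature logic in get_sig reduces to an HMAC-SHA1 over the string "GET&<percent-encoded path>&<ts>" with the key "bf7dddc7c9cfe6f7", whose raw digest is then base64-encoded. Below is a minimal standalone sketch of that scheme, not part of the commit: the function name douban_sig is hypothetical, it uses hmac's .digest() directly (byte-for-byte the same as the hexdigest().upper() + bytearray.fromhex() round-trip in the diff), and it derives a live timestamp instead of the hard-coded ts pinned above.

import base64
import datetime
import hmac
import urllib.parse

def douban_sig(path, key="bf7dddc7c9cfe6f7"):
    # Hypothetical helper mirroring CrawlerDouban.get_sig: sign a GET request
    # path for frodo.douban.com and return the (_ts, _sig) query values.
    ts = str(int(datetime.datetime.now().timestamp()))
    # The request path is percent-encoded with no safe characters before signing.
    payload = 'GET&%s&%s' % (urllib.parse.quote(path, safe=''), ts)
    # Raw HMAC-SHA1 digest, then base64 with the trailing newline stripped --
    # the same bytes the diff builds via hexdigest().upper() + bytearray.fromhex().
    digest = hmac.new(key.encode('utf-8'), payload.encode('utf-8'),
                      digestmod='SHA1').digest()
    sig = base64.encodebytes(digest).decode().replace('\n', '')
    return ts, sig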
@@ -133,8 +134,8 @@ class CrawlerDouban():
        has_more = True
        ts,sig = self.get_sig('/api/v2/group/248952/topics')
        url_dic = {
-            "start": None,
-            "count": "20",
+            # "start": None,
+            "count": "100",
            "sortby": "new",
            # "apple": "389276ed556d40cada2e208482b51cd7",
            # "icecream": "7b92c1aa7b531d1500c6e4905de2ca76",
@@ -155,7 +156,8 @@ class CrawlerDouban():
        }
        while page <= releaser_page_num_max and has_more:
            # url_dic["_ts"] = int(datetime.datetime.now().timestamp())
-            url_dic["start"] = str(page * 20)
+            if page:
+                url_dic["start"] = str(page * 20)
            if "hot_tag" in releaserUrl:
                url_dic["sortby"] = "hot"
            elif "new_tag" in releaserUrl:
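The last two hunks drop the explicit "start" offset on the first page and raise the page size to 100. A rough sketch of how the signed listing request could be assembled from those parameters follows; it is not code from the commit. It uses plain requests in place of the project's retry_get_url helper, the fetch_group_topics name and max_pages limit are invented for illustration, and passing the apikey (taken from the topic-detail URL earlier in the diff) to the listing endpoint is an assumption.

import urllib.parse
import requests

def fetch_group_topics(crawler, proxies=None, max_pages=3):
    # Hypothetical driver for the group-topics listing shown in the diff;
    # `crawler` is expected to expose get_sig() and headers like CrawlerDouban.
    path = '/api/v2/group/248952/topics'
    ts, sig = crawler.get_sig(path)
    for page in range(max_pages):
        url_dic = {
            'count': '100',
            'sortby': 'new',
            'apikey': '0dad551ec0f84ed02907ff5c42e8ec70',  # assumed; copied from the topic-detail URL
            '_sig': sig,
            '_ts': ts,
        }
        if page:
            # Only pages after the first carry an explicit offset, matching
            # the `if page:` guard added in the last hunk.
            url_dic['start'] = str(page * 20)
        url = 'https://frodo.douban.com%s?%s' % (path, urllib.parse.urlencode(url_dic))
        yield requests.get(url, headers=crawler.headers, proxies=proxies).json()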