Commit e8d6278c authored by litaolemo

update

parent 33c3ec0c
......@@ -49,16 +49,18 @@ class Crawler_douban():
for popk in pop_key_Lst:
self.video_data.pop(popk)
self.sig_list = [
"aOI2VYvkFvPfUngaeoz%2BNYQ7MQM%3D",
"Glc52sbPO46I%2FR%2FOCjl%2BGwKo94I%3D",
"l9oVu%2FYau2UwMyhc5m8ldALp5eU%3D",
"tL36trbi73v7Y057K10%2FQ9fdCiA%3D"
# "aOI2VYvkFvPfUngaeoz%2BNYQ7MQM%3D",
# "Glc52sbPO46I%2FR%2FOCjl%2BGwKo94I%3D",
# "l9oVu%2FYau2UwMyhc5m8ldALp5eU%3D",
# "tL36trbi73v7Y057K10%2FQ9fdCiA%3D",
"vu4h6fzkqrvpNxWOYee95RPPV04="
]
self.headers = {
"User-Agent": "api-client/1 com.douban.frodo/10.39.0(189) Android/23 product/cancro vendor/Netease model/Miui rom/android network/wifi platform/AndroidPad",
"User-Agent": "api-client/1 com.douban.frodo/6.39.0(189) Android/23 product/oppo R11s Plus vendor/OPPO model/oppo R11s Plus rom/android network/wifi platform/AndroidPad",
"Host": "frodo.douban.com",
"Connection": "Keep-Alive",
"Accept-Encoding": "gzip",
"Authorization": "Bearer ee99197a01a77702cbcb4c6e04f66506",
}
def get_single_page(self,mid,proxies):
......@@ -113,26 +115,26 @@ class Crawler_douban():
"count": "20",
"sortby": "new",
"apple": "389276ed556d40cada2e208482b51cd7",
"icecream": "ffd8f7d71419a98e48819cbac587ebbd",
"icecream": "7b92c1aa7b531d1500c6e4905de2ca76",
"mooncake": "0f607264fc6318a92b9e13c65db7cd3c",
"webview_ua": "Mozilla%2F5.0%20%28Linux%3B%20Android%2010.0.1%3B%20Miui%20Build%2FV417IR%3B%20wv%29%20AppleWebKit%2F537.36%20%28KHTML%2C%20like%20Gecko%29%20Version%2F4.0%20Chrome%2F52.0.2743.100%20Mobile%20Safari%2F537.36",
"screen_width": "810",
"screen_height": "1440",
"sugar": "0",
"longitude": "0",
"latitude": "0",
"webview_ua": "Mozilla/5.0 (Linux; Android 6.0.1; oppo R11s Plus Build/V417IR; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.100 Mobile Safari/537.36",
"screen_width": "1080",
"screen_height": "1920",
"sugar": "460000",
"longitude": "0.0",
"latitude": "0.0",
"os_rom": "android",
"apikey": "0dad551ec0f84ed02907ff5c42e8ec70",
"channel": "Baidu_Market",
"udid": "dc{0}e9f33c54b4bb579c49100b6f2cc0dc5cc".format(random.randint(10000,99999)),
"udid": "dc18733e9f33c54b4bb579c49100b6f2cc0dc5cc",
"_sig": random.choice(self.sig_list),
"_ts": None,
"_ts": 1598337519,
}
while page <= releaser_page_num_max and has_more:
url_dic["_ts"] = int(datetime.datetime.now().timestamp())
# url_dic["_ts"] = int(datetime.datetime.now().timestamp())
url_dic["start"] = str(page * 20)
url = "https://frodo.douban.com/api/v2/group/248952/topics?%s" % urllib.parse.urlencode(url_dic)
url = "http://frodo.douban.com/api/v2/group/248952/topics?%s" % urllib.parse.urlencode(url_dic)
try:
if proxies_num:
get_page = retry_get_url(url, headers=self.headers, timeout=self.timeout, proxies=proxies_num)
......@@ -288,5 +290,5 @@ if __name__ == '__main__':
for u in url_list:
test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_register=True,
es_index='crawler-data-raw',
doc_type='doc', releaser_page_num_max=4000)
doc_type='doc', releaser_page_num_max=4000,proxies_num=1)
# test.get_single_page(4524055937468233)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment