update

e8d6278c · litaolemo · 33c3ec0c · e8d6278c
Commit e8d6278c authored Aug 25, 2020 by litaolemo
Hide whitespace changes
Inline Side-by-side

Showing with 20 additions and 17 deletions

crawler_douban.py crawler_sys/site_crawler_test/crawler_douban.py +20 -17

No files found.
--- a/crawler_sys/site_crawler_test/crawler_douban.py
+++ b/crawler_sys/site_crawler_test/crawler_douban.py
@@ -49,16 +49,18 @@ class Crawler_douban():
        for popk in pop_key_Lst:
            self.video_data.pop(popk)
        self.sig_list = [
-            "aOI2VYvkFvPfUngaeoz%2BNYQ7MQM%3D",
+            # "aOI2VYvkFvPfUngaeoz%2BNYQ7MQM%3D",
-            "Glc52sbPO46I%2FR%2FOCjl%2BGwKo94I%3D",
+            # "Glc52sbPO46I%2FR%2FOCjl%2BGwKo94I%3D",
-            "l9oVu%2FYau2UwMyhc5m8ldALp5eU%3D",
+            # "l9oVu%2FYau2UwMyhc5m8ldALp5eU%3D",
-            "tL36trbi73v7Y057K10%2FQ9fdCiA%3D"
+            # "tL36trbi73v7Y057K10%2FQ9fdCiA%3D",
+            "vu4h6fzkqrvpNxWOYee95RPPV04="
        ]
        self.headers = {
-            "User-Agent": "api-client/1 com.douban.frodo/10.39.0(189) Android/23 product/cancro vendor/Netease model/Miui rom/android  network/wifi  platform/AndroidPad",
+            "User-Agent": "api-client/1 com.douban.frodo/6.39.0(189) Android/23 product/oppo R11s Plus vendor/OPPO model/oppo R11s Plus  rom/android  network/wifi  platform/AndroidPad",
            "Host": "frodo.douban.com",
            "Connection": "Keep-Alive",
            "Accept-Encoding": "gzip",
+            "Authorization": "Bearer ee99197a01a77702cbcb4c6e04f66506",
        }
    def get_single_page(self,mid,proxies):
@@ -113,26 +115,26 @@ class Crawler_douban():
            "count": "20",
            "sortby": "new",
            "apple": "389276ed556d40cada2e208482b51cd7",
-            "icecream": "ffd8f7d71419a98e48819cbac587ebbd",
+            "icecream": "7b92c1aa7b531d1500c6e4905de2ca76",
            "mooncake": "0f607264fc6318a92b9e13c65db7cd3c",
-            "webview_ua": "Mozilla%2F5.0%20%28Linux%3B%20Android%2010.0.1%3B%20Miui%20Build%2FV417IR%3B%20wv%29%20AppleWebKit%2F537.36%20%28KHTML%2C%20like%20Gecko%29%20Version%2F4.0%20Chrome%2F52.0.2743.100%20Mobile%20Safari%2F537.36",
+            "webview_ua": "Mozilla/5.0 (Linux; Android 6.0.1; oppo R11s Plus Build/V417IR; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.100 Mobile Safari/537.36",
-            "screen_width": "810",
+            "screen_width": "1080",
-            "screen_height": "1440",
+            "screen_height": "1920",
-            "sugar": "0",
+            "sugar": "460000",
-            "longitude": "0",
+            "longitude": "0.0",
-            "latitude": "0",
+            "latitude": "0.0",
            "os_rom": "android",
            "apikey": "0dad551ec0f84ed02907ff5c42e8ec70",
            "channel": "Baidu_Market",
-            "udid": "dc{0}e9f33c54b4bb579c49100b6f2cc0dc5cc".format(random.randint(10000,99999)),
+            "udid": "dc18733e9f33c54b4bb579c49100b6f2cc0dc5cc",
            "_sig": random.choice(self.sig_list),
-            "_ts": None,
+            "_ts": 1598337519,
        }
        while page <= releaser_page_num_max and has_more:
-            url_dic["_ts"] = int(datetime.datetime.now().timestamp())
+            # url_dic["_ts"] = int(datetime.datetime.now().timestamp())
            url_dic["start"] = str(page * 20)
-            url = "https://frodo.douban.com/api/v2/group/248952/topics?%s" % urllib.parse.urlencode(url_dic)
+            url = "http://frodo.douban.com/api/v2/group/248952/topics?%s" % urllib.parse.urlencode(url_dic)
            try:
                if proxies_num:
                    get_page = retry_get_url(url, headers=self.headers, timeout=self.timeout, proxies=proxies_num)
@@ -288,5 +290,5 @@ if __name__ == '__main__':
    for u in url_list:
        test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_register=True,
                                          es_index='crawler-data-raw',
-                                          doc_type='doc', releaser_page_num_max=4000)
+                                          doc_type='doc', releaser_page_num_max=4000,proxies_num=1)
    # test.get_single_page(4524055937468233)
\ No newline at end of file