Commit f74bbb7f authored by litaolemo

update

parent 42f394f9
......@@ -62,7 +62,7 @@ class CrawlerDouban():
def get_single_page(self,mid,proxies):
count_true = 0
while count_true <= 3:
while count_true <= 5:
try:
count_true += 1
url = "https://frodo.douban.com/api/v2/group/topic/{0}?event_source=search&os_rom=android&apikey=0dad551ec0f84ed02907ff5c42e8ec70&channel=Baidu_Market&_sig={2}&udid=dc{1}e9f33c54b4bb579c49100b6f2cc0dc5cc".format(mid,random.randint(10000,99999),random.choice(self.sig_list))
......@@ -93,6 +93,8 @@ class CrawlerDouban():
except Exception as e:
print("single page error %s"% e)
continue
print("single page error")
return None
# Thin wrapper: forwards releaserUrl, together with this crawler's
# self.platform, to the get_releaser_id callable that is in scope outside the
# class (a bare name inside a method body never resolves to the method itself),
# and returns whatever that callable returns.
# NOTE(review): presumably the result is the releaser's ID parsed from the
# URL; confirm against the helper's definition, which is not visible here.
def get_releaser_id(self, releaserUrl):
return get_releaser_id(platform=self.platform, releaserUrl=releaserUrl)
......@@ -159,8 +161,8 @@ class CrawlerDouban():
for one in page_dic:
releaser_id = one["author"]["id"]
mid = one["id"]
try:
if True:
# try:
res_dic = {
"release_time": trans_strtime_to_timestamp(one["create_time"]),
"url": one["url"],
......@@ -180,13 +182,13 @@ class CrawlerDouban():
doc_id_type="all-time-url")
res_dic["doc_id"] = doc_id
res_dic.update(self.get_single_page(mid,proxies_num))
print(res_dic)
# print(res_dic)
yield res_dic
except Exception as e:
print(one)
print("row formate error %s" % e)
continue
# except Exception as e:
# print(one)
# print("row formate error %s" % e)
# continue
# @logged
def releaser_page(self, releaserUrl,
......@@ -244,7 +246,9 @@ if __name__ == '__main__':
# for r in res:
# print(r)
for u in url_list:
test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_register=True,
ttt = test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_register=True,
es_index='crawler-data-raw',
doc_type='doc', releaser_page_num_max=4000)
doc_type='doc', releaser_page_num_max=4000,allow=20)
for t in ttt:
print(t)
# test.get_single_page(4524055937468233)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment