Commit 2838ff4e authored by litaolemo

update

parent 9f62ee57
......@@ -144,8 +144,7 @@ class CrawlerDouban():
else:
get_page = retry_get_url(url, headers=self.headers, timeout=self.timeout)
except:
get_page = None
has_more = False
continue
if get_page and get_page.status_code == 200:
try:
page_json = get_page.json()
......@@ -157,35 +156,39 @@ class CrawlerDouban():
except Exception as e:
print("load data error %s" % e)
continue
if page_dic:
for one in page_dic:
releaser_id = one["author"]["id"]
mid = one["id"]
if True:
# try:
res_dic = {
"release_time": trans_strtime_to_timestamp(one["create_time"]),
"fetch_time": int(datetime.datetime.now().timestamp()*1e3),
"url": one["url"],
"releaser": one["author"]["name"],
"repost_count": None,
"comment_count": trans_play_count(one["comments_count"]),
"favorite_count": None,
"title": one["title"],
"releaserUrl": "https://www.douban.com/people/%s" % releaser_id,
"releaser_id_str": "douban_%s" % releaser_id,
'video_img':one["cover_url"],
"mid":mid,
"platform":"douban",
# "doc_id":doc_id
}
doc_id = cal_doc_id(platform=res_dic["platform"], url=res_dic["url"], data_dict=res_dic,
doc_id_type="all-time-url")
res_dic["doc_id"] = doc_id
res_dic.update(self.get_single_page(mid,proxies_num))
# print(res_dic)
yield res_dic
try:
releaser_id = one["author"]["id"]
mid = one["id"]
if True:
# try:
res_dic = {
"release_time": trans_strtime_to_timestamp(one["create_time"]),
"fetch_time": int(datetime.datetime.now().timestamp()*1e3),
"url": one["url"],
"releaser": one["author"]["name"],
"repost_count": None,
"comment_count": trans_play_count(one["comments_count"]),
"favorite_count": None,
"title": one["title"],
"releaserUrl": "https://www.douban.com/people/%s" % releaser_id,
"releaser_id_str": "douban_%s" % releaser_id,
'video_img':one["cover_url"],
"mid":mid,
"platform":"douban",
# "doc_id":doc_id
}
doc_id = cal_doc_id(platform=res_dic["platform"], url=res_dic["url"], data_dict=res_dic,
doc_id_type="all-time-url")
res_dic["doc_id"] = doc_id
res_dic.update(self.get_single_page(mid,proxies_num))
# print(res_dic)
yield res_dic
except Exception as e:
print("single data parse error %s " %e)
# except Exception as e:
# print(one)
# print("row formate error %s" % e)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment