Commit cb9f6c08 authored by litaolemo

update

parent 86b39438
......@@ -109,4 +109,5 @@ def send_one_data_to_mysql(_id):
if __name__ == "__main__":
    # Manual entry point: the full scan stays disabled and only the
    # hand-picked document ids below are pushed to MySQL.
    # scan_es_to_mysql()
    # NOTE(review): the first id also appears as a commented-out call in
    # this listing -- confirm whether it is still meant to run.
    for doc_id in (
        "zhihu_283857656_480262861",
        "zhihu_65123027_648018097",
    ):
        send_one_data_to_mysql(doc_id)
......@@ -249,28 +249,6 @@ class Crawler_zhihu():
proxies_num=proxies_num,**kwargs)
@staticmethod
def get_single_page(mid):
    """Fetch one mobile Weibo status page and extract its text and counters.

    The page is requested from ``https://m.weibo.cn/status/<mid>`` and the
    JSON assigned to ``render_data`` inside the returned HTML is parsed.

    Args:
        mid: Status identifier substituted into the status URL.

    Returns:
        A 4-tuple ``(text, repost_count, comment_count, favorite_count)``.
        ``text`` is the status ``text`` field passed through ``dehtml``;
        the three counts are the ``reposts_count``, ``comments_count`` and
        ``attitudes_count`` fields, each passed through ``trans_play_count``.

    Raises:
        IndexError: If no ``render_data = ...[0]`` assignment is found in
            the page text.
    """
    status_url = "https://m.weibo.cn/status/%s" % mid
    request_headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
        # "cookie": "_T_WM=68345544646; WEIBOCN_FROM=1110006030; MLOGIN=0; XSRF-TOKEN=fd1a69; M_WEIBOCN_PARAMS=oid%3D4523948446845543%26luicode%3D20000061%26lfid%3D4528703037509890%26uicode%3D20000061%26fid%3D4523948446845543",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "same-origin",
        "sec-fetch-site": "same-origin",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36",
    }
    response = retry_get_url(status_url, headers=request_headers, proxies=0)

    # The page embeds its data as ``render_data = [...][0]``; the capture
    # group keeps the JSON array and drops the trailing ``[0]`` index.
    render_data_matches = re.findall(
        r"render_data = (.*)\[0\]", response.text, flags=re.DOTALL
    )
    status = json.loads(render_data_matches[0])[0]["status"]

    text = dehtml(status["text"])
    # Same order as the returned tuple: reposts, comments, attitudes.
    counts = [
        trans_play_count(status[field])
        for field in ("reposts_count", "comments_count", "attitudes_count")
    ]
    return (text, *counts)
def get_releaser_id(self, releaserUrl):
    """Return the releaser id for ``releaserUrl`` on this crawler's platform.

    Forwards ``self.platform`` and ``releaserUrl`` as keyword arguments to
    the ``get_releaser_id`` helper resolved from the enclosing scope
    (presumably a module-level function of the same name -- confirm) and
    returns its result unchanged.
    """
    lookup_kwargs = {"platform": self.platform, "releaserUrl": releaserUrl}
    return get_releaser_id(**lookup_kwargs)
......
......@@ -161,6 +161,7 @@ def push_data_to_user(res_data: Dict) -> Dict:
res_data["content"] = content
# 处理格式
res_data["content"] = gm_convert_html_tags(res_data["content"], all_tags=True)
print(res_data)
return res_data
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment