Commit 5634a341 authored by litaolemo

update

parent 9208da8a
...@@ -137,9 +137,9 @@ for pid in pid_list: ...@@ -137,9 +137,9 @@ for pid in pid_list:
# print(desc_fix) # print(desc_fix)
res = rds.hset("xiaohongshu_with_img", key=pid, value=json.dumps(res_json)) res = rds.hset("xiaohongshu_with_img", key=pid, value=json.dumps(res_json))
if res_json["NoteView"]["author"]['id'] in gm_user_id_list: if res_json["NoteView"]["author"]['id'] in gm_user_id_list:
video_dic["level"] = "4" video_dic["level"] = "5"
else: else:
video_dic["level"] = "2" video_dic["level"] = "3"
video_dic["platform"] = "9" video_dic["platform"] = "9"
video_dic["platform_id"] = pid video_dic["platform_id"] = pid
video_dic["platform_answer_id"] = pid video_dic["platform_answer_id"] = pid
......
...@@ -33,7 +33,7 @@ platfrom_id_dict = { ...@@ -33,7 +33,7 @@ platfrom_id_dict = {
data_type_dict = { data_type_dict = {
"cims/question/batch_create": ["platform","platform_id","title","content","user_id","create_time","is_online"], "cims/question/batch_create": ["platform","platform_id","title","content","user_id","create_time","is_online"],
"cims/answer/batch_create": ["platform","platform_id","platform_question_id","content","user_id","create_time","is_online"], "cims/answer/batch_create": ["platform","platform_id","platform_question_id","content","user_id","create_time","is_online",'level'],
"cims/reply/batch_create": ["platform","platform_id","platform_answer_id","content","user_id","create_time","is_online"] "cims/reply/batch_create": ["platform","platform_id","platform_answer_id","content","user_id","create_time","is_online"]
} }
dic_type = { dic_type = {
......
...@@ -5,6 +5,9 @@ ...@@ -5,6 +5,9 @@
# @author : litao # @author : litao
import hashlib import hashlib
import json
import redis
from crawler_sys.utils.output_results import retry_get_url from crawler_sys.utils.output_results import retry_get_url
...@@ -39,24 +42,37 @@ def cc(): ...@@ -39,24 +42,37 @@ def cc():
i2 += 1 i2 += 1
if __name__ == "__main__": if __name__ == "__main__":
url = "https://www.xiaohongshu.com/user/profile/5ea6909900000000010057a3" # url = "https://www.xiaohongshu.com/user/profile/5ea6909900000000010057a3"
headers = { # headers = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", # "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"accept-encoding": "gzip, deflate", # "accept-encoding": "gzip, deflate",
"accept-language": "zh-CN,zh;q=0.9", # "accept-language": "zh-CN,zh;q=0.9",
"cache-control": "no-cache", # "cache-control": "no-cache",
"cookie": "xhsTracker=url=user-profile&xhsshare=CopyLink; xhsTrackerId=38ec6dcb-d882-45e8-c539-834386696a14; xhsuid=Dv4OTnGbDg9LivGu; timestamp2=202101062497d4bed842476b2618e0ea; timestamp2.sig=-Jax1vd_iNZtToaWYMOMoFUmCJwojKQPnfP8iMeOpAc; xhs_spses.5dde=*; extra_exp_ids=gif_clt1,ques_clt1; xhs_spid.5dde=59a50d47116c4333.1609921946.3.1610074315.1609997760.3be8232b-0407-44f8-8036-d40a4c47b120", # "cookie": "xhsTracker=url=user-profile&xhsshare=CopyLink; xhsTrackerId=38ec6dcb-d882-45e8-c539-834386696a14; xhsuid=Dv4OTnGbDg9LivGu; timestamp2=202101062497d4bed842476b2618e0ea; timestamp2.sig=-Jax1vd_iNZtToaWYMOMoFUmCJwojKQPnfP8iMeOpAc; xhs_spses.5dde=*; extra_exp_ids=gif_clt1,ques_clt1; xhs_spid.5dde=59a50d47116c4333.1609921946.3.1610074315.1609997760.3be8232b-0407-44f8-8036-d40a4c47b120",
"pragma": "no-cache", # "pragma": "no-cache",
"referer": "https://www.xiaohongshu.com/discovery/item/5f90384e000000000100b8f0", # "referer": "https://www.xiaohongshu.com/discovery/item/5f90384e000000000100b8f0",
"sec-ch-ua": '"Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"', # "sec-ch-ua": '"Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"',
"sec-ch-ua-mobile": "?0", # "sec-ch-ua-mobile": "?0",
"sec-fetch-dest": "document", # "sec-fetch-dest": "document",
"sec-fetch-mode": "navigate", # "sec-fetch-mode": "navigate",
"sec-fetch-site": "same-origin", # "sec-fetch-site": "same-origin",
"sec-fetch-user": "?1", # "sec-fetch-user": "?1",
"upgrade-insecure-requests": "1", # "upgrade-insecure-requests": "1",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", # "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"Hm_lvt_900d393eff703909946efe28447affd3": "1596187047", # "Hm_lvt_900d393eff703909946efe28447affd3": "1596187047",
} # }
res = retry_get_url(url, headers=headers, proxies=0) # res = retry_get_url(url, headers=headers, proxies=0)
print(res.text) # print(res.text)
rds = redis.StrictRedis(host='172.18.51.10', port=6379, db=17, decode_responses=True)
pid_list = rds.hkeys("xiaohongshu")
for pid in pid_list:
res = rds.hget("xiaohongshu", pid)
res_json = json.loads(res)
if res_json["NoteView"].get("comments"):
# print(res_json["NoteView"].get("data"))
for comment in res_json["NoteView"]["comments"]["data"]:
print(comment)
elif res_json["NoteView"].get("commentInfo"):
for comment in res_json["NoteView"]["commentInfo"]["comments"]:
print(comment)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment