Commit 53307e8c authored by litaolemo's avatar litaolemo

更新小红书爬虫

parent c68e576c
......@@ -9,7 +9,7 @@ import random
import time
import redis, json
from crawler.crawler_sys.utils.rpc_data_to_answer import post_single_data,post_muilty_data
# from crawler.crawler_sys.utils.rpc_data_to_answer import post_single_data,post_muilty_data
from crawler_sys.utils.output_results import retry_get_url
from crawler.gm_upload.gm_upload import upload, upload_file
from crawler.crawler_sys.scheduler.redis_to_rpc.rpc_config import *
......@@ -20,6 +20,161 @@ gm_user_id_list = [
'5c20dd200000000007027c07',
'5fe1c1ba0000000001006e65']
def xiaohongshu_pc(res_json, pid):
    """Push one PC-web Xiaohongshu note (a "NoteView" payload) to the cims RPC.

    The note's images are fetched, re-uploaded through ``upload`` and appended
    to the description as ``<img>`` tags. The note is then posted as a
    question ("cims/question/batch_create") and as an answer
    ("cims/answer/batch_create"); its comments, if any, are posted in one
    batch to "cims/reply/batch_create".

    Args:
        res_json: Parsed note payload containing a "NoteView" mapping.
        pid: Ignored on input; the note id is always re-read from
            ``res_json``. Kept so existing callers keep working.

    Returns:
        The working dict that was sent to the RPC endpoints. The same dict is
        reused for every comment, so after comments are processed it holds
        the fields of the last comment. Errors raised after the image step
        are printed and the partially filled dict is returned.

    Raises:
        KeyError: If "NoteView", its "content", the note id or "imageList"
            is missing (these lookups are not guarded).
    """
    video_dic = {}
    qiniu_img_list = []
    note_view = res_json["NoteView"]
    content = note_view["content"]
    try:
        pid = note_view["commentInfo"]["targetNoteId"]
    except (KeyError, TypeError):
        # No usable commentInfo: fall back to the id stored on the note itself.
        pid = content["id"]
    for img_url in content["imageList"]:
        try:
            img_wb = retry_get_url(
                "http:" + img_url["url"].replace(img_url['fileId'], img_url['traceId'])).content
            res = upload(img_wb, img_type=99)
            # The result is not used, but a failure here (request or JSON
            # decoding) skips the image, exactly as before.
            retry_get_url(res + "-imageinfo").json()
            qiniu_img_list.append('<img src="' + res + '-w">')
        except Exception as e:
            print("down load img error %s" % e)
            continue
    try:
        desc_fix = "<p>" + content['desc'].replace('\n', '<br>') + "".join(
            qiniu_img_list) + "</p>"
        content["desc_fix"] = desc_fix
        if note_view["author"]['id'] in gm_user_id_list:
            video_dic["level"] = "5"
        else:
            video_dic["level"] = "3"
        video_dic["platform"] = "9"
        video_dic["platform_id"] = pid
        video_dic["platform_answer_id"] = pid
        video_dic["title"] = content["title"]
        # Work on a copy so the asker can be removed and the answerer is
        # guaranteed to be a different user.
        user_id_list_copy = list(user_id_list)
        qustion_id = random.choice(user_id_list_copy)
        user_id_list_copy.remove(qustion_id)
        video_dic["user_id"] = qustion_id
        create_time = datetime.datetime.strptime(content["time"], '%Y-%m-%d %H:%M')
        video_dic["create_time"] = create_time.timestamp()
        post_single_data(copy.deepcopy(video_dic), "cims/question/batch_create")
        video_dic["platform_question_id"] = pid
        video_dic["content"] = desc_fix
        video_dic["user_id"] = random.choice(user_id_list_copy)
        post_single_data(copy.deepcopy(video_dic), "cims/answer/batch_create")
        comment_list = []
        try:
            # The payload carries comments in one of two layouts.
            if note_view.get("comments"):
                raw_comments = note_view["comments"]["data"]
            elif note_view.get("commentInfo"):
                raw_comments = note_view["commentInfo"]["comments"]
            else:
                raw_comments = []
            for comment in raw_comments:
                video_dic["content"] = comment['content']
                video_dic["platform_id"] = comment['id']
                # Each comment independently gets a random user; the previous
                # per-comment deepcopy/remove of the list had no effect.
                video_dic["user_id"] = random.choice(majiayonghu_list)
                # Spread comment times randomly after the note's own time.
                offset = datetime.timedelta(hours=random.randint(0, 24),
                                            minutes=random.randint(0, 60))
                video_dic["create_time"] = (create_time + offset).timestamp()
                comment_list.append(copy.deepcopy(video_dic))
            if comment_list:
                post_muilty_data(comment_list, "cims/reply/batch_create")
        except Exception as e:
            print("comment error")
            print(e)
    except Exception as e:
        print(e)
    return video_dic
def xiaohongshu_xiaochengxu(res_json):
    """Push one mini-program Xiaohongshu note (a "data" payload) to the cims RPC.

    The note's images are fetched, re-uploaded through ``upload`` and appended
    to the description as ``<img>`` tags. The note is then posted as a
    question ("cims/question/batch_create") and as an answer
    ("cims/answer/batch_create"); entries of "commentList", if any, are
    posted in one batch to "cims/reply/batch_create".

    Args:
        res_json: Parsed note payload containing a "data" mapping.

    Returns:
        The working dict that was sent to the RPC endpoints (holding the last
        comment's fields once comments were processed), or an empty dict when
        the payload has no note id. Errors raised after the image step are
        printed and the partially filled dict is returned.

    Raises:
        KeyError: If "data" or its "imageList" is missing (these lookups are
            not guarded).
    """
    video_dic = {}
    qiniu_img_list = []
    data = res_json["data"]
    try:
        pid = data["id"]
    except (KeyError, TypeError):
        # Previously this was swallowed, images were still uploaded, and the
        # run only failed later on the unbound ``pid``. Stop early instead.
        print("xiaohongshu note id missing, skip")
        return video_dic
    for img_url in data["imageList"]:
        try:
            img_wb = retry_get_url(
                img_url["url"].replace(img_url['fileId'], img_url['traceId'])).content
            res = upload(img_wb, img_type=99)
            # The result is not used, but a failure here (request or JSON
            # decoding) skips the image, exactly as before.
            retry_get_url(res + "-imageinfo").json()
            qiniu_img_list.append('<img src="' + res + '-w">')
        except Exception as e:
            print("down load img error %s" % e)
            continue
    try:
        desc_fix = "<p>" + data['desc'].replace('\n', '<br>') + "".join(
            qiniu_img_list) + "</p>"
        if data["user"]['id'] in gm_user_id_list:
            video_dic["level"] = "5"
        else:
            video_dic["level"] = "3"
        video_dic["platform"] = "9"
        video_dic["platform_id"] = pid
        video_dic["platform_answer_id"] = pid
        video_dic["title"] = data["title"]
        # Work on a copy so the asker can be removed and the answerer is
        # guaranteed to be a different user.
        user_id_list_copy = list(user_id_list)
        qustion_id = random.choice(user_id_list_copy)
        user_id_list_copy.remove(qustion_id)
        video_dic["user_id"] = qustion_id
        create_time = datetime.datetime.strptime(data["time"], '%Y-%m-%d %H:%M')
        video_dic["create_time"] = create_time.timestamp()
        post_single_data(copy.deepcopy(video_dic), "cims/question/batch_create")
        video_dic["platform_question_id"] = pid
        video_dic["content"] = desc_fix
        video_dic["user_id"] = random.choice(user_id_list_copy)
        post_single_data(copy.deepcopy(video_dic), "cims/answer/batch_create")
        comment_list = []
        try:
            for comment in data.get("commentList") or []:
                video_dic["content"] = comment['content']
                video_dic["platform_id"] = comment['id']
                # Each comment independently gets a random user; the previous
                # per-comment deepcopy/remove of the list had no effect.
                video_dic["user_id"] = random.choice(majiayonghu_list)
                # Spread comment times randomly after the note's own time.
                offset = datetime.timedelta(hours=random.randint(0, 24),
                                            minutes=random.randint(0, 60))
                video_dic["create_time"] = (create_time + offset).timestamp()
                comment_list.append(copy.deepcopy(video_dic))
            if comment_list:
                post_muilty_data(comment_list, "cims/reply/batch_create")
        except Exception as e:
            print("comment error")
            print(e)
    except Exception as e:
        print(e)
    return video_dic
rds = redis.StrictRedis(host='172.18.51.10', port=6379, db=20, decode_responses=True)
while True:
......@@ -32,82 +187,11 @@ while True:
continue
line = rds.hget("xiaohongshu", pid)
res_json = json.loads(line)
video_dic = {}
qiniu_img_list = []
try:
pid = res_json["NoteView"]["commentInfo"]["targetNoteId"]
except:
pid = res_json["NoteView"]["content"]["id"]
for img_url in res_json["NoteView"]["content"]["imageList"]:
try:
img_wb = retry_get_url("http:" + img_url["url"].replace(img_url['fileId'],img_url['traceId'])).content
res = upload(img_wb, img_type=99)
# print(res)
img_info = retry_get_url(res + "-imageinfo")
img_info_json = img_info.json()
qiniu_img_list.append('<img src="' + res + '-w">')
except Exception as e:
print("down load img error %s" % e)
continue
try:
desc_fix = "<p>" + res_json["NoteView"]["content"]['desc'].replace('\n', '<br>') + "".join(qiniu_img_list) + "</p>"
res_json["NoteView"]["content"]["desc_fix"] = desc_fix
if res_json["NoteView"]["author"]['id'] in gm_user_id_list:
video_dic["level"] = "5"
else:
video_dic["level"] = "3"
video_dic["platform"] = "9"
video_dic["platform_id"] = pid
video_dic["platform_answer_id"] = pid
video_dic["title"] = res_json["NoteView"]["content"]["title"]
if res_json.get("NoteView"):
xiaohongshu_pc(res_json,pid)
elif res_json.get("data"):
xiaohongshu_xiaochengxu(res_json)
user_id_list_copy = copy.deepcopy(user_id_list)
qustion_id = random.choice(user_id_list_copy)
user_id_list_copy.remove(qustion_id)
video_dic["user_id"] = qustion_id
create_time = datetime.datetime.strptime(res_json["NoteView"]["content"]["time"],
'%Y-%m-%d %H:%M')
video_dic["create_time"] = create_time.timestamp()
rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/question/batch_create")
# print(rpc_res)
video_dic["platform_question_id"] = pid
video_dic["content"] = desc_fix
video_dic["user_id"] = random.choice(user_id_list_copy)
rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/answer/batch_create")
comment_list = []
try:
if res_json["NoteView"].get("comments"):
# print(res_json["NoteView"].get("data"))
for comment in res_json["NoteView"]["comments"]["data"]:
video_dic["content"] = comment['content']
video_dic["platform_id"] = comment['id']
comment_id_list_copy = copy.deepcopy(majiayonghu_list)
comment_id = random.choice(comment_id_list_copy)
video_dic["user_id"] = comment_id
comment_id_list_copy.remove(comment_id)
video_dic["create_time"] = (create_time + datetime.timedelta(hours=random.randint(0, 24),
minutes=random.randint(0, 60))).timestamp()
comment_list.append(copy.deepcopy(video_dic))
# rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/reply/batch_create")
elif res_json["NoteView"].get("commentInfo"):
for comment in res_json["NoteView"]["commentInfo"]["comments"]:
video_dic["content"] = comment['content']
video_dic["platform_id"] = comment['id']
comment_id_list_copy = copy.deepcopy(majiayonghu_list)
comment_id = random.choice(comment_id_list_copy)
video_dic["user_id"] = comment_id
comment_id_list_copy.remove(comment_id)
video_dic["create_time"] = (create_time + datetime.timedelta(hours=random.randint(0,24),minutes=random.randint(0,60))).timestamp()
comment_list.append(copy.deepcopy(video_dic))
# rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/reply/batch_create")
if comment_list:
rpc_res = post_muilty_data(comment_list, "cims/reply/batch_create")
except Exception as e:
print("comment error")
print(e)
except Exception as e:
print(e)
continue
rds.hdel("xiaohongshu",pid)
rds.sadd("xiaohongshu_exists_set",pid)
else:
......
......@@ -5,6 +5,7 @@
# @author : litao
import copy
import execjs
import redis
import requests
import json
......@@ -248,7 +249,16 @@ class Crawler_xiaohongshu():
# break
if __name__ == '__main__':
test = Crawler_xiaohongshu()
# try:
# with open(r'D:\work_file\gengmei\crawler\crawler_sys\site_crawler_by_redis\xiaohongshu_js.js', 'r', encoding='utf-8') as f:
# js = f.read()
# except:
# with open('/srv/apps/crawler/crawler_sys/site_crawler_by_redis/xiaohongshu.js', 'r', encoding='utf-8') as f:
# js = f.read()
# # print(js)
# exec_js = execjs.compile(js)
# exec_js.call("get_sign", "https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae")
# test = Crawler_xiaohongshu()
releaserurl = 'https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae'
url_list =[
"https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae",
......@@ -358,4 +368,8 @@ if __name__ == '__main__':
]
for url in url_list:
print(url)
res = test.releaser_page(url,proxies_num=0)
try:
res = test.releaser_page(url,proxies_num=0)
except Exception as e:
print(e)
continue
......@@ -79,7 +79,27 @@ class Crawler_xiaohongshu():
def __exit__(self, exc_type=None, exc_value=None, traceback=None):
    """Close ``self.driver`` when a ``with`` block is left.

    The ``with`` statement always calls ``__exit__`` with three exception
    arguments, so the former one-argument signature raised ``TypeError``.
    The arguments default to ``None`` so direct ``obj.__exit__()`` calls
    keep working. Returns ``None``, so an exception raised inside the
    ``with`` body is not suppressed.
    """
    self.driver.close()
def get_one_page(self, page_id, proxies=0,cookies={}):
def get_one_page_xiaochengxu(self, page_id, proxies=0):
    """Fetch one note from the WeChat mini-program feed API and return its JSON.

    The request carries an ``X-Sign`` header: "X" followed by the MD5 hex
    digest of the request URL with the host prefix removed and "WSUDD"
    appended.

    Args:
        page_id: Note id interpolated into the feed URL.
        proxies: Passed through unchanged to ``retry_get_url``.

    Returns:
        The decoded JSON body of the response.
    """
    note_url = "https://www.xiaohongshu.com/fe_api/burdock/weixin/v2/note/%s/single_feed" % page_id
    signed_text = note_url.replace("https://www.xiaohongshu.com", "") + "WSUDD"
    x_sign = "X" + hashlib.md5(signed_text.encode("utf8")).hexdigest()
    # NOTE(review): Authorization and Device-Fingerprint are hard-coded
    # values; presumably captured from a real session — confirm they are
    # still valid.
    request_headers = {
        "Host": "www.xiaohongshu.com",
        "Connection": "keep-alive",
        "Authorization": "wxmp.e9f56b15-49c3-44ba-805a-85319f068e2d",
        "Device-Fingerprint": "WHJMrwNw1k/GXMIH6oNZHpKs63VE6C7ZMjqtmn8f/sCpHczahRVlh42Bpdarby1ViQbBXCX6wdvNzCZlrBkkp8VRIBqLNpMnBdCW1tldyDzmauSxIJm5Txg==1487582755342",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36 MicroMessenger/7.0.9.501 NetType/WIFI MiniProgramEnv/Windows WindowsWechat",
        "X-Sign": x_sign,
        "content-type": "application/json",
        "Referer": "https://servicewechat.com/wxb296433268a1c654/14/page-frame.html",
        "Accept-Encoding": "gzip, deflate, br",
    }
    return retry_get_url(note_url, headers=request_headers, proxies=proxies).json()
def get_one_page_pc(self, page_id, proxies=0,cookies={}):
url = "https://www.xiaohongshu.com/discovery/item/%s" % page_id
headers = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
......@@ -106,7 +126,63 @@ class Crawler_xiaohongshu():
def get_releaser_id(self, releaserUrl):
    """Return the releaser id for ``releaserUrl``.

    Delegates to the module-level ``get_releaser_id`` helper with the
    platform fixed to "xiaohongshu".
    """
    releaser_id = get_releaser_id(platform="xiaohongshu", releaserUrl=releaserUrl)
    return releaser_id
def releaser_page(self, releaserUrl,
def releaser_page_by_xiaochengxu(self, releaserUrl,
                                 output_to_file=False,
                                 filepath=None,
                                 releaser_page_num_max=30,
                                 output_to_es_raw=False,
                                 es_index=None,
                                 doc_type=None,
                                 output_to_es_register=False,
                                 push_to_redis=False, proxies_num=None, **kwargs):
    """Crawl a releaser's notes through the WeChat mini-program API.

    For every note of type "normal" on the listing page, the full note is
    fetched with ``get_one_page_xiaochengxu``, stored in the redis hash
    "xiaohongshu" under the note id, and yielded.

    Args:
        releaserUrl: Profile URL; the releaser id is derived from it with
            ``self.get_releaser_id``.
        releaser_page_num_max: Upper bound on listing pages. The loop is
            additionally capped at page 1, so values above 1 currently have
            no effect.
        proxies_num: Passed through as ``proxies`` to the request helpers.
        output_to_file, filepath, output_to_es_raw, es_index, doc_type,
        output_to_es_register, push_to_redis, **kwargs: Accepted for
            signature compatibility; not used by this method.

    Yields:
        The decoded JSON of each fetched note.
    """
    count = 1
    failed_requests = 0
    # Bound the retries: the old bare ``except: continue`` never advanced
    # ``count`` and could spin forever on a persistent failure.
    max_failed_requests = 3
    releaser_id = self.get_releaser_id(releaserUrl)
    # NOTE(review): ``count <= 1`` restricts the crawl to the first listing page.
    while count <= releaser_page_num_max and count <= 1:
        page_url = "https://www.xiaohongshu.com/fe_api/burdock/weixin/v2/user/%s/notes?page=%s&page_size=15" % (releaser_id, str(count))
        # X-Sign = "X" + md5(path-and-query + "WSUDD").
        sign = page_url.replace("https://www.xiaohongshu.com", "") + "WSUDD"
        x_sign = "X" + hashlib.md5(sign.encode("utf8")).hexdigest()
        headers = {
            "Host": "www.xiaohongshu.com",
            "Connection": "keep-alive",
            "Device-Fingerprint": "WHJMrwNw1k/GXMIH6oNZHpLWoBQzvkDy05jr1Va0PmTbRiILIJqrbuTSXZlYWlVESzybG5xn+rT8DpNKSeRU9PyU5zXZgL7zsdCW1tldyDzmauSxIJm5Txg==1487582755342",
            "X-Sign": x_sign,
            "content-type": "application/json",
            "Authorization": "wxmp.03a6e90f-3759-4934-b05d-033c6eff74e9",
            "Accept-Encoding": "gzip,compress,br,deflate",
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.21(0x17001525) NetType/WIFI Language/zh_TW",
            "Referer": "https://servicewechat.com/wxb296433268a1c654/14/page-frame.html",
        }
        try:
            print(page_url)
            res = retry_get_url(page_url, headers=headers, proxies=proxies_num)
        except Exception as e:
            failed_requests += 1
            print("get releaser page error %s" % e)
            if failed_requests >= max_failed_requests:
                break
            continue
        time.sleep(random.randint(1, 2))
        data_list = res.json()
        if not data_list:
            # An empty response used to leave ``count`` unchanged and loop
            # forever; there is nothing more to read, so stop.
            break
        print("get data at releaser: %s page: %s" % (releaser_id, count))
        count += 1
        for info_dic in data_list["data"]:
            if info_dic["type"] != "normal":
                continue
            page_id = info_dic["id"]
            page_data = self.get_one_page_xiaochengxu(page_id, proxies=proxies_num)
            # Store under the note id. The listing-page signature used
            # before made every note of a page overwrite the same field.
            rds.hset("xiaohongshu", key=page_id, value=json.dumps(page_data))
            yield page_data
def releaser_page_by_pc(self, releaserUrl,
output_to_file=False,
filepath=None,
releaser_page_num_max=30,
......@@ -196,7 +272,7 @@ class Crawler_xiaohongshu():
time_ts = datetime.datetime.strptime(info_dic["time"],'%Y-%m-%d %H:%M').timestamp()
if info_dic["type"] != "normal":
continue
page_data = self.get_one_page(page_id,proxies=proxies_num,cookies=cookie_dic)
page_data = self.get_one_page_xiaochengxu(page_id,proxies=proxies_num)
print(page_data)
title = title
anwser = desc
......@@ -211,6 +287,9 @@ class Crawler_xiaohongshu():
yield page_data
# break
def releaser_page(self, releaserUrl, **kwargs):
    """Yield every note produced by ``releaser_page_by_xiaochengxu``.

    Thin generator wrapper around the mini-program crawl: ``releaserUrl``
    and all keyword arguments are forwarded unchanged. ``yield from`` is
    used so ``close()``/``throw()`` on this generator reach the inner one.
    """
    yield from self.releaser_page_by_xiaochengxu(releaserUrl, **kwargs)
def releaser_page_by_time(self, start_time, end_time, url, allow, **kwargs):
count_false = 0
......@@ -232,9 +311,114 @@ class Crawler_xiaohongshu():
if __name__ == '__main__':
test = Crawler_xiaohongshu()
releaserurl = 'https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae'
url_list =[
url_list = [
"https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae",
"https://www.xiaohongshu.com/user/profile/5ea6909900000000010057a3",
"https://www.xiaohongshu.com/user/profile/5a03b1f4b1da1412dd070a86",
"https://www.xiaohongshu.com/user/profile/5b6e76419276ee0001bd5740",
"https://www.xiaohongshu.com/user/profile/5c4140500000000006006cb7",
"https://www.xiaohongshu.com/user/profile/5bd2beff7da0890001b5408a",
"https://www.xiaohongshu.com/user/profile/5b5edc5211be1044bcce7824",
"https://www.xiaohongshu.com/user/profile/5b35cce84eacab52fbe15c0b",
"https://www.xiaohongshu.com/user/profile/5efec35c000000000101d75a",
"https://www.xiaohongshu.com/user/profile/5f91428a000000000101d909",
"https://www.xiaohongshu.com/user/profile/5ed49f1200000000010017f0",
"https://www.xiaohongshu.com/user/profile/5ae3f47b11be105fae4b854c",
"https://www.xiaohongshu.com/user/profile/5a9e10fb11be1006adc5b9d5",
"https://www.xiaohongshu.com/user/profile/5d0c3b900000000012013409",
"https://www.xiaohongshu.com/user/profile/5f1013a70000000001005b16",
"https://www.xiaohongshu.com/user/profile/5f5c6d860000000001001787",
"https://www.xiaohongshu.com/user/profile/5eeb18e600000000010062b6",
"https://www.xiaohongshu.com/user/profile/5bab62e9ee80fc0001505980",
"https://www.xiaohongshu.com/user/profile/5f262a610000000001004ea9",
"https://www.xiaohongshu.com/user/profile/5eb6779300000000010045f5",
"https://www.xiaohongshu.com/user/profile/5c855374000000001202ef0c",
"https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4",
"https://www.xiaohongshu.com/user/profile/5f100b2d000000000100138d",
"https://www.xiaohongshu.com/user/profile/5c14ae400000000006016f5d",
"https://www.xiaohongshu.com/user/profile/5bbd28de4c26220001881cbd",
"https://www.xiaohongshu.com/user/profile/5f86b6fc000000000100a5d8",
"https://www.xiaohongshu.com/user/profile/5db16ca20000000001004c02",
"https://www.xiaohongshu.com/user/profile/5ad553bb4eacab34ee9f7d4a",
"https://www.xiaohongshu.com/user/profile/5f12cffd000000000101da61",
"https://www.xiaohongshu.com/user/profile/596d7e4f5e87e722ff1bfd32",
"https://www.xiaohongshu.com/user/profile/5ef17ad00000000001005e1c",
"https://www.xiaohongshu.com/user/profile/5f75a5700000000001007679",
"https://www.xiaohongshu.com/user/profile/5c639f59000000001000c731",
"https://www.xiaohongshu.com/user/profile/5f865cbd0000000001002f01",
"https://www.xiaohongshu.com/user/profile/5eccc58f000000000100753e",
"https://www.xiaohongshu.com/user/profile/5fbe05b4000000000101c88d",
"https://www.xiaohongshu.com/user/profile/5b7d1da7e8ac2b471ee6fef3",
"https://www.xiaohongshu.com/user/profile/5a11b22211be101018ba7125",
"https://www.xiaohongshu.com/user/profile/5a76c3c611be107f08bd35b3",
"https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4",
"https://www.xiaohongshu.com/user/profile/5f2539e80000000001009d9e",
"https://www.xiaohongshu.com/user/profile/561b1fd8e4b1cf0295755d05",
"https://www.xiaohongshu.com/user/profile/5beeba1ff7e8b93bc0405234",
"https://www.xiaohongshu.com/user/profile/5c87785f000000001000ed51",
"https://www.xiaohongshu.com/user/profile/5efdba65000000000101c79c",
"https://www.xiaohongshu.com/user/profile/5507e7dfa46e9616260827f6",
"https://www.xiaohongshu.com/user/profile/567573470bf90c27957dd73c",
"https://www.xiaohongshu.com/user/profile/5fd1821b000000000100381a",
"https://www.xiaohongshu.com/user/profile/5f5f6b1c00000000010064dc",
"https://www.xiaohongshu.com/user/profile/5aea4d31e8ac2b4a44e1d2d4",
"https://www.xiaohongshu.com/user/profile/5f39eabb00000000010076ca",
"https://www.xiaohongshu.com/user/profile/5cda11d7000000001703780c",
"https://www.xiaohongshu.com/user/profile/5cbc3e9f000000001701d7bf",
"https://www.xiaohongshu.com/user/profile/5e7886930000000001003f7f",
"https://www.xiaohongshu.com/user/profile/566fbc3550c4b435f51f637b",
"https://www.xiaohongshu.com/user/profile/5e86cb34000000000100a223",
"https://www.xiaohongshu.com/user/profile/558e15b2f5a263490c65cdaa",
"https://www.xiaohongshu.com/user/profile/5d9eef320000000001001615",
"https://www.xiaohongshu.com/user/profile/5a6ba3214eacab4eee8e627a",
"https://www.xiaohongshu.com/user/profile/5f58cacb000000000100bdf5",
"https://www.xiaohongshu.com/user/profile/5f954030000000000100780c",
"https://www.xiaohongshu.com/user/profile/5f5745bf000000000100351d",
"https://www.xiaohongshu.com/user/profile/5c74a2b9000000001002e667",
"https://www.xiaohongshu.com/user/profile/595ee5b882ec397553103dd3",
"https://www.xiaohongshu.com/user/profile/5a5e20324eacab30f03654fb",
"https://www.xiaohongshu.com/user/profile/55743bedc2bdeb1a16844741",
"https://www.xiaohongshu.com/user/profile/5f0d523800000000010056de",
"https://www.xiaohongshu.com/user/profile/59d5b03e44363b61a050532f",
"https://www.xiaohongshu.com/user/profile/5ebdd5f40000000001002a67",
"https://www.xiaohongshu.com/user/profile/5f1c1b7b0000000001006cbf",
"https://www.xiaohongshu.com/user/profile/5ae404944eacab794dfb95b1",
"https://www.xiaohongshu.com/user/profile/5d26276a0000000012017538",
"https://www.xiaohongshu.com/user/profile/5ed5aa8f0000000001001f1e",
"https://www.xiaohongshu.com/user/profile/5f92cf4f000000000100a846",
"https://www.xiaohongshu.com/user/profile/5a75d42011be10344b917ffe",
"https://www.xiaohongshu.com/user/profile/5ccea0ff000000001002b753",
"https://www.xiaohongshu.com/user/profile/5c4418750000000005006717",
"https://www.xiaohongshu.com/user/profile/5ec582d60000000001005315",
"https://www.xiaohongshu.com/user/profile/594a93835e87e72f3e2ded11",
"https://www.xiaohongshu.com/user/profile/5b8ab07606311b000184195a",
"https://www.xiaohongshu.com/user/profile/54e7413ea46e96122dab7674",
"https://www.xiaohongshu.com/user/profile/5f3657900000000001002181",
"https://www.xiaohongshu.com/user/profile/5a65d6554eacab6864e2749e",
"https://www.xiaohongshu.com/user/profile/5a745dc911be101d9ceab748",
"https://www.xiaohongshu.com/user/profile/59b2033550c4b45e5d43c3d9",
"https://www.xiaohongshu.com/user/profile/59a97aaa5e87e760e012dcd0",
"https://www.xiaohongshu.com/user/profile/5a5de03611be100219719b0f",
"https://www.xiaohongshu.com/user/profile/5f40a5170000000001008577",
"https://www.xiaohongshu.com/user/profile/597e82aa5e87e73c4915db81",
"https://www.xiaohongshu.com/user/profile/580e0bc36a6a69043935369d",
"https://www.xiaohongshu.com/user/profile/5d1a17670000000012021d8e",
"https://www.xiaohongshu.com/user/profile/59a830be82ec39155146f421",
"https://www.xiaohongshu.com/user/profile/55efc1b73397db0e969c8fbd",
"https://www.xiaohongshu.com/user/profile/5c8c55220000000010005810",
"https://www.xiaohongshu.com/user/profile/5f337df2000000000101e2b2",
"https://www.xiaohongshu.com/user/profile/5f2111500000000001009b7b",
"https://www.xiaohongshu.com/user/profile/59c840ff44363b497f335cd4",
"https://www.xiaohongshu.com/user/profile/5f8e8508000000000101d70e",
"https://www.xiaohongshu.com/user/profile/5a163e3511be10234e1abffd",
"https://www.xiaohongshu.com/user/profile/5e71f6870000000001005e52",
'https://www.xiaohongshu.com/user/profile/5cca9b3700000000120314c9',
'https://www.xiaohongshu.com/user/profile/5aa0f7bae8ac2b65bfcdaf0e',
'https://www.xiaohongshu.com/user/profile/5c20dd200000000007027c07',
'https://www.xiaohongshu.com/user/profile/5fe1c1ba0000000001006e65',
]
for url in url_list:
print(url)
res = test.releaser_page(url,proxies_num=0)
for r in res:
print(r)
const crypto = require('crypto');
/**
 * Build the value of the `x-sign` request header.
 * `x-sign: 'X' + md5(url + 'WSUDD')`
 * @param {string} url URL to sign
 * @param {object} params query parameters; serialized and appended to the URL before signing
 * @returns {string} 'X' followed by the hex MD5 digest
 */
function generateXSign(url, params = {}) {
  const query = new URLSearchParams(params).toString();
  // Only add the '?' separator when there is a query string to attach.
  const separator = query ? '?' : '';
  const signedText = `${url}${separator}${query}WSUDD`;
  const digest = crypto.createHash('md5').update(signedText).digest('hex');
  return 'X' + digest;
}
module.exports = {
generateXSign,
};
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment