Commit b3ca6aa0 authored by litaolemo

update

parent d7833997
......@@ -170,7 +170,7 @@ class Crawler_xiaohongshu():
"accept-encoding": "gzip, deflate, br",
"accept-language": "zh-CN,zh;q=0.9",
"cache-control": "no-cache",
# "cookie": "xhsTrackerId=a81077f9-661a-4731-c790-ac6fbbeaa44b; extra_exp_ids=gif_clt1,ques_exp2; xhsuid=2EFsw5qOMk70l1we; timestamp2=2021010899964852bd70ca4c0c991c6c; timestamp2.sig=Lj3xTHgJ-JO20IUULPRnAhACddlzUtd7AsUzrlJQbWc; xhs_spses.5dde=*; xhsTracker=url=index&searchengine=baidu; xhs_spid.5dde=4dc700089fbdde46.1610082780.1.1610083480.1610082780.d70776d0-eac9-4684-912e-130f0cdb86a1",
"cookie": "xhsTrackerId=a81077f9-661a-4731-c790-ac6fbbeaa44b; extra_exp_ids=gif_clt1,ques_exp2; xhsuid=2EFsw5qOMk70l1we; timestamp2=2021010899964852bd70ca4c0c991c6c; timestamp2.sig=Lj3xTHgJ-JO20IUULPRnAhACddlzUtd7AsUzrlJQbWc; xhs_spses.5dde=*; xhsTracker=url=index&searchengine=baidu; xhs_spid.5dde=4dc700089fbdde46.1610082780.1.1610083480.1610082780.d70776d0-eac9-4684-912e-130f0cdb86a1",
"pragma": "no-cache",
"sec-ch-ua": '"Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"',
"sec-ch-ua-mobile": "?0",
......@@ -188,8 +188,7 @@ class Crawler_xiaohongshu():
retry_time = 0
result_list = []
releaser_id = self.get_releaser_id(releaserUrl)
releaserUrl = 'https://www.xiaohongshu.com/user/profile/%s' % releaser_id
self.video_data['releaserUrl'] = releaserUrl
releaserUrl = 'http://www.xiaohongshu.com/user/profile/%s' % releaser_id
pcursor = 0
# cookie_dic = {'timestamp2.sig': 'QaPtkKr8VeAbx324ZSJgUSeLhjE2Lj1kDhdmZReaewo', 'timestamp2': '20210108b8c577995da3b1aa5e9a7392', 'xhsuid': 'cqq3glNpFsMgH50j', 'xhs_spses.5dde': '*', 'xhs_spid.5dde': 'fa1043ce96194610.1610072893.1.1610072895.1610072893.3536bab9-1e85-4a3a-8a46-37e694100de1', 'extra_exp_ids': 'gif_clt1,ques_clt1', 'xhsTrackerId': '591fba69-1884-4ab2-ca05-9ae70ab77d2e'}
# print(proxies)
......@@ -197,7 +196,7 @@ class Crawler_xiaohongshu():
while count <= releaser_page_num_max and count <= 1000:
try:
print(releaserUrl)
res = retry_get_url(releaserUrl, headers=headers, proxies=proxies_num,cookies=cookie_dic)
res = retry_get_url(releaserUrl, headers=headers, proxies=proxies_num)
except:
continue
# print(get_page.content)
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment