Commit 5e91cb53 authored by litaolemo

update

parent 42faadf9
...@@ -198,9 +198,10 @@ class Crawler_xiaohongshu(): ...@@ -198,9 +198,10 @@ class Crawler_xiaohongshu():
releaser_id = self.get_releaser_id(releaserUrl) releaser_id = self.get_releaser_id(releaserUrl)
releaserUrl = 'http://www.xiaohongshu.com/user/profile/%s' % releaser_id releaserUrl = 'http://www.xiaohongshu.com/user/profile/%s' % releaser_id
pcursor = 0 pcursor = 0
cookie_dic = {'timestamp2': '2021011521167acd3955b98063aef973'} cookie_dic = {'timestamp2': '2021012521167acd3955b980c6baec3f'}
# proxies = {'http': 'http://hanye:i9mmu0a3@58.55.159.141:16085/', 'https': 'http://hanye:i9mmu0a3@58.55.159.141:16085/'} # proxies = {'http': 'http://hanye:i9mmu0a3@58.55.159.141:16085/', 'https': 'http://hanye:i9mmu0a3@58.55.159.141:16085/'}
while count <= releaser_page_num_max and count <= 1: while count <= releaser_page_num_max and count <= 1:
time.sleep(0.5)
try: try:
print(releaserUrl) print(releaserUrl)
res = retry_get_url(releaserUrl, headers=headers, proxies=proxies_num, cookies=cookie_dic) res = retry_get_url(releaserUrl, headers=headers, proxies=proxies_num, cookies=cookie_dic)
...@@ -258,7 +259,7 @@ if __name__ == '__main__': ...@@ -258,7 +259,7 @@ if __name__ == '__main__':
# # print(js) # # print(js)
# exec_js = execjs.compile(js) # exec_js = execjs.compile(js)
# exec_js.call("get_sign", "https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae") # exec_js.call("get_sign", "https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae")
# test = Crawler_xiaohongshu() test = Crawler_xiaohongshu()
releaserurl = 'https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae' releaserurl = 'https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae'
url_list =[ url_list =[
"https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae", "https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae",
......
...@@ -158,15 +158,40 @@ class Crawler_zhihu(): ...@@ -158,15 +158,40 @@ class Crawler_zhihu():
"sec-fetch-dest": "empty", "sec-fetch-dest": "empty",
"sec-fetch-mode": "cors", "sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin", "sec-fetch-site": "same-origin",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36",
"x-ab-param": "li_yxzl_new_style_a=1;se_major=0;se_adsrank=4;se_hi_trunc=0;se_v053=1;li_panswer_topic=0;zr_test_aa1=1;pf_noti_entry_num=2;zr_search_sim2=2;zr_slotpaidexp=2;se_zp_boost=0;tp_club_entrance=1;pf_profile2_tab=0;ug_newtag=1;li_answer_card=0;ls_recommend_test=5;qap_labeltype=1;zr_rec_answer_cp=open;se_sug_term=0;tp_topic_tab=0;ge_ge01=5;se_wil_act=0;se_videobox=0;tsp_ad_cardredesign=0;qap_question_visitor= 0;zr_slot_training=2;tp_clubhyb=0;li_ebook_gen_search=2;se_v_v005=0;zw_sameq_sorce=999;ge_ge02=6;se_mobilecard=0;se_auth_src=0;tp_header_style=1;tp_flow_ctr=0;pf_creator_card=1;li_viptab_name=0;zr_intervene=0;se_bert128=1;se_ffzx_jushen1=0;top_v_album=1;se_preset=0;tp_discover=1;ls_fmp4=0;tp_club_top=0;top_universalebook=1;li_svip_cardshow=1;li_paid_answer_exp=0;tp_topic_style=0;zr_art_rec=base;se_colorfultab=1;se_auth_src2=0;tp_club_qa_entrance=1;tp_club__entrance2=1;tsp_hotlist_ui=3;li_svip_tab_search=1;se_entity22=1;tp_meta_card=0;tp_topic_tab_new=0-0-0;tp_zrec=0;top_ebook=0;pf_adjust=1;qap_question_author=0;zr_topic_rpc=0;se_topicfeed=0;tp_club_feed=0;tsp_ioscard2=0;zr_rel_search=base;se_recommend=1;se_usercard=0;tp_club_fdv4=0;tp_m_intro_re_topic=1;pf_foltopic_usernum=0;li_vip_verti_search=0;zr_training_boost=false;se_v054=0;tp_contents=1;soc_feed_intelligent=3;tsp_ios_cardredesign=0;pf_fuceng=1;pf_newguide_vertical=0;ug_follow_topic_1=2;ls_video_commercial=0;li_car_meta=1;se_sug_dnn=0;tp_fenqu_wei=0;li_catalog_card=1;top_quality=0;se_click_v_v=1;se_aa_base=1;se_club_ui=0;se_return_1=0;soc_notification=1;zr_ans_rec=gbrank;zr_search_paid=1;zr_expslotpaid=3;zr_rerank=0;se_college=default;se_whitelist=1;top_root=0;li_yxxq_aut=A1;tsp_adcard2=0;ls_videoad=2;se_col_boost=1;li_edu_page=old;zr_training_first=false;se_t2sug=1;se_vbert3=0;se_merge=0;li_video_section=1;zr_km_answer=open_cvr;zr_sim3=0;se_v_v006=0;tp_dingyue_video=0;li_topics_search=0;se_searchwiki=0;se_guess=0;se_major_v2=0;tp_club_bt=0;tp_sft=a;top_test_4_liguangyi=1", "x-ab-param": 
"tp_topic_style=0;li_video_section=1;top_test_4_liguangyi=1;pf_adjust=1;li_sp_mqbk=0;li_paid_answer_exp=0;tp_zrec=1;zr_slotpaidexp=1;qap_question_visitor= 0;pf_noti_entry_num=2;zr_expslotpaid=3;li_edu_page=old;qap_question_author=0;tp_dingyue_video=0;li_vip_verti_search=0;tp_contents=1;se_ffzx_jushen1=0;li_panswer_topic=0",
"x-api-version": "3.0.91", "x-api-version": "3.0.91",
"x-ab-pb": "Ck5oALcAlgsuAQEL7ApMCwgARwAbAGcAtApsAMUA1wt1DDcMUgv0C+QKtQtDAIkMNAzPCz8AQAFPAVgBBwybC1YMQgC0AGIB4AsPC9wLYAsSJwAAAQABAQABAAAAAAAAAAEBAQAAAxUAAAsAAQAAAQIBBQEAAAEAAA==",
"x-app-za": "OS=Web", "x-app-za": "OS=Web",
"x-requested-with": "fetch", "x-requested-with": "fetch",
"x-zse-83": "3_2.0", "x-zse-83": "3_2.0",
"x-zse-86": None, "x-zse-86": None,
"referer": "https://www.zhihu.com/search?type=content&q={0}".format(urllib.parse.quote(keyword)), "referer": "https://www.zhihu.com/search?type=content&q={0}".format(urllib.parse.quote(keyword)),
}
headers_search = {
"accept": "*/*",
"accept-encoding": "gzip, deflate, br",
"accept-language": "zh-CN,zh;q=0.9",
"cache-control": "no-cache",
"cookie": '_zap=20547721-b576-4409-95c1-000c6f20517b; d_c0="AIDu7_zGrA-PToWVy-siVNLS835i5YXmFCQ=|1562072925"; __gads=ID=bdc51df6433d4288:T=1562072932:S=ALNI_MbUwg2TeI33p4EnEYpHr8bAKBUiNQ; _ga=GA1.2.929365035.1592357886; _xsrf=MuvTOIUy5KNtEQCk76uG0nAbiqt6IyKS; __utma=51854390.929365035.1592357886.1603162707.1603162707.1; __utmz=51854390.1603162707.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmv=51854390.100--|2=registration_date=20200803=1^3=entry_date=20190702=1; q_c1=e59a45f95396455e871eb111bdd827e1|1611029549000|1562072927000; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1611553707,1611553987,1611628615,1611628984; tst=r; SESSIONID=ZJhStdMnZrqk62Z7NDT3q8Aej7nPl8YLxBv67kD362k; JOID=VF4QC03vctAIaP88FOESAkuDNkoGtTu1VRybcne1AKJuUM1mIAlxwmZo_DIXMGOzOJtakS2tzTJLGRpyoIMzzjQ=; osd=VFoXBkPvdtcFZv84E-wcAk-EO0QGsTy4WxyfdXq7AKZpXcNmJA58zGZs-z8ZMGe0NZValSqgwzJPHhd8oIc0wzo=; capsion_ticket="2|1:0|10:1611730708|14:capsion_ticket|44:NDM2OGI0NTY3NGU1NGNjNGIzY2RiZjMwN2E0ZWZjMTI=|b5f503cfee4007cc351da02ff81f6198d4681d8a29eb7864ed3fde99e102e54c"; z_c0="2|1:0|10:1611730767|4:z_c0|92:Mi4xSDJLUUhRQUFBQUFBZ083dl9NYXNEeVlBQUFCZ0FsVk5UMTMtWUFER0hacnlKQ1ZwUEJKVHRDbFdSX2wxLW5fTmRn|85e8de2d4823e6ec8e0c8b5e07b190321708d562342dc54ac7c1a062d55e4695"; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1611734206; KLBRSID=2177cbf908056c6654e972f5ddc96dc2|1611734297|1611730401',
"pragma": "no-cache",
"referer": "https://www.zhihu.com/search?type=content&q=%E9%98%BF%E5%85%8B%E6%9B%BC%E8%A7%92",
"sec-ch-ua": '"Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"',
"sec-ch-ua-mobile": "?0",
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
"x-ab-param": "tp_topic_style=0;li_video_section=1;top_test_4_liguangyi=1;pf_adjust=1;li_sp_mqbk=0;li_paid_answer_exp=0;tp_zrec=1;zr_slotpaidexp=1;qap_question_visitor= 0;pf_noti_entry_num=2;zr_expslotpaid=3;li_edu_page=old;qap_question_author=0;tp_dingyue_video=0;li_vip_verti_search=0;tp_contents=1;se_ffzx_jushen1=0;li_panswer_topic=0",
"x-ab-pb": "Ck5oALcAlgsuAQEL7ApMCwgARwAbAGcAtApsAMUA1wt1DDcMUgv0C+QKtQtDAIkMNAzPCz8AQAFPAVgBBwybC1YMQgC0AGIB4AsPC9wLYAsSJwAAAQABAQABAAAAAAAAAAEBAQAAAxUAAAsAAQAAAQIBBQEAAAEAAA==",
"x-api-version": "3.0.91",
"x-app-za": "OS=Web",
"x-requested-with": "fetch",
"x-zse-83": "3_2.0",
"x-zse-86": "2.0_aXt0r7UqkTFxkLO01LOBc0U8b72YU9201MF8r4UBe72p",
} }
cookies_dict = { cookies_dict = {
"d_c0": '"AIDu7_zGrA-PToWVy-siVNLS835i5YXmFCQ=|1562072925"', "d_c0": '"AIDu7_zGrA-PToWVy-siVNLS835i5YXmFCQ=|1562072925"',
...@@ -178,9 +203,14 @@ class Crawler_zhihu(): ...@@ -178,9 +203,14 @@ class Crawler_zhihu():
url = "https://www.zhihu.com/api/v4/search_v3?t=general&q={0}&correction=1&offset=0&limit=20&lc_idx=0&show_all_topics=0".format( url = "https://www.zhihu.com/api/v4/search_v3?t=general&q={0}&correction=1&offset=0&limit=20&lc_idx=0&show_all_topics=0".format(
urllib.parse.quote(keyword)) urllib.parse.quote(keyword))
offset = 0 offset = 0
'3_2.0+/api/v4/search_v3?t=general&q=%E9%98%BF%E5%85%8B%E6%9B%BC%E8%A7%92&correction=1&offset=0&limit=20&lc_idx=0&show_all_topics=0+"AIDu7_zGrA-PToWVy-siVNLS835i5YXmFCQ=|1562072925"'
f = "+".join(["3_2.0", url.replace("https://www.zhihu.com",""), headers_search["referer"], cookies_dict["d_c0"]]) f = "+".join(["3_2.0", url.replace("https://www.zhihu.com",""), headers_search["referer"], cookies_dict["d_c0"]])
fmd5 = hashlib.new('md5', f.encode()).hexdigest() fmd5 = hashlib.new('md5', f.encode()).hexdigest()
headers_search["x-zse-86"] = "1.0_" + self.exec_js.call("b",fmd5) print(fmd5)
# fmd5 = "584f51b7034ae89b95ee01e202262101"
print(self.exec_js.call("b",fmd5))
headers_search["x-zse-86"] = "2.0_" + self.exec_js.call("b",fmd5)
# headers_search["x-zse-86"] = "2.0_" + "aXt0r7UqkTFxkLO01LOBc0U8b72YU9201MF8r4UBe72p"
res_list = [] res_list = []
while offset <= search_pages_max * 20: while offset <= search_pages_max * 20:
offset += 20 offset += 20
...@@ -399,6 +429,6 @@ if __name__ == '__main__': ...@@ -399,6 +429,6 @@ if __name__ == '__main__':
# os.environ["EXECJS_RUNTIME"] = 'Node' # os.environ["EXECJS_RUNTIME"] = 'Node'
# print(execjs.get().name ) # print(execjs.get().name )
# zhihu.get_serach_page_cookies("热玛吉") # zhihu.get_serach_page_cookies("热玛吉")
zhihu.search_page("双眼皮",search_pages_max=1,output_to_es_register=True) zhihu.search_page("阿克曼角",search_pages_max=1,output_to_es_register=True)
# zhihu.get_single_answer_page("325099876","1209953121") # zhihu.get_single_answer_page("325099876","1209953121")
# print(user_page) # print(user_page)
...@@ -4,7 +4,7 @@ const { window } = new JSDOM('<!doctype html><html><body></body></html>'); ...@@ -4,7 +4,7 @@ const { window } = new JSDOM('<!doctype html><html><body></body></html>');
global.window = window; global.window = window;
function t(e) { function t(e) {
return (t = "function" == typeof Symbol && "symbol" == typeof Symbol.A ? function(e) { return (t = "function" == typeof Symbol && "symbol" == typeof Symbol.A ? function(e) {
return typeof e return typeof e
} }
...@@ -13,9 +13,9 @@ global.window = window; ...@@ -13,9 +13,9 @@ global.window = window;
} }
)(e) )(e)
} }
Object.defineProperty(exports, "__esModule", { Object.defineProperty(exports, "__esModule", {
value: !0 value: !0
}); });
var A = "2.0" var A = "2.0"
, __g = {}; , __g = {};
function s() {} function s() {}
...@@ -216,7 +216,7 @@ global.window = window; ...@@ -216,7 +216,7 @@ global.window = window;
e.r[3] = e.r[this.s][r](o[0], o[1]); e.r[3] = e.r[this.s][r](o[0], o[1]);
break; break;
case 2: case 2:
for (var a = [], s = 0; s < this.i; s++) for (var a = [], c = 0; c < this.i; c++)
a.unshift(e.f.pop()); a.unshift(e.f.pop());
e.r[3] = new e.r[this.s](a[0],a[1]) e.r[3] = new e.r[this.s](a[0],a[1])
} }
...@@ -342,9 +342,10 @@ global.window = window; ...@@ -342,9 +342,10 @@ global.window = window;
u.prototype.e = function(e) { u.prototype.e = function(e) {
var t = this var t = this
, n = [0]; , n = [0];
e.k.forEach(function(e) { e.k.forEach((function(e) {
n.push(e) n.push(e)
}); }
));
var r = function(r) { var r = function(r) {
var o = new G; var o = new G;
return o.k = n, return o.k = n,
...@@ -380,10 +381,10 @@ global.window = window; ...@@ -380,10 +381,10 @@ global.window = window;
r.push(t.charCodeAt(o) << 8 | t.charCodeAt(o + 1)); r.push(t.charCodeAt(o) << 8 | t.charCodeAt(o + 1));
this.G = r; this.G = r;
for (var i = [], a = n + 2; a < t.length; ) { for (var i = [], a = n + 2; a < t.length; ) {
var s = t.charCodeAt(a) << 8 | t.charCodeAt(a + 1) var c = t.charCodeAt(a) << 8 | t.charCodeAt(a + 1)
, c = t.slice(a + 2, a + 2 + s); , s = t.slice(a + 2, a + 2 + c);
i.push(c), i.push(s),
a += s + 2 a += c + 2
} }
this.b = i this.b = i
} }
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
# @email : litao@igengmei.com # @email : litao@igengmei.com
# @author : litao # @author : litao
import copy import copy
from urllib.parse import quote
import redis import redis
import requests import requests
import json import json
...@@ -178,6 +178,69 @@ class Crawler_xiaohongshu(): ...@@ -178,6 +178,69 @@ class Crawler_xiaohongshu():
time_ts = datetime.datetime.strptime(info_dic["time"], '%Y-%m-%d %H:%M').timestamp() time_ts = datetime.datetime.strptime(info_dic["time"], '%Y-%m-%d %H:%M').timestamp()
page_data = self.get_one_page_xiaochengxu(page_id, proxies=proxies_num) page_data = self.get_one_page_xiaochengxu(page_id, proxies=proxies_num)
print(page_data) print(page_data)
if page_data["code"] == -1 and page_data["msg"] != "该内容无法展示":
continue
page_data['release_time'] = int(time_ts*1e3)
page_data['platform'] = 'xiaohongshu'
page_data['doc_id'] = page_id
# print(page_data)
rds.hset("xiaohongshu", key=page_id, value=json.dumps(page_data))
yield page_data
def search_page_xiaochengxu(self,query,output_to_file=False,
filepath=None,
releaser_page_num_max=30,
output_to_es_raw=False,
es_index=None,
doc_type=None,
output_to_es_register=False,
push_to_redis=False, proxies_num=None, **kwargs):
count = 1
# proxies = {'http': 'http://hanye:i9mmu0a3@58.55.159.141:16085/', 'https': 'http://hanye:i9mmu0a3@58.55.159.141:16085/'}
while count <= releaser_page_num_max:
releaserUrl = "https://www.xiaohongshu.com/fe_api/burdock/weixin/v2/search/notes?keyword=%s&sortBy=general&page=%s&pageSize=20&prependNoteIds=&needGifCover=true" % (quote(query),str(count))
sign = releaserUrl.replace("https://www.xiaohongshu.com", "") + "WSUDD"
pid = "X" + hashlib.md5(sign.encode("utf8")).hexdigest()
headers = {
"Host": "www.xiaohongshu.com",
"Connection": "keep-alive",
"Device-Fingerprint": "WHJMrwNw1k/GXMIH6oNZHpLWoBQzvkDy05jr1Va0PmTbRiILIJqrbuTSXZlYWlVESzybG5xn+rT8DpNKSeRU9PyU5zXZgL7zsdCW1tldyDzmauSxIJm5Txg==1487582755342",
"X-Sign": pid,
"content-type": "application/json",
"Authorization": "wxmp.03a6e90f-3759-4934-b05d-033c6eff74e9",
"Accept-Encoding": "gzip,compress,br,deflate",
"User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.21(0x17001525) NetType/WIFI Language/zh_TW",
"Referer": "https://servicewechat.com/wxb296433268a1c654/15/page-frame.html",
}
try:
print(releaserUrl)
res = retry_get_url(releaserUrl, headers=headers, proxies=proxies_num)
except:
continue
# print(get_page.content)
time.sleep(random.randint(1, 2))
data_list = res.json()
# if not data_list.get("data",{}).get("notes"):
# print("data_list error",data_list)
# break
if data_list:
print("get data at keyword: %s page: %s" % (query, count))
count += 1
if data_list.get("data",{}).get('totalCount',10)/10 < count:
break
for info_dic in data_list["data"]["notes"]:
video_dic = {}
page_id = info_dic["id"]
title = info_dic["title"]
if info_dic["type"] != "normal":
continue
time_ts = datetime.datetime.strptime(info_dic["time"], '%Y-%m-%d %H:%M').timestamp()
page_data = self.get_one_page_xiaochengxu(page_id, proxies=proxies_num)
print(page_data)
if page_data["code"] == -1 and page_data["msg"] != "该内容无法展示":
continue
page_data['release_time'] = int(time_ts*1e3) page_data['release_time'] = int(time_ts*1e3)
page_data['platform'] = 'xiaohongshu' page_data['platform'] = 'xiaohongshu'
page_data['doc_id'] = page_id page_data['doc_id'] = page_id
...@@ -287,7 +350,7 @@ class Crawler_xiaohongshu(): ...@@ -287,7 +350,7 @@ class Crawler_xiaohongshu():
continue continue
if rds.hexists("xiaohongshu",pid): if rds.hexists("xiaohongshu",pid):
continue continue
# rds.hset("xiaohongshu",key=page_id,value=json.dumps(page_data)) rds.hset("xiaohongshu",key=page_id,value=json.dumps(page_data))
yield page_data yield page_data
# break # break
...@@ -353,61 +416,61 @@ if __name__ == '__main__': ...@@ -353,61 +416,61 @@ if __name__ == '__main__':
# "https://www.xiaohongshu.com/user/profile/5eccc58f000000000100753e", # "https://www.xiaohongshu.com/user/profile/5eccc58f000000000100753e",
# "https://www.xiaohongshu.com/user/profile/5fbe05b4000000000101c88d", # "https://www.xiaohongshu.com/user/profile/5fbe05b4000000000101c88d",
# "https://www.xiaohongshu.com/user/profile/5b7d1da7e8ac2b471ee6fef3", # "https://www.xiaohongshu.com/user/profile/5b7d1da7e8ac2b471ee6fef3",
"https://www.xiaohongshu.com/user/profile/5a11b22211be101018ba7125", # "https://www.xiaohongshu.com/user/profile/5a11b22211be101018ba7125",
"https://www.xiaohongshu.com/user/profile/5a76c3c611be107f08bd35b3", # "https://www.xiaohongshu.com/user/profile/5a76c3c611be107f08bd35b3",
"https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4", # "https://www.xiaohongshu.com/user/profile/5ecb6d7300000000010016a4",
"https://www.xiaohongshu.com/user/profile/5f2539e80000000001009d9e", # "https://www.xiaohongshu.com/user/profile/5f2539e80000000001009d9e",
"https://www.xiaohongshu.com/user/profile/561b1fd8e4b1cf0295755d05", # "https://www.xiaohongshu.com/user/profile/561b1fd8e4b1cf0295755d05",
"https://www.xiaohongshu.com/user/profile/5beeba1ff7e8b93bc0405234", # "https://www.xiaohongshu.com/user/profile/5beeba1ff7e8b93bc0405234",
"https://www.xiaohongshu.com/user/profile/5c87785f000000001000ed51", # "https://www.xiaohongshu.com/user/profile/5c87785f000000001000ed51",
"https://www.xiaohongshu.com/user/profile/5efdba65000000000101c79c", # "https://www.xiaohongshu.com/user/profile/5efdba65000000000101c79c",
"https://www.xiaohongshu.com/user/profile/5507e7dfa46e9616260827f6", # "https://www.xiaohongshu.com/user/profile/5507e7dfa46e9616260827f6",
"https://www.xiaohongshu.com/user/profile/567573470bf90c27957dd73c", # "https://www.xiaohongshu.com/user/profile/567573470bf90c27957dd73c",
"https://www.xiaohongshu.com/user/profile/5fd1821b000000000100381a", # "https://www.xiaohongshu.com/user/profile/5fd1821b000000000100381a",
"https://www.xiaohongshu.com/user/profile/5f5f6b1c00000000010064dc", # "https://www.xiaohongshu.com/user/profile/5f5f6b1c00000000010064dc",
"https://www.xiaohongshu.com/user/profile/5aea4d31e8ac2b4a44e1d2d4", # "https://www.xiaohongshu.com/user/profile/5aea4d31e8ac2b4a44e1d2d4",
"https://www.xiaohongshu.com/user/profile/5f39eabb00000000010076ca", # "https://www.xiaohongshu.com/user/profile/5f39eabb00000000010076ca",
"https://www.xiaohongshu.com/user/profile/5cda11d7000000001703780c", # "https://www.xiaohongshu.com/user/profile/5cda11d7000000001703780c",
"https://www.xiaohongshu.com/user/profile/5cbc3e9f000000001701d7bf", # "https://www.xiaohongshu.com/user/profile/5cbc3e9f000000001701d7bf",
"https://www.xiaohongshu.com/user/profile/5e7886930000000001003f7f", # "https://www.xiaohongshu.com/user/profile/5e7886930000000001003f7f",
"https://www.xiaohongshu.com/user/profile/566fbc3550c4b435f51f637b", # "https://www.xiaohongshu.com/user/profile/566fbc3550c4b435f51f637b",
"https://www.xiaohongshu.com/user/profile/5e86cb34000000000100a223", # "https://www.xiaohongshu.com/user/profile/5e86cb34000000000100a223",
"https://www.xiaohongshu.com/user/profile/558e15b2f5a263490c65cdaa", # "https://www.xiaohongshu.com/user/profile/558e15b2f5a263490c65cdaa",
"https://www.xiaohongshu.com/user/profile/5d9eef320000000001001615", # "https://www.xiaohongshu.com/user/profile/5d9eef320000000001001615",
"https://www.xiaohongshu.com/user/profile/5a6ba3214eacab4eee8e627a", # "https://www.xiaohongshu.com/user/profile/5a6ba3214eacab4eee8e627a",
"https://www.xiaohongshu.com/user/profile/5f58cacb000000000100bdf5", # "https://www.xiaohongshu.com/user/profile/5f58cacb000000000100bdf5",
"https://www.xiaohongshu.com/user/profile/5f954030000000000100780c", # "https://www.xiaohongshu.com/user/profile/5f954030000000000100780c",
"https://www.xiaohongshu.com/user/profile/5f5745bf000000000100351d", # "https://www.xiaohongshu.com/user/profile/5f5745bf000000000100351d",
"https://www.xiaohongshu.com/user/profile/5c74a2b9000000001002e667", # "https://www.xiaohongshu.com/user/profile/5c74a2b9000000001002e667",
"https://www.xiaohongshu.com/user/profile/595ee5b882ec397553103dd3", # "https://www.xiaohongshu.com/user/profile/595ee5b882ec397553103dd3",
"https://www.xiaohongshu.com/user/profile/5a5e20324eacab30f03654fb", # "https://www.xiaohongshu.com/user/profile/5a5e20324eacab30f03654fb",
"https://www.xiaohongshu.com/user/profile/55743bedc2bdeb1a16844741", # "https://www.xiaohongshu.com/user/profile/55743bedc2bdeb1a16844741",
"https://www.xiaohongshu.com/user/profile/5f0d523800000000010056de", # "https://www.xiaohongshu.com/user/profile/5f0d523800000000010056de",
"https://www.xiaohongshu.com/user/profile/59d5b03e44363b61a050532f", # "https://www.xiaohongshu.com/user/profile/59d5b03e44363b61a050532f",
"https://www.xiaohongshu.com/user/profile/5ebdd5f40000000001002a67", # "https://www.xiaohongshu.com/user/profile/5ebdd5f40000000001002a67",
"https://www.xiaohongshu.com/user/profile/5f1c1b7b0000000001006cbf", # "https://www.xiaohongshu.com/user/profile/5f1c1b7b0000000001006cbf",
"https://www.xiaohongshu.com/user/profile/5ae404944eacab794dfb95b1", # "https://www.xiaohongshu.com/user/profile/5ae404944eacab794dfb95b1",
"https://www.xiaohongshu.com/user/profile/5d26276a0000000012017538", # "https://www.xiaohongshu.com/user/profile/5d26276a0000000012017538",
"https://www.xiaohongshu.com/user/profile/5ed5aa8f0000000001001f1e", # "https://www.xiaohongshu.com/user/profile/5ed5aa8f0000000001001f1e",
"https://www.xiaohongshu.com/user/profile/5f92cf4f000000000100a846", # "https://www.xiaohongshu.com/user/profile/5f92cf4f000000000100a846",
"https://www.xiaohongshu.com/user/profile/5a75d42011be10344b917ffe", # "https://www.xiaohongshu.com/user/profile/5a75d42011be10344b917ffe",
"https://www.xiaohongshu.com/user/profile/5ccea0ff000000001002b753", # "https://www.xiaohongshu.com/user/profile/5ccea0ff000000001002b753",
"https://www.xiaohongshu.com/user/profile/5c4418750000000005006717", # "https://www.xiaohongshu.com/user/profile/5c4418750000000005006717",
"https://www.xiaohongshu.com/user/profile/5ec582d60000000001005315", # "https://www.xiaohongshu.com/user/profile/5ec582d60000000001005315",
"https://www.xiaohongshu.com/user/profile/594a93835e87e72f3e2ded11", # "https://www.xiaohongshu.com/user/profile/594a93835e87e72f3e2ded11",
"https://www.xiaohongshu.com/user/profile/5b8ab07606311b000184195a", # "https://www.xiaohongshu.com/user/profile/5b8ab07606311b000184195a",
"https://www.xiaohongshu.com/user/profile/54e7413ea46e96122dab7674", # "https://www.xiaohongshu.com/user/profile/54e7413ea46e96122dab7674",
"https://www.xiaohongshu.com/user/profile/5f3657900000000001002181", # "https://www.xiaohongshu.com/user/profile/5f3657900000000001002181",
"https://www.xiaohongshu.com/user/profile/5a65d6554eacab6864e2749e", # "https://www.xiaohongshu.com/user/profile/5a65d6554eacab6864e2749e",
"https://www.xiaohongshu.com/user/profile/5a745dc911be101d9ceab748", # "https://www.xiaohongshu.com/user/profile/5a745dc911be101d9ceab748",
"https://www.xiaohongshu.com/user/profile/59b2033550c4b45e5d43c3d9", # "https://www.xiaohongshu.com/user/profile/59b2033550c4b45e5d43c3d9",
"https://www.xiaohongshu.com/user/profile/59a97aaa5e87e760e012dcd0", # "https://www.xiaohongshu.com/user/profile/59a97aaa5e87e760e012dcd0",
"https://www.xiaohongshu.com/user/profile/5a5de03611be100219719b0f", # "https://www.xiaohongshu.com/user/profile/5a5de03611be100219719b0f",
"https://www.xiaohongshu.com/user/profile/5f40a5170000000001008577", # "https://www.xiaohongshu.com/user/profile/5f40a5170000000001008577",
"https://www.xiaohongshu.com/user/profile/597e82aa5e87e73c4915db81", # "https://www.xiaohongshu.com/user/profile/597e82aa5e87e73c4915db81",
"https://www.xiaohongshu.com/user/profile/580e0bc36a6a69043935369d", # "https://www.xiaohongshu.com/user/profile/580e0bc36a6a69043935369d",
"https://www.xiaohongshu.com/user/profile/5d1a17670000000012021d8e", # "https://www.xiaohongshu.com/user/profile/5d1a17670000000012021d8e",
"https://www.xiaohongshu.com/user/profile/59a830be82ec39155146f421", # "https://www.xiaohongshu.com/user/profile/59a830be82ec39155146f421",
# "https://www.xiaohongshu.com/user/profile/55efc1b73397db0e969c8fbd", # "https://www.xiaohongshu.com/user/profile/55efc1b73397db0e969c8fbd",
# "https://www.xiaohongshu.com/user/profile/5c8c55220000000010005810", # "https://www.xiaohongshu.com/user/profile/5c8c55220000000010005810",
# "https://www.xiaohongshu.com/user/profile/5f337df2000000000101e2b2", # "https://www.xiaohongshu.com/user/profile/5f337df2000000000101e2b2",
...@@ -421,12 +484,161 @@ if __name__ == '__main__': ...@@ -421,12 +484,161 @@ if __name__ == '__main__':
# 'https://www.xiaohongshu.com/user/profile/5c20dd200000000007027c07', # 'https://www.xiaohongshu.com/user/profile/5c20dd200000000007027c07',
# 'https://www.xiaohongshu.com/user/profile/5fe1c1ba0000000001006e65', # 'https://www.xiaohongshu.com/user/profile/5fe1c1ba0000000001006e65',
] ]
url_list = [
# "https://www.xiaohongshu.com/user/profile/5ae3f47b11be105fae4b854c",
# "https://www.xiaohongshu.com/user/profile/5b345022e8ac2b2d24ca801d",
# "https://www.xiaohongshu.com/user/profile/5c3e7c4b00000000070188e7",
# "https://www.xiaohongshu.com/user/profile/5ba23dacc8bc86000191036c",
# "https://www.xiaohongshu.com/user/profile/5a96a4934eacab2a15f6bee6",
# "https://www.xiaohongshu.com/user/profile/5c1679b000000000060377af",
# "https://www.xiaohongshu.com/user/profile/5c3ff8260000000006002732",
# "https://www.xiaohongshu.com/user/profile/5ab6f28a4eacab14cf262fcb",
# "https://www.xiaohongshu.com/user/profile/5c8518a8000000001603ac67",
# "https://www.xiaohongshu.com/user/profile/5ed622f80000000001000395",
# "https://www.xiaohongshu.com/user/profile/5fbccc7d000000000100a90a",
# "https://www.xiaohongshu.com/user/profile/5ad02e7b4eacab4f00bdd342",
# "https://www.xiaohongshu.com/user/profile/5dd79e610000000001008b1a",
# "https://www.xiaohongshu.com/user/profile/5c88fce2000000001202adda",
# "https://www.xiaohongshu.com/user/profile/5e1fe0820000000001000998",
# "https://www.xiaohongshu.com/user/profile/5b541148f7e8b926ded2b297",
# "https://www.xiaohongshu.com/user/profile/5b729b6be8ac2b2520c6bd05",
# "https://www.xiaohongshu.com/user/profile/5b6a8d08b27a3c0001afff47",
# "https://www.xiaohongshu.com/user/profile/5a6e671f4eacab0c2ac4aa17",
# "https://www.xiaohongshu.com/user/profile/5affbb614eacab1ab2f46f06",
# "https://www.xiaohongshu.com/user/profile/59e215d66b64555dee8dd86f",
# "https://www.xiaohongshu.com/user/profile/58e68da46a6a690e89ed4ab4",
# "https://www.xiaohongshu.com/user/profile/5eccda220000000001004a57",
# "https://www.xiaohongshu.com/user/profile/5a4afdc94eacab65f92fa64f",
# "https://www.xiaohongshu.com/user/profile/5cde1cd100000000050302e5",
# "https://www.xiaohongshu.com/user/profile/5c3c10e600000000060367ba",
# "https://www.xiaohongshu.com/user/profile/5c6a67180000000012036f54",
# "https://www.xiaohongshu.com/user/profile/5ca847640000000010033353",
# "https://www.xiaohongshu.com/user/profile/5915d4ff6a6a6951773e7eab",
# "https://www.xiaohongshu.com/user/profile/5d079eb3000000001602ca3e",
# "https://www.xiaohongshu.com/user/profile/5a8fc4afe8ac2b15495c4cf8",
# "https://www.xiaohongshu.com/user/profile/5efb53ab000000000101f984",
# "https://www.xiaohongshu.com/user/profile/58a2984782ec396b0702a2a7",
# "https://www.xiaohongshu.com/user/profile/5ed6624c000000000101e175",
# "https://www.xiaohongshu.com/user/profile/5a588c8e11be104a80a67bee",
# "https://www.xiaohongshu.com/user/profile/55a73ecf67bc65743eb1ad37",
# "https://www.xiaohongshu.com/user/profile/5efec35c000000000101d75a",
# "https://www.xiaohongshu.com/user/profile/5df0ce15000000000100b7de",
# "https://www.xiaohongshu.com/user/profile/5f4650ec000000000101c50e",
# "https://www.xiaohongshu.com/user/profile/5e8bddee0000000001004429",
# "https://www.xiaohongshu.com/user/profile/5f16ec070000000001008b3b",
# "https://www.xiaohongshu.com/user/profile/5bca101898c35500018cfd0b",
# "https://www.xiaohongshu.com/user/profile/5b1b42ce4eacab3a485c6b23",
# "https://www.xiaohongshu.com/user/profile/5c3940f2000000000503658c",
# "https://www.xiaohongshu.com/user/profile/5fab4fa20000000001008ab4",
# "https://www.xiaohongshu.com/user/profile/58857d9582ec3917d9067b26",
# "https://www.xiaohongshu.com/user/profile/5edda9550000000001007b09",
# "https://www.xiaohongshu.com/user/profile/5a629d4a4eacab6e2782d26a",
# "https://www.xiaohongshu.com/user/profile/5b98ee9a865cea00019997eb",
# "https://www.xiaohongshu.com/user/profile/5e2900820000000001001980",
# "https://www.xiaohongshu.com/user/profile/5a329a594eacab743499d133",
# "https://www.xiaohongshu.com/user/profile/5b4d5f5ef7e8b969e2cde0c0",
# "https://www.xiaohongshu.com/user/profile/5fd87d0c00000000010072e7",
# "https://www.xiaohongshu.com/user/profile/5c356422000000000600d6ce",
# "https://www.xiaohongshu.com/user/profile/5cfc0fb3000000002501d5cf",
# "https://www.xiaohongshu.com/user/profile/5db940bb0000000001004c53",
# "https://www.xiaohongshu.com/user/profile/59c133f46a6a695e65251ad6",
# "https://www.xiaohongshu.com/user/profile/5c440a48000000000500228b",
# "https://www.xiaohongshu.com/user/profile/5fdd6ace0000000001004511",
# "https://www.xiaohongshu.com/user/profile/5c4bd0f00000000010000126",
# "https://www.xiaohongshu.com/user/profile/5f6af984000000000101ccec",
# "https://www.xiaohongshu.com/user/profile/5c146c86000000000500193e",
# "https://www.xiaohongshu.com/user/profile/5e22b66b0000000001005a74",
# "https://www.xiaohongshu.com/user/profile/57fc9ef97fc5b82f86737331",
# "https://www.xiaohongshu.com/user/profile/5e414bc30000000001005f37",
# "https://www.xiaohongshu.com/user/profile/5e744c69000000000100a08a",
# "https://www.xiaohongshu.com/user/profile/5d3572ba00000000120182f1",
# "https://www.xiaohongshu.com/user/profile/5fb09b9a000000000100a9de",
# "https://www.xiaohongshu.com/user/profile/5bb75a6c43c9ad00010cd3f4",
# "https://www.xiaohongshu.com/user/profile/59076bb182ec392f9bfd60a4",
# "https://www.xiaohongshu.com/user/profile/5f93a3e90000000001005a97",
# "https://www.xiaohongshu.com/user/profile/5f3a53d9000000000101e41f",
# "https://www.xiaohongshu.com/user/profile/5b93f13803c3110001038a76",
# "https://www.xiaohongshu.com/user/profile/5c3ed7df000000000702f642",
# "https://www.xiaohongshu.com/user/profile/5c0bd7ed000000000602f70d",
# "https://www.xiaohongshu.com/user/profile/5d19407c000000001601621f",
# "https://www.xiaohongshu.com/user/profile/5a4d88df4eacab14d56496ae",
# "https://www.xiaohongshu.com/user/profile/5fa165ac0000000001007863",
# "https://www.xiaohongshu.com/user/profile/5e629811000000000100b2c6",
# "https://www.xiaohongshu.com/user/profile/565787fa447608336ce1d41f",
# "https://www.xiaohongshu.com/user/profile/5e69e8650000000001001569",
# "https://www.xiaohongshu.com/user/profile/5de612e30000000001008202",
# "https://www.xiaohongshu.com/user/profile/598e64d35e87e72101b898ce",
# "https://www.xiaohongshu.com/user/profile/5de0d4c5000000000100232a",
# "https://www.xiaohongshu.com/user/profile/5f29771c00000000010034a8",
# "https://www.xiaohongshu.com/user/profile/5c1a709a0000000006019305",
# "https://www.xiaohongshu.com/user/profile/5aab36e111be101b82bf920b",
# "https://www.xiaohongshu.com/user/profile/5ca51d2f000000001201d44f",
# "https://www.xiaohongshu.com/user/profile/5c32c46a0000000006003450",
# "https://www.xiaohongshu.com/user/profile/5fb376fd000000000101eade",
# "https://www.xiaohongshu.com/user/profile/5c6a5f84000000001002a2c5",
# "https://www.xiaohongshu.com/user/profile/5d0efe9d0000000016011d30",
# "https://www.xiaohongshu.com/user/profile/5fc0a26b0000000001004470",
"https://www.xiaohongshu.com/user/profile/5e0b515c0000000001009994",
"https://www.xiaohongshu.com/user/profile/5c49cc6d0000000011024c05",
"https://www.xiaohongshu.com/user/profile/5c35b4100000000007007320",
"https://www.xiaohongshu.com/user/profile/5f86a6d6000000000100142b",
"https://www.xiaohongshu.com/user/profile/5d31bfb8000000001102b26e",
"https://www.xiaohongshu.com/user/profile/5e71f6870000000001005e52",
]
keyword_list = [
# "娃娃针",
# "婴儿针",
# "嗨体",
# "fotona4D",
# "瑞兰唯瑅",
# "双美胶原蛋白",
# "宝尼达",
# "英诺小脂素",
# "小切口拉皮",
# "海菲秀",
# "btl美修斯",
# "超光子",
# "无线光雕",
# "天使光雕",
# "吉适",
# "瑞博童颜针",
# "黄金微雕",
# "钻石精雕",
# "黄金微针",
# "英诺提拉素",
# "爱贝芙",
# "lams",
# "爱芙莱",
# "bbl",
# "aopt",
# "超体",
# "大V线",
# "直腿术",
# "润致娃娃针",
"三文鱼针",
"熊猫针",
"fotona 4D Pro",
"氐殊",
"DPL",
"天使脂雕",
"童颜针",
"aqua娃娃针",
"普利兰",
"DPL黑金",
]
print(len(url_list)) print(len(url_list))
count =0 count =0
for url in url_list: # for url in url_list:
print(url) # print(url)
res = test.releaser_page(url,proxies_num=0) # res = test.releaser_page(url,proxies_num=0)
# for r in res:
# count += 1
# print(count)
# # pass
for query in keyword_list:
res = test.search_page_xiaochengxu(query,releaser_page_num_max=200)
for r in res: for r in res:
count += 1 count += 1
print(count) print(count)
# pass
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment