Commit 5e91cb53 authored by litaolemo

update

parent 42faadf9
......@@ -198,9 +198,10 @@ class Crawler_xiaohongshu():
releaser_id = self.get_releaser_id(releaserUrl)
releaserUrl = 'http://www.xiaohongshu.com/user/profile/%s' % releaser_id
pcursor = 0
cookie_dic = {'timestamp2': '2021011521167acd3955b98063aef973'}
cookie_dic = {'timestamp2': '2021012521167acd3955b980c6baec3f'}
# proxies = {'http': 'http://hanye:i9mmu0a3@58.55.159.141:16085/', 'https': 'http://hanye:i9mmu0a3@58.55.159.141:16085/'}
while count <= releaser_page_num_max and count <= 1:
time.sleep(0.5)
try:
print(releaserUrl)
res = retry_get_url(releaserUrl, headers=headers, proxies=proxies_num, cookies=cookie_dic)
......@@ -258,7 +259,7 @@ if __name__ == '__main__':
# # print(js)
# exec_js = execjs.compile(js)
# exec_js.call("get_sign", "https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae")
# test = Crawler_xiaohongshu()
test = Crawler_xiaohongshu()
releaserurl = 'https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae'
url_list =[
"https://www.xiaohongshu.com/user/profile/5abbb57211be1027a0c880ae",
......
......@@ -158,15 +158,40 @@ class Crawler_zhihu():
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36",
"x-ab-param": "li_yxzl_new_style_a=1;se_major=0;se_adsrank=4;se_hi_trunc=0;se_v053=1;li_panswer_topic=0;zr_test_aa1=1;pf_noti_entry_num=2;zr_search_sim2=2;zr_slotpaidexp=2;se_zp_boost=0;tp_club_entrance=1;pf_profile2_tab=0;ug_newtag=1;li_answer_card=0;ls_recommend_test=5;qap_labeltype=1;zr_rec_answer_cp=open;se_sug_term=0;tp_topic_tab=0;ge_ge01=5;se_wil_act=0;se_videobox=0;tsp_ad_cardredesign=0;qap_question_visitor= 0;zr_slot_training=2;tp_clubhyb=0;li_ebook_gen_search=2;se_v_v005=0;zw_sameq_sorce=999;ge_ge02=6;se_mobilecard=0;se_auth_src=0;tp_header_style=1;tp_flow_ctr=0;pf_creator_card=1;li_viptab_name=0;zr_intervene=0;se_bert128=1;se_ffzx_jushen1=0;top_v_album=1;se_preset=0;tp_discover=1;ls_fmp4=0;tp_club_top=0;top_universalebook=1;li_svip_cardshow=1;li_paid_answer_exp=0;tp_topic_style=0;zr_art_rec=base;se_colorfultab=1;se_auth_src2=0;tp_club_qa_entrance=1;tp_club__entrance2=1;tsp_hotlist_ui=3;li_svip_tab_search=1;se_entity22=1;tp_meta_card=0;tp_topic_tab_new=0-0-0;tp_zrec=0;top_ebook=0;pf_adjust=1;qap_question_author=0;zr_topic_rpc=0;se_topicfeed=0;tp_club_feed=0;tsp_ioscard2=0;zr_rel_search=base;se_recommend=1;se_usercard=0;tp_club_fdv4=0;tp_m_intro_re_topic=1;pf_foltopic_usernum=0;li_vip_verti_search=0;zr_training_boost=false;se_v054=0;tp_contents=1;soc_feed_intelligent=3;tsp_ios_cardredesign=0;pf_fuceng=1;pf_newguide_vertical=0;ug_follow_topic_1=2;ls_video_commercial=0;li_car_meta=1;se_sug_dnn=0;tp_fenqu_wei=0;li_catalog_card=1;top_quality=0;se_click_v_v=1;se_aa_base=1;se_club_ui=0;se_return_1=0;soc_notification=1;zr_ans_rec=gbrank;zr_search_paid=1;zr_expslotpaid=3;zr_rerank=0;se_college=default;se_whitelist=1;top_root=0;li_yxxq_aut=A1;tsp_adcard2=0;ls_videoad=2;se_col_boost=1;li_edu_page=old;zr_training_first=false;se_t2sug=1;se_vbert3=0;se_merge=0;li_video_section=1;zr_km_answer=open_cvr;zr_sim3=0;se_v_v006=0;tp_dingyue_video=0;li_topics_search=0;se_searchwiki=0;se_guess=0;se_major_v2=0;tp_club_bt=0;tp_sft=a;top_test_4_liguangyi=1",
"x-ab-param": "tp_topic_style=0;li_video_section=1;top_test_4_liguangyi=1;pf_adjust=1;li_sp_mqbk=0;li_paid_answer_exp=0;tp_zrec=1;zr_slotpaidexp=1;qap_question_visitor= 0;pf_noti_entry_num=2;zr_expslotpaid=3;li_edu_page=old;qap_question_author=0;tp_dingyue_video=0;li_vip_verti_search=0;tp_contents=1;se_ffzx_jushen1=0;li_panswer_topic=0",
"x-api-version": "3.0.91",
"x-ab-pb": "Ck5oALcAlgsuAQEL7ApMCwgARwAbAGcAtApsAMUA1wt1DDcMUgv0C+QKtQtDAIkMNAzPCz8AQAFPAVgBBwybC1YMQgC0AGIB4AsPC9wLYAsSJwAAAQABAQABAAAAAAAAAAEBAQAAAxUAAAsAAQAAAQIBBQEAAAEAAA==",
"x-app-za": "OS=Web",
"x-requested-with": "fetch",
"x-zse-83": "3_2.0",
"x-zse-86": None,
"referer": "https://www.zhihu.com/search?type=content&q={0}".format(urllib.parse.quote(keyword)),
}
headers_search = {
"accept": "*/*",
"accept-encoding": "gzip, deflate, br",
"accept-language": "zh-CN,zh;q=0.9",
"cache-control": "no-cache",
"cookie": '_zap=20547721-b576-4409-95c1-000c6f20517b; d_c0="AIDu7_zGrA-PToWVy-siVNLS835i5YXmFCQ=|1562072925"; __gads=ID=bdc51df6433d4288:T=1562072932:S=ALNI_MbUwg2TeI33p4EnEYpHr8bAKBUiNQ; _ga=GA1.2.929365035.1592357886; _xsrf=MuvTOIUy5KNtEQCk76uG0nAbiqt6IyKS; __utma=51854390.929365035.1592357886.1603162707.1603162707.1; __utmz=51854390.1603162707.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmv=51854390.100--|2=registration_date=20200803=1^3=entry_date=20190702=1; q_c1=e59a45f95396455e871eb111bdd827e1|1611029549000|1562072927000; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1611553707,1611553987,1611628615,1611628984; tst=r; SESSIONID=ZJhStdMnZrqk62Z7NDT3q8Aej7nPl8YLxBv67kD362k; JOID=VF4QC03vctAIaP88FOESAkuDNkoGtTu1VRybcne1AKJuUM1mIAlxwmZo_DIXMGOzOJtakS2tzTJLGRpyoIMzzjQ=; osd=VFoXBkPvdtcFZv84E-wcAk-EO0QGsTy4WxyfdXq7AKZpXcNmJA58zGZs-z8ZMGe0NZValSqgwzJPHhd8oIc0wzo=; capsion_ticket="2|1:0|10:1611730708|14:capsion_ticket|44:NDM2OGI0NTY3NGU1NGNjNGIzY2RiZjMwN2E0ZWZjMTI=|b5f503cfee4007cc351da02ff81f6198d4681d8a29eb7864ed3fde99e102e54c"; z_c0="2|1:0|10:1611730767|4:z_c0|92:Mi4xSDJLUUhRQUFBQUFBZ083dl9NYXNEeVlBQUFCZ0FsVk5UMTMtWUFER0hacnlKQ1ZwUEJKVHRDbFdSX2wxLW5fTmRn|85e8de2d4823e6ec8e0c8b5e07b190321708d562342dc54ac7c1a062d55e4695"; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1611734206; KLBRSID=2177cbf908056c6654e972f5ddc96dc2|1611734297|1611730401',
"pragma": "no-cache",
"referer": "https://www.zhihu.com/search?type=content&q=%E9%98%BF%E5%85%8B%E6%9B%BC%E8%A7%92",
"sec-ch-ua": '"Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"',
"sec-ch-ua-mobile": "?0",
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
"x-ab-param": "tp_topic_style=0;li_video_section=1;top_test_4_liguangyi=1;pf_adjust=1;li_sp_mqbk=0;li_paid_answer_exp=0;tp_zrec=1;zr_slotpaidexp=1;qap_question_visitor= 0;pf_noti_entry_num=2;zr_expslotpaid=3;li_edu_page=old;qap_question_author=0;tp_dingyue_video=0;li_vip_verti_search=0;tp_contents=1;se_ffzx_jushen1=0;li_panswer_topic=0",
"x-ab-pb": "Ck5oALcAlgsuAQEL7ApMCwgARwAbAGcAtApsAMUA1wt1DDcMUgv0C+QKtQtDAIkMNAzPCz8AQAFPAVgBBwybC1YMQgC0AGIB4AsPC9wLYAsSJwAAAQABAQABAAAAAAAAAAEBAQAAAxUAAAsAAQAAAQIBBQEAAAEAAA==",
"x-api-version": "3.0.91",
"x-app-za": "OS=Web",
"x-requested-with": "fetch",
"x-zse-83": "3_2.0",
"x-zse-86": "2.0_aXt0r7UqkTFxkLO01LOBc0U8b72YU9201MF8r4UBe72p",
}
cookies_dict = {
"d_c0": '"AIDu7_zGrA-PToWVy-siVNLS835i5YXmFCQ=|1562072925"',
......@@ -178,9 +203,14 @@ class Crawler_zhihu():
url = "https://www.zhihu.com/api/v4/search_v3?t=general&q={0}&correction=1&offset=0&limit=20&lc_idx=0&show_all_topics=0".format(
urllib.parse.quote(keyword))
offset = 0
'3_2.0+/api/v4/search_v3?t=general&q=%E9%98%BF%E5%85%8B%E6%9B%BC%E8%A7%92&correction=1&offset=0&limit=20&lc_idx=0&show_all_topics=0+"AIDu7_zGrA-PToWVy-siVNLS835i5YXmFCQ=|1562072925"'
f = "+".join(["3_2.0", url.replace("https://www.zhihu.com",""), headers_search["referer"], cookies_dict["d_c0"]])
fmd5 = hashlib.new('md5', f.encode()).hexdigest()
headers_search["x-zse-86"] = "1.0_" + self.exec_js.call("b",fmd5)
print(fmd5)
# fmd5 = "584f51b7034ae89b95ee01e202262101"
print(self.exec_js.call("b",fmd5))
headers_search["x-zse-86"] = "2.0_" + self.exec_js.call("b",fmd5)
# headers_search["x-zse-86"] = "2.0_" + "aXt0r7UqkTFxkLO01LOBc0U8b72YU9201MF8r4UBe72p"
res_list = []
while offset <= search_pages_max * 20:
offset += 20
......@@ -399,6 +429,6 @@ if __name__ == '__main__':
# os.environ["EXECJS_RUNTIME"] = 'Node'
# print(execjs.get().name )
# zhihu.get_serach_page_cookies("热玛吉")
zhihu.search_page("双眼皮",search_pages_max=1,output_to_es_register=True)
zhihu.search_page("阿克曼角",search_pages_max=1,output_to_es_register=True)
# zhihu.get_single_answer_page("325099876","1209953121")
# print(user_page)
......@@ -4,7 +4,7 @@ const { window } = new JSDOM('<!doctype html><html><body></body></html>');
global.window = window;
function t(e) {
function t(e) {
return (t = "function" == typeof Symbol && "symbol" == typeof Symbol.A ? function(e) {
return typeof e
}
......@@ -13,9 +13,9 @@ global.window = window;
}
)(e)
}
Object.defineProperty(exports, "__esModule", {
Object.defineProperty(exports, "__esModule", {
value: !0
});
});
var A = "2.0"
, __g = {};
// Empty function: takes no parameters and has no body.
// NOTE(review): how `s` is used is not visible in this excerpt — confirm against the full script.
function s() {}
......@@ -216,7 +216,7 @@ global.window = window;
e.r[3] = e.r[this.s][r](o[0], o[1]);
break;
case 2:
for (var a = [], s = 0; s < this.i; s++)
for (var a = [], c = 0; c < this.i; c++)
a.unshift(e.f.pop());
e.r[3] = new e.r[this.s](a[0],a[1])
}
......@@ -342,9 +342,10 @@ global.window = window;
u.prototype.e = function(e) {
var t = this
, n = [0];
e.k.forEach(function(e) {
e.k.forEach((function(e) {
n.push(e)
});
}
));
var r = function(r) {
var o = new G;
return o.k = n,
......@@ -380,10 +381,10 @@ global.window = window;
r.push(t.charCodeAt(o) << 8 | t.charCodeAt(o + 1));
this.G = r;
for (var i = [], a = n + 2; a < t.length; ) {
var s = t.charCodeAt(a) << 8 | t.charCodeAt(a + 1)
, c = t.slice(a + 2, a + 2 + s);
i.push(c),
a += s + 2
var c = t.charCodeAt(a) << 8 | t.charCodeAt(a + 1)
, s = t.slice(a + 2, a + 2 + c);
i.push(s),
a += c + 2
}
this.b = i
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment