Commit 1e32f0fc authored by 段英荣

加入并发

parent 6fda3ffb
...@@ -39,7 +39,7 @@ import os ...@@ -39,7 +39,7 @@ import os
from gevent import monkey; monkey.patch_socket() from gevent import monkey; monkey.patch_socket()
import gevent import gevent
from threading import Thread, Lock from threading import Thread, Lock
import urllib.parse
...@@ -364,9 +364,23 @@ class ZhihuAccount(object): ...@@ -364,9 +364,23 @@ class ZhihuAccount(object):
cur_image_index = 0 cur_image_index = 0
for begin_index in range(0,200,10): for begin_index in range(0,200,10):
query_by_word_url = "https://www.zhihu.com/api/v4/search_v3?t=general&correction=1&lc_idx=62&" \ # query_by_word_url = "https://www.zhihu.com/api/v4/search_v3?t=general&correction=1&lc_idx=62&" \
"show_all_topics=0&search_hash_id=1dbb1e923a17f147356177932d1236e1&" \ # "show_all_topics=0&search_hash_id=1dbb1e923a17f147356177932d1236e1&" \
"vertical_info=0%2C0%2C0%2C0%2C0%2C0%2C0%2C0%2C0%2C1" + "&q=" + str(query_word) + "&offset=" + str(begin_index) + "&limit=10" # "vertical_info=0%2C0%2C0%2C0%2C0%2C0%2C0%2C0%2C0%2C1" + "&q=" + str(query_word) + "&offset=" + str(begin_index) + "&limit=10"
query_by_word_url = "https://www.zhihu.com/api/v4/search_v3?"
query_params_dict = {
"q": query_word,
"offset": begin_index,
"limit": 10,
"lc_idx": 22,
"show_all_topics": 0,
"search_hash_id": "dc4a11848e2540981cf28634ff3609c0",
"vertical_info": "0,0,0,0,0,0,0,0,0,1",
"correction": 1,
"t": "general"
}
query_by_word_url += urllib.parse.urlencode(query_params_dict)
res = self.session.get(query_by_word_url,allow_redirects=False) res = self.session.get(query_by_word_url,allow_redirects=False)
print(10*"*") print(10*"*")
...@@ -455,9 +469,19 @@ class ZhihuAccount(object): ...@@ -455,9 +469,19 @@ class ZhihuAccount(object):
def zhihu_answers_list_by_question(self,question_id,question_answer_dict,zhihu_spider_fd,cur_image_index,cur_word_index): def zhihu_answers_list_by_question(self,question_id,question_answer_dict,zhihu_spider_fd,cur_image_index,cur_word_index):
for begin_index in range(0,200,10): for begin_index in range(0,200,10):
answers_list_by_question_url = "https://www.zhihu.com/api/v4/questions/" + str(question_id) + \ # answers_list_by_question_url = "https://www.zhihu.com/api/v4/questions/" + str(question_id) + \
"/answers?include=data%5B*%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cis_labeled%2Cis_recognized%2Cpaid_info%2Cpaid_info_content%3Bdata%5B*%5D.mark_infos%5B*%5D.url%3Bdata%5B*%5D.author.follower_count%2Cbadge%5B*%5D.topics&" \ # "/answers?include=data%5B*%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cis_labeled%2Cis_recognized%2Cpaid_info%2Cpaid_info_content%3Bdata%5B*%5D.mark_infos%5B*%5D.url%3Bdata%5B*%5D.author.follower_count%2Cbadge%5B*%5D.topics&" \
"sort_by=default&platform=desktop" + "&offset=" + str(begin_index) + "&limit=10" # "sort_by=default&platform=desktop" + "&offset=" + str(begin_index) + "&limit=10"
answers_list_by_question_url = "https://www.zhihu.com/api/v4/questions/" + str(question_id) + "/answers?"
query_params_dict = {
"include": "data[*].is_normal,admin_closed_comment,reward_info,is_collapsed,annotation_action,annotation_detail,collapse_reason,is_sticky,collapsed_by,suggest_edit,comment_count,can_comment,content,editable_content,voteup_count,reshipment_settings,comment_permission,created_time,updated_time,review_info,relevant_info,question,excerpt,relationship.is_authorized,is_author,voting,is_thanked,is_nothelp,is_labeled,is_recognized,paid_info,paid_info_content;data[*].mark_infos[*].url;data[*].author.follower_count,badge[*].topics",
"offset": begin_index,
"limit": 10,
"sort_by": "default",
"platform": "desktop"
}
answers_list_by_question_url += urllib.parse.urlencode(query_params_dict)
res = self.session.get(answers_list_by_question_url,allow_redirects=False) res = self.session.get(answers_list_by_question_url,allow_redirects=False)
print(10*"*") print(10*"*")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment