Commit 0797736e authored by 段英荣

增加搜索爬取功能

parent 21612b5f
...@@ -248,31 +248,32 @@ class ZhihuAccount(object): ...@@ -248,31 +248,32 @@ class ZhihuAccount(object):
if "data" in raw_content_dict: if "data" in raw_content_dict:
for data_item in raw_content_dict["data"]: for data_item in raw_content_dict["data"]:
data_type = data_item["object"]["type"] if data_item["type"] == "search_result":
content = data_item["object"]["content"] data_type = data_item["object"]["type"]
platform_id = data_item["object"]["id"] content = data_item["object"]["content"]
user_id = random.choice(majia_user_list) platform_id = data_item["object"]["id"]
question_id = "" user_id = random.choice(majia_user_list)
question_id = ""
if data_type == "article":
title = data_item["object"]["title"] if data_type == "article":
elif data_type == "answer": title = data_item["object"]["title"]
title = data_item["object"]["question"]["name"] elif data_type == "answer":
question_id = data_item["object"]["question"]["id"] title = data_item["object"]["question"]["name"]
else: question_id = data_item["object"]["question"]["id"]
print("type is:%s" % data_type) else:
title = "" print("type is:%s" % data_type)
title = ""
item_dict = {
"user_id": user_id, item_dict = {
"platform_id": platform_id, "user_id": user_id,
"title": title, "platform_id": platform_id,
"content": content, "title": title,
"type": data_type, "content": content,
"question_id": question_id "type": data_type,
} "question_id": question_id
}
zhihu_spider_fd.write(json.dumps(item_dict) + "\n")
zhihu_spider_fd.write(json.dumps(item_dict) + "\n")
time.sleep(2) time.sleep(2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment