Commit 0797736e authored by 段英荣

增加搜索爬取功能

parent 21612b5f
......@@ -248,31 +248,32 @@ class ZhihuAccount(object):
if "data" in raw_content_dict:
for data_item in raw_content_dict["data"]:
data_type = data_item["object"]["type"]
content = data_item["object"]["content"]
platform_id = data_item["object"]["id"]
user_id = random.choice(majia_user_list)
question_id = ""
if data_type == "article":
title = data_item["object"]["title"]
elif data_type == "answer":
title = data_item["object"]["question"]["name"]
question_id = data_item["object"]["question"]["id"]
else:
print("type is:%s" % data_type)
title = ""
item_dict = {
"user_id": user_id,
"platform_id": platform_id,
"title": title,
"content": content,
"type": data_type,
"question_id": question_id
}
zhihu_spider_fd.write(json.dumps(item_dict) + "\n")
if data_item["type"] == "search_result":
data_type = data_item["object"]["type"]
content = data_item["object"]["content"]
platform_id = data_item["object"]["id"]
user_id = random.choice(majia_user_list)
question_id = ""
if data_type == "article":
title = data_item["object"]["title"]
elif data_type == "answer":
title = data_item["object"]["question"]["name"]
question_id = data_item["object"]["question"]["id"]
else:
print("type is:%s" % data_type)
title = ""
item_dict = {
"user_id": user_id,
"platform_id": platform_id,
"title": title,
"content": content,
"type": data_type,
"question_id": question_id
}
zhihu_spider_fd.write(json.dumps(item_dict) + "\n")
time.sleep(2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment