diff --git a/crawler_sys/scheduler/xiaohongshu_to_rpc.py b/crawler_sys/scheduler/xiaohongshu_to_rpc.py index a582769f594f7e6dcbcd9e6140ed60fc7e36f5ff..49405cfb35653a8bc93be15a0be52b9838e74785 100644 --- a/crawler_sys/scheduler/xiaohongshu_to_rpc.py +++ b/crawler_sys/scheduler/xiaohongshu_to_rpc.py @@ -122,50 +122,55 @@ for pid in pid_list: print("down load img error %s" % e) continue # print(qiniu_img_list) - desc_fix = "<p>" + res_json["NoteView"]["content"]['desc'].replace('\n', '<br>') + "".join(qiniu_img_list) + "</p>" - res_json["NoteView"]["content"]["desc_fix"] = desc_fix - # print(desc_fix) - res = rds.hset("xiaohongshu_with_img", key=pid, value=json.dumps(res_json)) - video_dic["platform"] = "9" - video_dic["platform_id"] = pid - video_dic["platform_answer_id"] = pid - video_dic["title"] = res_json["NoteView"]["content"]["title"] - - user_id_list_copy = copy.deepcopy(user_id_list) - qustion_id = random.choice(user_id_list_copy) - user_id_list_copy.remove(qustion_id) - video_dic["user_id"] = qustion_id - create_time = datetime.datetime.strptime(res_json["NoteView"]["content"]["time"], - '%Y-%m-%d %H:%M') - video_dic["create_time"] = create_time.timestamp() - rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/question/batch_create") - # print(rpc_res) - video_dic["platform_question_id"] = pid - video_dic["content"] = desc_fix - video_dic["user_id"] = random.choice(user_id_list_copy) - rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/answer/batch_create") try: - if res_json["NoteView"].get("comments"): - # print(res_json["NoteView"].get("data")) - for comment in res_json["NoteView"]["comments"]["data"]: - video_dic["content"] = comment['content'] - comment_id_list_copy = copy.deepcopy(majiayonghu_list) - comment_id = random.choice(comment_id_list_copy) - video_dic["user_id"] = comment_id - comment_id_list_copy.remove(comment_id) - video_dic["create_time"] = (create_time + datetime.timedelta(hours=random.randint(0, 24), - minutes=random.randint(0, 60))).timestamp() - rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/reply/batch_create") - elif res_json["NoteView"].get("commentInfo"): - for comment in res_json["NoteView"]["commentInfo"]["comments"]: - video_dic["content"] = comment['content'] - comment_id_list_copy = copy.deepcopy(majiayonghu_list) - comment_id = random.choice(comment_id_list_copy) - video_dic["user_id"] = comment_id - comment_id_list_copy.remove(comment_id) - video_dic["create_time"] = (create_time + datetime.timedelta(hours=random.randint(0,24),minutes=random.randint(0,60))).timestamp() - rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/reply/batch_create") + desc_fix = "<p>" + res_json["NoteView"]["content"]['desc'].replace('\n', '<br>') + "".join(qiniu_img_list) + "</p>" + res_json["NoteView"]["content"]["desc_fix"] = desc_fix + # print(desc_fix) + res = rds.hset("xiaohongshu_with_img", key=pid, value=json.dumps(res_json)) + video_dic["platform"] = "9" + video_dic["platform_id"] = pid + video_dic["platform_answer_id"] = pid + video_dic["title"] = res_json["NoteView"]["content"]["title"] + + user_id_list_copy = copy.deepcopy(user_id_list) + qustion_id = random.choice(user_id_list_copy) + user_id_list_copy.remove(qustion_id) + video_dic["user_id"] = qustion_id + create_time = datetime.datetime.strptime(res_json["NoteView"]["content"]["time"], + '%Y-%m-%d %H:%M') + video_dic["create_time"] = create_time.timestamp() + rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/question/batch_create") + # print(rpc_res) + video_dic["platform_question_id"] = pid + video_dic["content"] = desc_fix + video_dic["user_id"] = random.choice(user_id_list_copy) + rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/answer/batch_create") + try: + if res_json["NoteView"].get("comments"): + # print(res_json["NoteView"].get("data")) + for comment in res_json["NoteView"]["comments"]["data"]: + video_dic["content"] = comment['content'] + comment_id_list_copy = copy.deepcopy(majiayonghu_list) + comment_id = random.choice(comment_id_list_copy) + video_dic["user_id"] = comment_id + comment_id_list_copy.remove(comment_id) + video_dic["create_time"] = (create_time + datetime.timedelta(hours=random.randint(0, 24), + minutes=random.randint(0, 60))).timestamp() + rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/reply/batch_create") + elif res_json["NoteView"].get("commentInfo"): + for comment in res_json["NoteView"]["commentInfo"]["comments"]: + video_dic["content"] = comment['content'] + comment_id_list_copy = copy.deepcopy(majiayonghu_list) + comment_id = random.choice(comment_id_list_copy) + video_dic["user_id"] = comment_id + comment_id_list_copy.remove(comment_id) + video_dic["create_time"] = (create_time + datetime.timedelta(hours=random.randint(0,24),minutes=random.randint(0,60))).timestamp() + rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/reply/batch_create") + except Exception as e: + print("comment error") + print(e) except Exception as e: - print("comment error") print(e) + continue + # break \ No newline at end of file