# -*- coding:UTF-8 -*-
# @Time : 2021/1/8 16:41
# @File : xiaohongshu_to_rpc.py
# @email : litao@igengmei.com
# @author : litao
"""Forward crawled xiaohongshu notes from Redis to the RPC endpoints.

The script polls the Redis hash ``xiaohongshu``.  Every stored note is posted
as a question ("cims/question/batch_create"), an answer
("cims/answer/batch_create") and, when comments are present, a batch of
replies ("cims/reply/batch_create").
"""
import copy
import datetime
import hashlib
import json
import random
import time

import redis

from crawler.crawler_sys.utils.rpc_data_to_answer import post_single_data, post_muilty_data
from crawler_sys.utils.output_results import retry_get_url
from crawler.gm_upload.gm_upload import upload, upload_file
# NOTE(review): ``user_id_list`` and ``majiayonghu_list`` used below are not
# defined in this file; presumably they are provided by this star import.
from crawler.crawler_sys.scheduler.redis_to_rpc.rpc_config import *

# Notes written by these author ids are posted with level "5"; all other
# authors get level "3".
gm_user_id_list = [
    '5cca9b3700000000120314c9',
    '5aa0f7bae8ac2b65bfcdaf0e',
    '5c20dd200000000007027c07',
    '5fe1c1ba0000000001006e65',
]


def _upload_images(image_list, url_prefix=""):
    """Re-host every image of a note and return the resulting ``<img>`` tags.

    :param image_list: iterable of dicts with ``url``, ``fileId`` and
        ``traceId`` keys.
    :param url_prefix: string prepended to the image URL before downloading
        (the PC payload is fetched with an ``"http:"`` prefix).
    :return: list of ``<img src="...-w">`` strings, one per image that was
        processed without error.  Failing images are reported and skipped.
    """
    tags = []
    for image in image_list:
        try:
            source_url = url_prefix + image["url"].replace(image["fileId"], image["traceId"])
            raw = retry_get_url(source_url).content
            hosted_url = upload(raw, img_type=99)
            # The parsed image info is not used, but a failing request or an
            # unparsable body raises here and makes this image be skipped.
            retry_get_url(hosted_url + "-imageinfo").json()
            tags.append('<img src="' + hosted_url + '-w">')
        except Exception as e:
            # Best effort: one broken image must not abort the whole note.
            print("down load img error %s" % e)
    return tags


def _build_desc_html(desc, img_tags):
    """Wrap the note text and its image tags in a single ``<p>`` element.

    NOTE(review): ``desc`` is inserted without HTML escaping; confirm that the
    receiving side sanitises it.
    """
    return "<p>" + desc.replace('\n', '<br>') + "".join(img_tags) + "</p>"


def _post_question_and_answer(video_dic, pid, author, note, desc_fix):
    """Fill ``video_dic`` and post it as a question, then as an answer.

    :param video_dic: dict that is populated in place.
    :param pid: note id used for the platform id fields.
    :param author: dict holding the author ``id``.
    :param note: dict holding the note ``title`` and ``time``
        (``'%Y-%m-%d %H:%M'``).
    :param desc_fix: HTML body used as the answer content.
    :return: the parsed creation time of the note (naive ``datetime``).
    """
    video_dic["level"] = "5" if author['id'] in gm_user_id_list else "3"
    video_dic["platform"] = "9"
    video_dic["platform_id"] = pid
    video_dic["platform_answer_id"] = pid
    video_dic["title"] = note["title"]

    # Work on a copy so that the answer is posted by a different user than
    # the question without touching the shared list.
    candidates = copy.deepcopy(user_id_list)
    question_user = random.choice(candidates)
    candidates.remove(question_user)
    video_dic["user_id"] = question_user

    create_time = datetime.datetime.strptime(note["time"], '%Y-%m-%d %H:%M')
    video_dic["create_time"] = create_time.timestamp()
    post_single_data(copy.deepcopy(video_dic), "cims/question/batch_create")

    video_dic["platform_question_id"] = pid
    video_dic["content"] = desc_fix
    video_dic["user_id"] = random.choice(candidates)
    post_single_data(copy.deepcopy(video_dic), "cims/answer/batch_create")
    return create_time


def _post_replies(video_dic, create_time, comments, platform_id_of):
    """Post all comments of a note as one reply batch.

    ``video_dic`` is reused as the template for every reply and is mutated in
    place, so after the call it carries the fields of the last comment.

    :param video_dic: dict already filled by the question/answer step.
    :param create_time: creation time of the note; every reply gets this time
        plus a random offset of 0-24 hours and 0-60 minutes.
    :param comments: iterable of comment dicts with at least ``content``.
    :param platform_id_of: callable returning the platform id of a comment.
    """
    replies = []
    for comment in comments:
        video_dic["content"] = comment['content']
        video_dic["platform_id"] = platform_id_of(comment)
        video_dic["user_id"] = random.choice(majiayonghu_list)
        offset = datetime.timedelta(hours=random.randint(0, 24),
                                    minutes=random.randint(0, 60))
        video_dic["create_time"] = (create_time + offset).timestamp()
        replies.append(copy.deepcopy(video_dic))
    # Nothing is sent unless every comment could be converted.
    if replies:
        post_muilty_data(replies, "cims/reply/batch_create")


def _pc_comments(note_view):
    """Return the comment list of a PC payload, or ``[]`` when there is none."""
    if note_view.get("comments"):
        return note_view["comments"]["data"]
    if note_view.get("commentInfo"):
        return note_view["commentInfo"]["comments"]
    return []


def _pc_comment_id(comment):
    """Platform id of a PC comment: its own ``id`` field."""
    return comment['id']


def _mini_program_comment_id(comment):
    """Platform id of a mini-program comment: md5 of author id + content."""
    return hashlib.md5((comment['user']['id'] + comment['content']).encode("utf8")).hexdigest()


def xiaohongshu_pc(res_json, pid):
    """Post a note crawled from the PC page (payload with a ``NoteView`` key).

    :param res_json: decoded payload; ``res_json["NoteView"]["content"]`` gets
        an additional ``desc_fix`` key holding the generated HTML.
    :param pid: ignored; the note id is always re-derived from the payload
        (``commentInfo.targetNoteId``, falling back to ``content.id``).  The
        parameter is kept for interface compatibility.
    :return: the dict that was posted last (possibly only partially filled
        when an error was reported).
    :raises KeyError: when ``NoteView``, the note id or ``imageList`` is
        missing; these lookups are intentionally outside the error handling.
    """
    note_view = res_json["NoteView"]
    video_dic = {}
    try:
        pid = note_view["commentInfo"]["targetNoteId"]
    except (KeyError, TypeError):
        pid = note_view["content"]["id"]

    img_tags = _upload_images(note_view["content"]["imageList"], url_prefix="http:")

    try:
        content = note_view["content"]
        desc_fix = _build_desc_html(content['desc'], img_tags)
        content["desc_fix"] = desc_fix
        create_time = _post_question_and_answer(
            video_dic, pid, note_view["author"], content, desc_fix)
        try:
            _post_replies(video_dic, create_time, _pc_comments(note_view), _pc_comment_id)
        except Exception as e:
            # Replies are optional: question and answer are already posted.
            print("comment error")
            print(e)
    except Exception as e:
        print(e)
    return video_dic


def xiaohongshu_xiaochengxu(res_json):
    """Post a note crawled from the mini program (payload with a ``data`` key).

    :param res_json: decoded payload.
    :return: the dict that was posted last; empty when the note has no id,
        possibly partially filled when an error was reported.
    :raises KeyError: when ``data`` or ``imageList`` is missing.
    """
    data = res_json["data"]
    video_dic = {}
    try:
        pid = data["id"]
    except (KeyError, TypeError) as e:
        # Without an id nothing can be posted, so stop before uploading any
        # image instead of failing later on an unbound name.
        print("missing note id %s" % e)
        return video_dic

    img_tags = _upload_images(data["imageList"])

    try:
        desc_fix = _build_desc_html(data['desc'], img_tags)
        create_time = _post_question_and_answer(
            video_dic, pid, data["user"], data, desc_fix)
        try:
            _post_replies(video_dic, create_time,
                          data.get("commentList") or [], _mini_program_comment_id)
        except Exception as e:
            # Replies are optional: question and answer are already posted.
            print("comment error")
            print(e)
    except Exception as e:
        print(e)
    return video_dic


# NOTE: the connection and the polling loop run at import time; they are kept
# at module level so that the script behaves exactly as before when started.
rds = redis.StrictRedis(host='172.18.51.10', port=6379, db=20, decode_responses=True)
while True:
    if rds.hlen("xiaohongshu"):
        for pid in rds.hkeys("xiaohongshu"):
            # Already handled earlier: just drop the duplicate.
            if rds.sismember("xiaohongshu_exists_set", pid):
                rds.hdel("xiaohongshu", pid)
                continue
            line = rds.hget("xiaohongshu", pid)
            if line is None:
                # The field disappeared between HKEYS and HGET.
                continue
            try:
                res_json = json.loads(line)
            except ValueError as e:
                # A corrupt payload would otherwise stop the loop on every
                # pass.  It is removed but not marked as processed, so a
                # later re-crawl of the same id is still accepted.
                print("bad json for %s: %s" % (pid, e))
                rds.hdel("xiaohongshu", pid)
                continue
            if res_json.get("NoteView"):
                xiaohongshu_pc(res_json, pid)
            elif res_json.get("data"):
                xiaohongshu_xiaochengxu(res_json)
            rds.hdel("xiaohongshu", pid)
            rds.sadd("xiaohongshu_exists_set", pid)
    else:
        # Queue is empty: wait before polling again.
        time.sleep(5)