Commit 79bbb8d0 authored by litaolemo's avatar litaolemo

update

parent cfb67895
# -*- coding:UTF-8 -*-
# @Time : 2021/1/14 19:53
# @File : __init__.py
# @email : litao@igengmei.com
# @author : litao
\ No newline at end of file
# -*- coding:UTF-8 -*-
# @Time : 2021/1/14 19:54
# @File : rpc_config.py
# @email : litao@igengmei.com
# @author : litao
# Account ids exposed to importers of this config module.
# NOTE(review): "majiayonghu" looks like pinyin for "sock-puppet users" --
# confirm the intended meaning with the author.
# Order is preserved exactly; the ids are only reflowed and grouped by range.
majiayonghu_list = [
    # 364367xx - 364368xx range
    36436814, 36436809, 36436805, 36436803, 36436800,
    36436797, 36436794, 36436793, 36436787, 36436782,
    36436769, 36436763, 36436758, 36436756, 36436749,
    36436745, 36436738, 36436731, 36436729, 36436725,
    36436720, 36436717, 36436716, 36436709, 36436703,
    36436701, 36436690, 36436689, 36436685, 36436674,
    # 364261xx range
    36426171, 36426170, 36426169, 36426168, 36426167,
    36426166, 36426165, 36426164, 36426163, 36426162,
    36426161, 36426160, 36426159, 36426158, 36426157,
    36426156, 36426155, 36426154, 36426153, 36426152,
    36426150, 36426149, 36426148, 36426147, 36426146,
    36426145, 36426143, 36426141,
    # 363689xx range
    36368922, 36368921, 36368920, 36368918, 36368917,
]
# Pool of user ids exposed to importers of this config module.
# The script shown in the same commit deep-copies this list, draws one id at
# random as the question author, removes it from the copy, then draws another
# for the answer author. It must therefore stay a mutable list with >= 2 ids.
# Order is preserved exactly; the ids are only reflowed.
user_id_list = [
    29865245, 36426151, 36426142, 36427666, 36427661,
    36427657, 36427655, 36427634, 33524762, 33524779,
    33524697, 30963358, 31293584, 31358392, 31358396,
    31358397, 31358419, 31358448, 31358610, 31358658,
]
...@@ -12,6 +12,7 @@ import redis, json ...@@ -12,6 +12,7 @@ import redis, json
from crawler.crawler_sys.utils.rpc_data_to_answer import post_single_data,post_muilty_data from crawler.crawler_sys.utils.rpc_data_to_answer import post_single_data,post_muilty_data
from crawler_sys.utils.output_results import retry_get_url from crawler_sys.utils.output_results import retry_get_url
from crawler.gm_upload.gm_upload import upload, upload_file from crawler.gm_upload.gm_upload import upload, upload_file
from crawler.crawler_sys.scheduler.redis_to_rpc.rpc_config import *
gm_user_id_list = [ gm_user_id_list = [
"3236957071", "3236957071",
...@@ -26,160 +27,70 @@ gm_user_id_list = [ ...@@ -26,160 +27,70 @@ gm_user_id_list = [
"7048594049", "7048594049",
] ]
majiayonghu_list = [
36436814,
36436809,
36436805,
36436803,
36436800,
36436797,
36436794,
36436793,
36436787,
36436782,
36436769,
36436763,
36436758,
36436756,
36436749,
36436745,
36436738,
36436731,
36436729,
36436725,
36436720,
36436717,
36436716,
36436709,
36436703,
36436701,
36436690,
36436689,
36436685,
36436674,
36426171,
36426170,
36426169,
36426168,
36426167,
36426166,
36426165,
36426164,
36426163,
36426162,
36426161,
36426160,
36426159,
36426158,
36426157,
36426156,
36426155,
36426154,
36426153,
36426152,
36426150,
36426149,
36426148,
36426147,
36426146,
36426145,
36426143,
36426141,
36368922,
36368921,
36368920,
36368918,
36368917,
]
user_id_list = [
29865245,
36426151,
36426142,
36427666,
36427661,
36427657,
36427655,
36427634,
33524762,
33524779,
33524697,
30963358,
31293584,
31358392,
31358396,
31358397,
31358419,
31358448,
31358610,
31358658,
]
# f= open("josnfile.json","r",encoding='utf-8')
rds = redis.StrictRedis(host='172.18.51.10', port=6379, db=17, decode_responses=True) rds = redis.StrictRedis(host='172.18.51.10', port=6379, db=17, decode_responses=True)
pid_list = rds.hkeys("weibo") while True:
# for line in f: if rds.hlen("weibo"):
for pid in pid_list: pid_list = rds.hkeys("weibo")
res = rds.hget("weibo", pid) for pid in pid_list:
if rds.hexists("weibo_with_img", pid): if rds.sismember("weibo_exists_set", pid):
continue rds.hdel("weibo", pid)
res_json = json.loads(res) continue
video_dic = {} res = rds.hget("weibo", pid)
qiniu_img_list = [] res_json = json.loads(res)
# print(res_json) video_dic = {}
if "http://t.cn/" in res_json["title"]: qiniu_img_list = []
continue # print(res_json)
for img_url in res_json["img_list"]: if "http://t.cn/" in res_json["title"]:
try: continue
img_wb = retry_get_url(img_url.replace("large", "sq480")).content for img_url in res_json["img_list"]:
res = upload(img_wb, img_type=99) try:
# print(res) img_wb = retry_get_url(img_url.replace("large", "sq480")).content
img_info = retry_get_url(res + "-imageinfo") res = upload(img_wb, img_type=99)
img_info_json = img_info.json() # print(res)
qiniu_img_list.append('<img src="' + res + '-w">') img_info = retry_get_url(res + "-imageinfo")
except Exception as e: img_info_json = img_info.json()
print("down load img error %s" % e) qiniu_img_list.append('<img src="' + res + '-w">')
continue except Exception as e:
print(qiniu_img_list) print("down load img error %s" % e)
try: continue
# if True: print(qiniu_img_list)
try: try:
title = res_json["title"].split("\n")[0] # if True:
except: try:
title = res_json["title"] title = res_json["title"].split("\n")[0]
desc_fix = "<p>" + res_json["title"].replace('\n', '<br>') + "".join(qiniu_img_list) + "</p>" except:
res_json["desc_fix"] = desc_fix title = res_json["title"]
# print(desc_fix) desc_fix = "<p>" + res_json["title"].replace('\n', '<br>') + "".join(qiniu_img_list) + "</p>"
# f.write(json.dumps(res_json) + "\n") res_json["desc_fix"] = desc_fix
# f.flush() # print(desc_fix)
res = rds.hset("weibo_with_img", key=pid, value=json.dumps(res_json)) # f.write(json.dumps(res_json) + "\n")
if res_json["releaser_id_str"].replace("weibo_","") in gm_user_id_list: # f.flush()
video_dic["level"] = "5" res = rds.hset("weibo_with_img", key=pid, value=json.dumps(res_json))
else: if res_json["releaser_id_str"].replace("weibo_","") in gm_user_id_list:
video_dic["level"] = "3" video_dic["level"] = "5"
video_dic["platform"] = "2" else:
video_dic["platform_id"] = pid video_dic["level"] = "3"
video_dic["platform_answer_id"] = pid video_dic["platform"] = "2"
video_dic["title"] = title video_dic["platform_id"] = pid
video_dic["platform_answer_id"] = pid
video_dic["title"] = title
user_id_list_copy = copy.deepcopy(user_id_list)
qustion_id = random.choice(user_id_list_copy)
user_id_list_copy.remove(qustion_id)
video_dic["user_id"] = qustion_id
create_time = int(res_json["release_time"]/1e3)
video_dic["create_time"] = create_time
rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/question/batch_create")
# print(rpc_res)
video_dic["platform_question_id"] = pid
video_dic["content"] = desc_fix
video_dic["user_id"] = random.choice(user_id_list_copy)
rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/answer/batch_create")
user_id_list_copy = copy.deepcopy(user_id_list) except Exception as e:
qustion_id = random.choice(user_id_list_copy) print(e)
user_id_list_copy.remove(qustion_id) continue
video_dic["user_id"] = qustion_id
create_time = int(res_json["release_time"]/1e3)
video_dic["create_time"] = create_time
rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/question/batch_create")
# print(rpc_res)
video_dic["platform_question_id"] = pid
video_dic["content"] = desc_fix
video_dic["user_id"] = random.choice(user_id_list_copy)
rpc_res = post_single_data(copy.deepcopy(video_dic), "cims/answer/batch_create")
except Exception as e: rds.hdel("weibo", pid)
print(e)
continue
# break
# f.flush()
# f.close()
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment