Commit 27945ac0 authored by litaolemo

fix

parent a5fc987c
This diff is collapsed.
...@@ -10,10 +10,10 @@ import random ...@@ -10,10 +10,10 @@ import random
import redis, json import redis, json
from crawler.crawler_sys.utils.rpc_data_to_answer import post_single_data,post_muilty_data from crawler.crawler_sys.utils.rpc_data_to_answer import post_single_data,post_muilty_data
from crawler_sys.utils.output_results import retry_get_url from crawler_sys.utils.output_results import retry_get_url
from crawler.gm_upload.gm_upload import upload, upload_file # from crawler.gm_upload.gm_upload import upload, upload_file
gm_user_id_list = [ gm_user_id_list = [
'5cca9b3700000000120314c9', '5cca9b3700000000120314c9',
'5aa0f7bae8ac2b65bfcdaf0e', '5aa0f7bae8ac2b65bfcdaf0e',
'5c20dd200000000007027c07', '5c20dd200000000007027c07',
'5fe1c1ba0000000001006e65'] '5fe1c1ba0000000001006e65']
...@@ -108,34 +108,40 @@ user_id_list = [ ...@@ -108,34 +108,40 @@ user_id_list = [
31358658, 31358658,
] ]
f= open("josnfile.json","r",encoding='utf-8')
rds = redis.StrictRedis(host='172.18.51.10', port=6379, db=17, decode_responses=True) rds = redis.StrictRedis(host='172.18.51.10', port=6379, db=17, decode_responses=True)
pid_list = rds.hkeys("xiaohongshu") pid_list = rds.hkeys("xiaohongshu")
for pid in pid_list: for line in f:
res = rds.hget("xiaohongshu", pid) # for pid in f:
# res = rds.hget("xiaohongshu", pid)
# if rds.hexists("xiaohongshu_with_img", pid): # if rds.hexists("xiaohongshu_with_img", pid):
# continue # continue
res_json = json.loads(res) res_json = json.loads(line)
video_dic = {} video_dic = {}
qiniu_img_list = [] qiniu_img_list = []
print(pid) print(res_json)
for img_url in res_json["NoteView"]["content"]["imageList"]: pid = res_json["NoteView"]["id"]
try: # for img_url in res_json["NoteView"]["content"]["imageList"]:
img_wb = retry_get_url("http:" + img_url["url"].replace(img_url['fileId'],img_url['traceId'])).content # try:
res = upload(img_wb, img_type=99) # img_wb = retry_get_url("http:" + img_url["url"].replace(img_url['fileId'],img_url['traceId'])).content
# print(res) # res = upload(img_wb, img_type=99)
img_info = retry_get_url(res + "-imageinfo") # # print(res)
img_info_json = img_info.json() # img_info = retry_get_url(res + "-imageinfo")
qiniu_img_list.append('<img src="' + res + '-w">') # img_info_json = img_info.json()
except Exception as e: # qiniu_img_list.append('<img src="' + res + '-w">')
print("down load img error %s" % e) # except Exception as e:
continue # print("down load img error %s" % e)
# continue
# print(qiniu_img_list) # print(qiniu_img_list)
try: try:
# if True: # if True:
desc_fix = "<p>" + res_json["NoteView"]["content"]['desc'].replace('\n', '<br>') + "".join(qiniu_img_list) + "</p>" # desc_fix = "<p>" + res_json["NoteView"]["content"]['desc'].replace('\n', '<br>') + "".join(qiniu_img_list) + "</p>"
res_json["NoteView"]["content"]["desc_fix"] = desc_fix # res_json["NoteView"]["content"]["desc_fix"] = desc_fix
desc_fix = res_json["NoteView"]["content"]["desc_fix"]
# print(desc_fix) # print(desc_fix)
res = rds.hset("xiaohongshu_with_img", key=pid, value=json.dumps(res_json)) # f.write(json.dumps(res_json) + "\n")
# f.flush()
# res = rds.hset("xiaohongshu_with_img", key=pid, value=json.dumps(res_json))
if res_json["NoteView"]["author"]['id'] in gm_user_id_list: if res_json["NoteView"]["author"]['id'] in gm_user_id_list:
video_dic["level"] = "5" video_dic["level"] = "5"
else: else:
...@@ -193,4 +199,6 @@ for pid in pid_list: ...@@ -193,4 +199,6 @@ for pid in pid_list:
print(e) print(e)
continue continue
# break # break
\ No newline at end of file # f.flush()
# f.close()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment