fix

27945ac0 · litaolemo · a5fc987c · 27945ac0 · 27945ac0
Commit 27945ac0 authored Jan 13, 2021 by litaolemo
Expand all Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 30 additions and 22 deletions

josnfile.json crawler_sys/scheduler/josnfile.json +0 -0

xiaohongshu_to_rpc.py crawler_sys/scheduler/xiaohongshu_to_rpc.py +30 -22

No files found.
--- a/crawler_sys/scheduler/josnfile.json
+++ b/crawler_sys/scheduler/josnfile.json
--- a/crawler_sys/scheduler/xiaohongshu_to_rpc.py
+++ b/crawler_sys/scheduler/xiaohongshu_to_rpc.py
@@ -10,10 +10,10 @@ import random
 import redis, json
 from crawler.crawler_sys.utils.rpc_data_to_answer import post_single_data,post_muilty_data
 from crawler_sys.utils.output_results import retry_get_url
-from crawler.gm_upload.gm_upload import upload, upload_file
+# from crawler.gm_upload.gm_upload import upload, upload_file

 gm_user_id_list = [
-    '5cca9b3700000000120314c9',
+'5cca9b3700000000120314c9',
 '5aa0f7bae8ac2b65bfcdaf0e',
 '5c20dd200000000007027c07',
 '5fe1c1ba0000000001006e65']
@@ -108,34 +108,40 @@ user_id_list = [
                31358658,
                ]

+f= open("josnfile.json","r",encoding='utf-8')
 rds = redis.StrictRedis(host='172.18.51.10', port=6379, db=17, decode_responses=True)
 pid_list = rds.hkeys("xiaohongshu")
-for pid in pid_list:
-    res = rds.hget("xiaohongshu", pid)
+for line in f:
+# for pid in f:
+    # res = rds.hget("xiaohongshu", pid)
    # if rds.hexists("xiaohongshu_with_img", pid):
    #     continue
-    res_json = json.loads(res)
+    res_json = json.loads(line)
    video_dic = {}
    qiniu_img_list = []
-    print(pid)
-    for img_url in res_json["NoteView"]["content"]["imageList"]:
-        try:
-            img_wb = retry_get_url("http:" + img_url["url"].replace(img_url['fileId'],img_url['traceId'])).content
-            res = upload(img_wb, img_type=99)
-            # print(res)
-            img_info = retry_get_url(res + "-imageinfo")
-            img_info_json = img_info.json()
-            qiniu_img_list.append('<img src="' + res + '-w">')
-        except Exception as e:
-            print("down load img error %s" % e)
-            continue
+    print(res_json)
+    pid = res_json["NoteView"]["id"]
+    # for img_url in res_json["NoteView"]["content"]["imageList"]:
+    #     try:
+    #         img_wb = retry_get_url("http:" + img_url["url"].replace(img_url['fileId'],img_url['traceId'])).content
+    #         res = upload(img_wb, img_type=99)
+    #         # print(res)
+    #         img_info = retry_get_url(res + "-imageinfo")
+    #         img_info_json = img_info.json()
+    #         qiniu_img_list.append('<img src="' + res + '-w">')
+    #     except Exception as e:
+    #         print("down load img error %s" % e)
+    #         continue
    # print(qiniu_img_list)
    try:
    # if True:
-        desc_fix = "<p>" + res_json["NoteView"]["content"]['desc'].replace('\n', '<br>') + "".join(qiniu_img_list) + "</p>"
-        res_json["NoteView"]["content"]["desc_fix"] = desc_fix
+    #     desc_fix = "<p>" + res_json["NoteView"]["content"]['desc'].replace('\n', '<br>') + "".join(qiniu_img_list) + "</p>"
+    #     res_json["NoteView"]["content"]["desc_fix"] = desc_fix
+        desc_fix = res_json["NoteView"]["content"]["desc_fix"]
        # print(desc_fix)
-        res = rds.hset("xiaohongshu_with_img", key=pid, value=json.dumps(res_json))
+        # f.write(json.dumps(res_json) + "\n")
+        # f.flush()
+        # res = rds.hset("xiaohongshu_with_img", key=pid, value=json.dumps(res_json))
        if res_json["NoteView"]["author"]['id'] in gm_user_id_list:
            video_dic["level"] = "5"
        else:
@@ -193,4 +199,6 @@ for pid in pid_list:
        print(e)
        continue

-    # break
\ No newline at end of file
+    # break
+# f.flush()
+# f.close()
\ No newline at end of file