Commit 0a621d96 authored by litaolemo's avatar litaolemo

update

parent 7568bd92
# -*- coding:UTF-8 -*-
# @Time : 2021/1/13 14:06
# @File : weibo_to_rpc.py
# @email : litao@igengmei.com
# @author : litao
import copy
import datetime
import json
import random

import redis

from crawler.crawler_sys.utils.rpc_data_to_answer import post_single_data
from crawler.gm_upload.gm_upload import upload, upload_file
from crawler_sys.utils.output_results import retry_get_url
# Weibo account ids, stored as strings. The migration loop below strips the
# "weibo_" prefix from a post's "releaser_id_str" and, when the remainder is
# in this list, sends the post with level "5" instead of the default "3".
gm_user_id_list = [
"3236957071",
"5147711482",
"6628617667",
"6855680230",
"5836153857",
"5145935171",
"5143063731",
"5144888803",
"6431905918",
"7048594049",
]
# Not referenced anywhere in the visible part of this script.
# NOTE(review): the name reads like pinyin for "sock-puppet users", so these
# are presumably spare platform user ids -- confirm before removing.
majiayonghu_list = [
36436814,
36436809,
36436805,
36436803,
36436800,
36436797,
36436794,
36436793,
36436787,
36436782,
36436769,
36436763,
36436758,
36436756,
36436749,
36436745,
36436738,
36436731,
36436729,
36436725,
36436720,
36436717,
36436716,
36436709,
36436703,
36436701,
36436690,
36436689,
36436685,
36436674,
36426171,
36426170,
36426169,
36426168,
36426167,
36426166,
36426165,
36426164,
36426163,
36426162,
36426161,
36426160,
36426159,
36426158,
36426157,
36426156,
36426155,
36426154,
36426153,
36426152,
36426150,
36426149,
36426148,
36426147,
36426146,
36426145,
36426143,
36426141,
36368922,
36368921,
36368920,
36368918,
36368917,
]
# Pool of user ids the migration loop draws from at random: one id becomes the
# "user_id" of the created question and a different id from the same pool
# becomes the "user_id" of the created answer.
user_id_list = [
29865245,
36426151,
36426142,
36427666,
36427661,
36427657,
36427655,
36427634,
33524762,
33524779,
33524697,
30963358,
31293584,
31358392,
31358396,
31358397,
31358419,
31358448,
31358610,
31358658,
]
def _upload_images(img_urls):
    """Re-host every image in *img_urls* and return ``<img>`` tags for the successes.

    Each image is downloaded with ``retry_get_url``, pushed through ``upload``
    and then its ``-imageinfo`` variant is fetched and parsed as JSON. Any
    exception in that chain is printed and the image is skipped, so one broken
    picture never aborts the whole post.

    NOTE(review): ``upload`` is assumed to return the hosted URL as a string
    (it is concatenated with string suffixes below) -- confirm against
    ``gm_upload``.
    """
    img_tags = []
    for img_url in img_urls:
        try:
            img_bytes = retry_get_url(img_url).content
            hosted_url = upload(img_bytes, img_type=99)
            # The parsed result is not used; the request is kept because a
            # failure here has always excluded the image from the post.
            retry_get_url(hosted_url + "-imageinfo").json()
            img_tags.append('<img src="' + hosted_url + '-w">')
        except Exception as e:
            print("down load img error %s" % e)
    return img_tags


def _build_payloads(pid, post, img_tags):
    """Build the question and answer payloads for one stored weibo post.

    Args:
        pid: hash field the post is stored under; used for every platform id.
        post: decoded post dict; reads "title", "releaser_id_str" and
            "release_time".
        img_tags: ``<img>`` tags to append after the post text.

    Returns:
        ``(question, answer)`` -- two independent dicts. The answer carries
        every question field plus "platform_question_id" and "content", and
        uses a different "user_id" than the question.

    Raises:
        KeyError / AttributeError / TypeError when the post lacks one of the
        expected fields or holds an unexpected type; the caller treats that as
        "skip this post".
    """
    text = post["title"]
    # NOTE(review): the post text is inserted into HTML without escaping; this
    # mirrors the previous behaviour -- confirm the text is trusted/plain.
    desc_fix = "<p>" + text.replace('\n', '<br>') + "".join(img_tags) + "</p>"
    releaser_id = post["releaser_id_str"].replace("weibo_", "")
    # Two distinct ids so the question and its answer come from different users.
    question_user, answer_user = random.sample(user_id_list, 2)
    question = {
        "level": "5" if releaser_id in gm_user_id_list else "3",
        "platform": "2",
        "platform_id": pid,
        "platform_answer_id": pid,
        # Only the first line of the post is used as the title.
        "title": text.split("\n")[0],
        "user_id": question_user,
        # presumably milliseconds -> seconds; TODO confirm the unit of release_time
        "create_time": int(post["release_time"] / 1e3),
    }
    answer = dict(
        question,
        platform_question_id=pid,
        content=desc_fix,
        user_id=answer_user,
    )
    return question, answer


# Read every post stored in the "weibo" hash and push it as a question plus an
# answer through the cims batch-create endpoints.
rds = redis.StrictRedis(host='172.18.51.10', port=6379, db=17, decode_responses=True)
pid_list = rds.hkeys("weibo")
for pid in pid_list:
    raw_post = rds.hget("weibo", pid)
    if raw_post is None:
        # The field disappeared between hkeys() and hget(); nothing to send.
        continue
    res_json = json.loads(raw_post)
    # A post without an image list is sent as text only instead of crashing the run.
    qiniu_img_list = _upload_images(res_json.get("img_list") or [])
    print(qiniu_img_list)
    try:
        question_dic, answer_dic = _build_payloads(pid, res_json, qiniu_img_list)
        post_single_data(question_dic, "cims/question/batch_create")
        post_single_data(answer_dic, "cims/answer/batch_create")
    except Exception as e:
        # Best effort: report the failure and move on to the next post.
        print(e)
\ No newline at end of file
......@@ -217,22 +217,127 @@ if __name__ == '__main__':
test = Crawler_weibo()
# releaserUrl = 'http://v.qq.com/vplus/cfa34d96d1b6609f1dccdea65b26b83d'
# Weibo profile pages to crawl; each entry is passed to test.releaser_page()
# by the loop that follows. Commented-out entries are currently disabled.
# Every active entry must be a full https:// URL and appear only once, so no
# account is crawled twice in one run.
url_list = [
# "https://weibo.com/u/1764615662",
"https://weibo.com/u/3662247177",
# "https://weibo.com/u/2378564111",
# "https://weibo.com/u/2983578965",
# "https://weibo.com/u/3938976579",
# "https://weibo.com/u/6511177474",
"https://weibo.com/u/6511173721",
# "https://weibo.com/p/1005055471534537/home?from=page_100505&mod=TAB&is_hot=1#place",
"https://weibo.com/u/3934754081?is_hot=1",
"https://weibo.com/u/1752877052?is_hot=1",
"https://weibo.com/u/5395999826?is_hot=1",
"https://weibo.com/u/6426118092?is_hot=1",
"https://weibo.com/u/5597352985?is_hot=1",
"https://weibo.com/u/5536360057?is_hot=1",
"https://weibo.com/u/6574937525",
"https://weibo.com/u/7396392576?is_hot=1",
"https://weibo.com/p/1005051719151460/home",
"https://weibo.com/u/6514156406?refer_flag=1005050006_",
"https://weibo.com/p/1005051922120917/home",
"https://weibo.com/u/5268223514?is_hot=1",
"https://weibo.com/u/2950468700?is_hot=1",
"https://weibo.com/u/7171118361?is_hot=1",
"https://weibo.com/u/2259870914?is_all=1",
"https://weibo.com/u/7054003977",
"https://weibo.com/u/5311113459",
"https://weibo.com/u/2903761483?refer_flag=1001030103_&is_hot=1",
"https://weibo.com/u/5061685077?refer_flag=1001030103_&is_hot=1",
"https://weibo.com/u/7125942835?is_hot=1",
"https://weibo.com/p/1005055040459465/home",
"https://weibo.com/u/6346951781",
"https://weibo.com/p/1005057224900173/home",
"https://weibo.com/u/6831245172?is_hot=1",
"https://weibo.com/u/2868511894?is_hot=1",
"https://weibo.com/u/6082896987?is_hot=1",
"https://weibo.com/p/1005052198383217/home",
"https://weibo.com/p/1005055305217578/home",
"https://weibo.com/270055196?is_hot=1",
"https://weibo.com/u/5193254387?is_hot=1",
"https://weibo.com/u/6433308372?is_hot=1",
"https://weibo.com/u/6165106408?is_hot=1",
"https://weibo.com/p/1005051918721053/home",
"https://weibo.com/611678617?is_hot=1",
"https://weibo.com/u/6899857321?is_hot=1",
"https://weibo.com/u/3607024971?is_hot=1",
"https://weibo.com/u/2913046461?is_hot=1",
"https://weibo.com/u/2674747062?is_hot=1",
"https://weibo.com/u/2412955604?topnav=1&wvr=6&topsug=1",
"https://weibo.com/u/6614002566?is_hot=1",
"https://weibo.com/u/3976738742?is_hot=1",
"https://weibo.com/p/1005052056164927/home",
"https://weibo.com/p/1005055241775605/home",
"https://weibo.com/u/6369481847?is_hot=1",
"https://weibo.com/u/2764013814",
"https://weibo.com/p/1005055119549629/home",
"https://weibo.com/u/6874207129?is_hot=1",
"https://weibo.com/u/1856570454?is_all=1",
"https://weibo.com/u/6433256474?is_hot=1",
"https://weibo.com/u/6854091345?is_hot=1",
"https://weibo.com/p/1005055706502018/home",
"https://weibo.com/u/3916296864?is_hot=1",
"https://weibo.com/u/6758410336?is_hot=1",
"https://weibo.com/u/6095636964?is_hot=1",
"https://weibo.com/u/5627013949?is_all=1",
"https://weibo.com/u/3607576112?profile_ftype=1&is_all=1#_0",
"https://weibo.com/p/1005052004301745/home",
"https://weibo.com/p/1005051891287691/home",
"https://weibo.com/u/1874437190?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/3307996457?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/2636564637?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/6852655070?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/5589574529?is_hot=1",
"https://weibo.com/p/1005051861506152/home",
"https://weibo.com/u/2135443220?profile_ftype=1&is_all=1#_0",
"https://weibo.com/p/1005055407180113/home",
"https://weibo.com/u/7343693352?profile_ftype=1&is_all=1#_0",
"https://weibo.com/p/1005056064161260/home",
"https://weibo.com/p/1005056347228074/home",
"https://weibo.com/u/1293633660?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/6567254124?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/3934401200?profile_ftype=1&is_all=1#_0",
"https://weibo.com/p/1005055270614973/home",
"https://weibo.com/u/6807978304?refer_flag=1001030103_&is_hot=1#_0",
"https://weibo.com/u/1931193225?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/2501778585?refer_flag=1001030103_&is_hot=1",
"https://weibo.com/u/6857168590?display=0&retcode=6102&sudaref=passport.weibo.com&is_hot=1",
"https://weibo.com/u/2307946431?is_hot=1",
"https://weibo.com/p/1005056228248712/home",
"https://weibo.com/u/2189983640?refer_flag=1001030103_&is_hot=1",
"https://weibo.com/p/1005051868643982/home",
"https://weibo.com/u/6343475414?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/3912360020?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/1697399444?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/6962283304?is_hot=1",
"https://weibo.com/u/3937105056?profile_ftype=1&is_all=1#_0",
"https://weibo.com/p/1005055903923679/home",
"https://weibo.com/u/2267627614?profile_ftype=1&is_all=1#_0",
"https://weibo.com/u/2918298864?is_hot=1#1609235796088",
"https://weibo.com/p/1005052154909694/home",
"https://weibo.com/u/3854719915?is_hot=1",
"https://weibo.com/p/1005056323607436/home",
"https://weibo.com/u/6076861345?is_hot=1",
"https://weibo.com/u/6130461637?is_hot=1",
"https://weibo.com/p/1005053602744927/home",
"https://weibo.com/u/5884212886?is_hot=1",
"https://weibo.com/u/1843074012?from=page_100505_profile&wvr=6&mod=myfollowhisfan&refer_flag=1005050010_&is_all=1",
"https://weibo.com/u/5356845303?is_hot=1",
"https://weibo.com/u/5623052931?refer_flag=1001030103_&is_hot=1#1609745544373",
"https://weibo.com/u/3236957071?topnav=1&wvr=6&topsug=1&is_hot=1",
"https://weibo.com/u/5147711482?refer_flag=1005050006_",
"https://weibo.com/u/6628617667?refer_flag=1005050006_",
"https://weibo.com/u/6855680230?refer_flag=1005050006_",
"https://weibo.com/u/5836153857?refer_flag=1005050006_&is_hot=1",
"https://weibo.com/u/5145935171?refer_flag=1005050006_",
"https://weibo.com/u/5143063731?refer_flag=1005050006_",
"https://weibo.com/u/5144888803?refer_flag=1005050006_",
"https://weibo.com/u/6431905918?refer_flag=1005050006_",
"https://weibo.com/u/7048594049",
]
import redis
rds = redis.StrictRedis(host='172.18.51.10', port=6379, db=17, decode_responses=True)
for url in url_list:
res = test.releaser_page(url, output_to_es_raw=True,
es_index='crawler-data-raw',
releaser_page_num_max=400,proxies_num=0)
releaser_page_num_max=20,proxies_num=0)
for r in res:
print(r)
res = rds.hset("weibo", key=r["doc_id"], value=json.dumps(r))
# for u in url_list:
# test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_raw=False,
# es_index='crawler-data-raw',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment