Commit 2f3b73ca authored by litaolemo

update

parent 288613fc
@@ -12,10 +12,12 @@ Due to the data-type used in redis is set so I changed the word from list to set
 platform_redis_set_reg = {
-    'toutiao': 'toutiao_url_set',
-    '腾讯视频': 'v_qq_url_set',
-    'youku': 'youku_url_set',
-    'iqiyi': 'iqiyi_url_set',
+    'toutiao': 'toutiao_url_hash',
+    '腾讯视频': 'v_qq_url_hash',
+    'youku': 'youku_url_hash',
+    'iqiyi': 'iqiyi_url_hash',
+    'weibo': 'weibo_url_hash',
+    'douban': 'douban_url_hash',
 }
...
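For context, these renamed keys are resolved through this register at crawl time via `get_redis_list_name` (imported in the next file). A minimal sketch of what that lookup presumably does — a guess at its shape, not the actual implementation:

```python
# Hedged sketch: resolve a platform name to its redis key.
# platform_redis_set_reg is the register above; the batch_str
# suffix behaviour is an assumption, not code from this repo.
def get_redis_list_name(platform, batch_str=None):
    key = platform_redis_set_reg.get(platform)
    if key is not None and batch_str:
        key = '%s_%s' % (key, batch_str)
    return key

get_redis_list_name('weibo')  # -> 'weibo_url_hash'
```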
@@ -4,22 +4,12 @@ Created on Wed Jun 6 18:18:09 2018
 @author: hanye
 """
-import redis
+import redis, json
 from crawler_sys.framework.platform_redis_register import get_redis_list_name
 from crawler_sys.framework.es_crawler import scan_crawler_url_register
-rds = redis.StrictRedis(host='192.168.17.60', port=6379, db=0)
+rds = redis.StrictRedis(host='154.8.190.251', port=6379, db=19)
-def url_reformer(platform, url):
-    """
-    to reform url according to platform, in the future.
-    Say, a url of http://www.toutiao.com/group/1234567890123456789
-    as a string is different from http://www.365yg.com/u/1234567890123456789,
-    but they point to the same resource. They should be reformed
-    to one unique url before pushing into redis for futher crawling.
-    """
-    reformed_url = url
-    return reformed_url
 def feed_url_into_redis(dict_Lst, platform,
                         release_time_lower_bdr=None,
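The deleted `url_reformer` was a pass-through placeholder for the canonicalization its docstring describes. For illustration only, a hypothetical non-trivial version for the toutiao/365yg case the docstring mentions (the exact mapping rule is an assumption):

```python
import re

def url_reformer(platform, url):
    # Hypothetical: collapse 365yg mirror URLs onto their toutiao
    # equivalents so both spellings dedupe to one redis entry.
    if platform == 'toutiao':
        m = re.match(r'https?://www\.365yg\.com/u/(\d+)', url)
        if m:
            return 'http://www.toutiao.com/group/%s' % m.group(1)
    return url

url_reformer('toutiao', 'http://www.365yg.com/u/1234567890123456789')
# -> 'http://www.toutiao.com/group/1234567890123456789'
```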
@@ -32,35 +22,15 @@ def feed_url_into_redis(dict_Lst, platform,
     is not given when call this function, all urls will be
     pushed into redis.
     """
-    redis_list_name = get_redis_list_name(platform, batch_str)
-    if redis_list_name is None:
-        print('Failed to get correct redis list name '
-              'in platform_redis_register for platform: '
-              % platform)
-        return (None, None)
-    else:
-        print('Feeding url into redis list %s ...' % redis_list_name)
-    url_counter = 0
     for data_dict in dict_Lst:
         try:
-            url = data_dict['url']
-            url_reformed = url_reformer(platform, url)
-            if release_time_lower_bdr is None:
-                sadd_c = rds.sadd(redis_list_name, url_reformed)
-                url_counter += sadd_c
-            else:
-                url_release_time = data_dict['release_time']
-                if url_release_time >= release_time_lower_bdr:
-                    sadd_c = rds.sadd(redis_list_name, url_reformed)
-                    url_counter += sadd_c
+            doc_id = data_dict['doc_id']
+            sadd_c = rds.lpush(doc_id, json.dumps(data_dict))
+            rds.expire(doc_id,259200)
         except:
-            print('Failed to push url into redis, '
-                  'might because of lack of url field '
-                  'or lack of release_time field, or '
-                  'has wrong typed release_time value. '
-                  'The failed data dict is: \n %s' % data_dict)
-    print('Pushed %d urls into redis' % url_counter)
-    return (redis_list_name, url_counter)
+            print('Failed to push data into redis')
+    print('Pushed data into redis')
+    return True
 def pull_url_from_es(platform, release_time_lower_bdr=None):
     """
...
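The rewritten loop abandons the per-platform set in favour of one redis list per document: the whole record is serialized to JSON under its `doc_id` and expires after 259200 seconds (three days). A self-contained sketch of that producer plus an assumed consumer — `pop_doc` is not part of the commit:

```python
import json
import redis

rds = redis.StrictRedis(host='154.8.190.251', port=6379, db=19)

def push_doc(data_dict):
    # Producer, as in feed_url_into_redis: one list per doc_id,
    # JSON payload, three-day TTL.
    doc_id = data_dict['doc_id']
    rds.lpush(doc_id, json.dumps(data_dict))
    rds.expire(doc_id, 259200)

def pop_doc(doc_id):
    # Assumed consumer: pop one JSON payload back off the list.
    raw = rds.rpop(doc_id)
    return json.loads(raw) if raw is not None else None
```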
@@ -16,7 +16,7 @@ Data in es will be update when run this program once.
 """
 from crawler.crawler_sys.site_crawler_by_redis import (crawler_toutiao, crawler_v_qq, crawler_tudou, crawler_haokan,
                                                        crawler_tencent_news,
-                                                       crawler_wangyi_news, crawler_kwai, crawler_douyin,toutiao_article,crawler_weibo)
+                                                       crawler_wangyi_news, crawler_kwai, crawler_douyin,toutiao_article,crawler_weibo,crawler_douban)
 import sys
 from crawler.crawler_sys.utils.output_results import output_result
 import argparse, copy, datetime, time
@@ -46,14 +46,12 @@ parser.add_argument('-n', '--max_page', default=30, type=int,
                          'must be an int value, default to 30.'))
 parser.add_argument('-f', '--output_file_path', default='', type=str,
                     help=('Specify output file path, default None.'))
-parser.add_argument('-r', '--push_to_redis', default='False', type=str,
+parser.add_argument('-r', '--push_to_redis', default=False, type=bool,
                     help=('Write urls to redis or not, default to True'))
 parser.add_argument('-w', '--output_to_es_raw', default='True', type=str,
                     help=('Write data into es or not, default to True'))
 parser.add_argument('-index', '--es_index', default='crawler-data-raw', type=str,
                     help=('assign a es_index to write into, default to crawler-data-raw'))
-parser.add_argument('-doc', '--doc_type', default='doc', type=str,
-                    help=('assign a doc to write into, default to doc'))
 parser.add_argument('-g', '--output_to_es_register', default='True', type=str,
                     help=('Write data into es or not, default to True'))
 parser.add_argument('-processes', '--processes_num', default=8, type=int,
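One caveat with the changed `-r` flag: argparse applies `type` to the raw command-line string, and `bool('False')` is `True`, so any explicit value — including `-r False` — now enables the flag (and the unchanged help text still says the default is True). A sketch of a `str2bool` converter that keeps the intended semantics, not part of the commit:

```python
import argparse

def str2bool(value):
    # bool('False') == True, so map the usual spellings explicitly.
    if isinstance(value, bool):
        return value
    if value.lower() in ('true', 't', '1', 'yes'):
        return True
    if value.lower() in ('false', 'f', '0', 'no'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got %r' % value)

parser = argparse.ArgumentParser()
parser.add_argument('-r', '--push_to_redis', default=False, type=str2bool,
                    help='Write urls to redis or not, default to False')
```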
@@ -84,7 +82,8 @@ platform_crawler_reg = {
     '抖音': crawler_douyin.Crawler_douyin,
     "网易新闻": crawler_wangyi_news.Crawler_wangyi_news,
     "kwai": crawler_kwai.Crawler_kwai,
-    "weibo": crawler_weibo.Crawler_weibo
+    "weibo": crawler_weibo.Crawler_weibo,
+    "douban":crawler_douban.Crawler_douban
 }
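The register drives a simple platform-to-class dispatch; a minimal sketch of how an entry like the new `douban` one is consumed (the error handling here is an assumption, the real script's flow differs):

```python
def get_crawler(platform):
    # Look up the crawler class registered for the platform
    # and return an instance of it.
    crawler_class = platform_crawler_reg.get(platform)
    if crawler_class is None:
        raise ValueError('unknown platform: %s' % platform)
    return crawler_class()

crawler = get_crawler('douban')  # Crawler_douban instance
```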
@@ -212,6 +211,7 @@ def single_thead(processe,name):
                 if not count_has:
                     releaser_body["mssage"] = "爬取失败,请检查账号"
                     rds_1.hset("error",releaser_body["platform"] + "/" +releaser_body["releaserUrl"],json.dumps(releaser_body))
+
             if data_list != []:
                 output_result(result_Lst=data_list,
                               platform=platform,
@@ -219,7 +219,6 @@ def single_thead(processe,name):
                               filepath=None,
                               output_to_es_raw=output_to_es_raw,
                               es_index=es_index,
-                              doc_type=doc_type,
                               output_to_es_register=output_to_es_register)
                 print(len(data_list))
                 data_list.clear()
@@ -249,6 +248,7 @@ def start_crawler(processe,name):
             # # t.setDaemon(False) #
             t.start()
+
 if __name__ == "__main__":
     executor = ProcessPoolExecutor(max_workers=processes_num)
     futures = []
...
@@ -21,7 +21,7 @@ from crawler.crawler_sys.utils.util_logging import logged
 from crawler.crawler_sys.proxy_pool.func_get_proxy_form_kuaidaili import get_proxy
 from crawler.crawler_sys.utils.html_to_str import dehtml
 from write_data_into_es.func_get_releaser_id import *
+from write_data_into_es.func_cal_doc_id import cal_doc_id
 class Crawler_weibo():
     def __init__(self, timeout=None, platform='weibo'):
@@ -65,6 +65,20 @@ class Crawler_weibo():
     def get_releaser_id(self, releaserUrl):
         return get_releaser_id(platform=self.platform, releaserUrl=releaserUrl)
+    @staticmethod
+    def get_img(data):
+        img_list = []
+        if data.get("pics"):
+            for one in data.get("pics"):
+                try:
+                    img_list.append(one["large"]["url"])
+                except Exception as e:
+                    img_list.append(one["url"])
+                    print("add img error %s" % e)
+        return img_list
     def releaser_page(self, releaserUrl,
                       output_to_file=False, filepath=None,
                       output_to_es_raw=False,
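For illustration, what the new `get_img` helper yields on a typical `pics` payload, with the class above in scope (the sample dict is made up; its field layout matches the code — note the committed code appends the fallback `url` first and prints the error afterwards):

```python
mblog = {
    "pics": [
        {"url": "https://wx1.sinaimg.cn/orj360/a.jpg",
         "large": {"url": "https://wx1.sinaimg.cn/large/a.jpg"}},
        {"url": "https://wx1.sinaimg.cn/orj360/b.jpg"},  # no "large" key
    ]
}
Crawler_weibo.get_img(mblog)
# -> ['https://wx1.sinaimg.cn/large/a.jpg',
#     'https://wx1.sinaimg.cn/orj360/b.jpg']
```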
@@ -75,7 +89,7 @@ class Crawler_weibo():
                       doc_type=None, proxies_num=None):
         print('Processing releaserUrl %s' % releaserUrl)
         result_Lst = []
-        releaser_id, containerid = self.get_releaser_id(releaserUrl)
+        releaser_id = self.get_releaser_id(releaserUrl)
         # xsrf_token,url_extr = self.get_weibo_info(releaser_id)
         headers = {
             "accept": "application/json, text/plain, */*",
@@ -131,6 +145,9 @@ class Crawler_weibo():
                     mid = mblog.get("mid")
                     forward_text = ""
                     forward_user = ""
+                    doc_id = cal_doc_id(platform=res_dic["platform"], url=res_dic["url"], data_dict=res_dic,
+                                        doc_id_type="all-time-url")
                     if one.get("source") == "绿洲":
                         text_type = "绿洲"
                     elif mblog.get("retweeted_status"):
@@ -156,7 +173,9 @@ class Crawler_weibo():
                         "forward_text": forward_text,
                         "mid": mid,
                         "releaserUrl": "https://www.weibo.com/u/%s" % releaser_id,
-                        "releaser_id_str": "weibo_%s" % releaser_id
+                        "releaser_id_str": "weibo_%s" % releaser_id,
+                        "img_list":self.get_img(mblog),
+                        "doc_id":doc_id
                     }
                     yield res_dic
                 except Exception as e:
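Worth flagging: in the earlier hunk, `cal_doc_id` reads `res_dic` before the dict is built for the current item, so the first iteration raises `NameError` and later iterations reuse the previous item's platform and url; the same pattern appears in crawler_douban below. A sketch of the presumably intended order (`build_record` is a hypothetical helper, field values are placeholders):

```python
from write_data_into_es.func_cal_doc_id import cal_doc_id

def build_record(platform, url, mid, img_list):
    # Build the record first, then derive doc_id from the
    # finished dict instead of the previous iteration's.
    res_dic = {
        "platform": platform,
        "url": url,
        "mid": mid,
        "img_list": img_list,
    }
    res_dic["doc_id"] = cal_doc_id(platform=res_dic["platform"],
                                   url=res_dic["url"],
                                   data_dict=res_dic,
                                   doc_id_type="all-time-url")
    return res_dic
```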
@@ -189,7 +208,7 @@ if __name__ == '__main__':
     # releaserUrl = 'http://v.qq.com/vplus/cfa34d96d1b6609f1dccdea65b26b83d'
     url_list = [
         # "https://weibo.com/u/1764615662",
-        # "https://weibo.com/u/3662247177",
+        "https://weibo.com/u/3662247177",
         # "https://weibo.com/u/2378564111",
         # "https://weibo.com/u/2983578965",
         # "https://weibo.com/u/3938976579",
@@ -198,13 +217,14 @@ if __name__ == '__main__':
         # "https://weibo.com/p/1005055471534537/home?from=page_100505&mod=TAB&is_hot=1#place",
     ]
-    # res = test.releaser_page(url, output_to_es_raw=True,
-    #                          es_index='crawler-data-raw',
-    #                          releaser_page_num_max=400,proxies_num=0)
-    # for r in res:
-    #     print(r)
-    for u in url_list:
-        test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_raw=False,
-                                   es_index='crawler-data-raw',
-                                   doc_type='doc', releaser_page_num_max=4000)
+    for url in url_list:
+        res = test.releaser_page(url, output_to_es_raw=True,
+                                 es_index='crawler-data-raw',
+                                 releaser_page_num_max=400,proxies_num=0)
+        for r in res:
+            print(r)
+    # for u in url_list:
+    #     test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_raw=False,
+    #                                es_index='crawler-data-raw',
+    #                                doc_type='doc', releaser_page_num_max=4000)
     # test.get_single_page(4524055937468233)
@@ -27,8 +27,10 @@ from crawler.crawler_sys.utils.trans_strtime_to_timestamp import weibo_parse_tim
 # from crawler.crawler_sys.utils.util_logging import logged
 # from crawler.crawler_sys.proxy_pool.func_get_proxy_form_kuaidaili import get_proxy
 from crawler.crawler_sys.utils.html_to_str import dehtml
+from bs4 import BeautifulSoup
 from write_data_into_es.func_get_releaser_id import *
+from write_data_into_es.func_cal_doc_id import cal_doc_id
 class Crawler_douban():
@@ -70,17 +72,23 @@ class Crawler_douban():
             # content = dehtml(page_json["content"])
             if page_json.get('localized_message'):
                 continue
+            # content_html = """<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Title</title></head><body>%s</body></html>""" % page_json["content"]
+            # bs = BeautifulSoup(content_html, "html.parser")
+            # content = bs.textarea.get_text()
             content = page_json["content"]
             repost_count = trans_play_count(page_json["reshares_count"])
             comment_count = trans_play_count(page_json["comments_count"])
             favorite_count = trans_play_count(page_json["like_count"])
             collection_count = trans_play_count(page_json["collections_count"])
+            img_list = re.findall('img src=".*?"',content)
             dic = {
                 "content":content,
                 "repost_count":repost_count,
                 "comment_count":comment_count,
                 "favorite_count":favorite_count,
                 "collection_count":collection_count,
+                "img_list":img_list,
             }
             return dic
         except Exception as e:
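Note that `re.findall` without a capture group returns the whole matched fragment, so the new `img_list` holds strings like `img src="…"` rather than bare URLs; adding a group keeps just the address. A quick comparison, not part of the commit:

```python
import re

content = '<p>text <img src="https://img1.doubanio.com/view/a.jpg"></p>'

re.findall('img src=".*?"', content)
# -> ['img src="https://img1.doubanio.com/view/a.jpg"']

re.findall(r'img src="(.*?)"', content)
# -> ['https://img1.doubanio.com/view/a.jpg']
```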
@@ -148,6 +156,8 @@ class Crawler_douban():
             for one in page_dic:
                 releaser_id = one["author"]["id"]
                 mid = one["id"]
+                doc_id = cal_doc_id(platform=res_dic["platform"], url=res_dic["url"], data_dict=res_dic,doc_id_type="all-time-url")
+
                 try:
                     res_dic = {
                         "release_time": trans_strtime_to_timestamp(one["create_time"]),
@@ -160,10 +170,14 @@ class Crawler_douban():
                         "releaserUrl": "https://www.douban.com/people/%s" % releaser_id,
                         "releaser_id_str": "douban_%s" % releaser_id,
                         'video_img':one["cover_url"],
-                        "mid":mid
+                        "mid":mid,
+                        "platform":"douban",
+                        "doc_id":doc_id
                     }
                     res_dic.update(self.get_single_page(mid,proxies_num))
                     print(res_dic)
                     yield res_dic
                 except Exception as e:
                     print(one)
...
@@ -192,8 +192,13 @@ class Crawler_weibo():
                         "forward_text":forward_text,
                         "mid":mid,
                         "releaserUrl":"https://www.weibo.com/u/%s" % releaser_id,
-                        "releaser_id_str":"weibo_%s" % releaser_id
+                        "releaser_id_str":"weibo_%s" % releaser_id,
+                        "platform":"weibo"
                     }
+                    # from write_data_into_es.func_cal_doc_id import cal_doc_id
+                    # id = cal_doc_id(platform=res_dic["platform"], url=res_dic["url"], data_dict=res_dic,
+                    #                 doc_id_type="all-time-url")
+                    # print(id)
                     yield res_dic
                 except Exception as e:
                     print(mblog)
...
@@ -68,6 +68,7 @@ def hot_words_output_result(result_Lst,output_index="short-video-hotwords"):
     if eror_dic['errors'] is True:
         print(eror_dic)
+
 def output_result(result_Lst, platform,
                   output_to_file=False, filepath=None,
                   output_to_es_raw=False,
@@ -91,10 +92,8 @@ def output_result(result_Lst, platform,
     # feed url into redis
     if push_to_redis:
-        redis_list_name, url_counter = feed_url_into_redis(
-            result_Lst, platform,
-            batch_str=batch_str,
-            release_time_lower_bdr=release_time_lower_bdr)
+        feed_url_into_redis(
+            result_Lst, platform)
     # output into file according to passed in parameters
     if output_to_file is True and filepath is not None:
@@ -144,6 +143,7 @@ def get_ill_encoded_str_posi(UnicodeEncodeError_msg):
         pass
     return posi_nums
+
 def bulk_write_into_es(dict_Lst,
                        index,
                        construct_id=False,
@@ -275,7 +275,6 @@ def scan_redis_to_crawl():
             break
-
 def remove_fetched_url_from_redis(remove_interval=10):
     time.sleep(remove_interval)
     cur = 0
...
@@ -5,3 +5,4 @@ absl-py==0.9.0
 kdl==0.2.15
 redis==3.5.3
 elasticsearch==7.8.0
+qiniu==7.2.8
\ No newline at end of file
@@ -275,6 +275,13 @@ def weibo(releaserUrl,**kwargs):
     except:
         return None
+
+def douban(releaserUrl,**kwargs):
+    if "people/" in releaserUrl:
+        releaser_id = re.findall(r"people/(.*)", releaserUrl)[0]
+        return releaser_id
+
 plantform_func = {
     "toutiao": toutiao,
     "西瓜":toutiao,
@@ -289,7 +296,8 @@ plantform_func = {
     "抖音":douyin,
     "weixin":weixin,
     "weibo":weibo,
-    "pearvideo":pearvideo
+    "pearvideo":pearvideo,
+    "douban":douban
 }
...
@@ -273,8 +273,98 @@ if __name__ == "__main__":
         {"releaserUrl": "https://weibo.com/p/1005055634795408/home?from=page_100505&mod=TAB#place", "releaser": "圈八戒 ",
          "platform": "weibo"},
         {"releaserUrl": "https://weibo.com/u/6511173721", "releaser": "圈内课代表", "platform": "weibo"},
-        {"releaserUrl": "https://weibo.com/p/1005055471534537/home?from=page_100505&mod=TAB&is_hot=1#place",
-         "releaser": "娱闻少女", "platform": "weibo"}
+        {"releaserUrl": "https://weibo.com/p/1005055471534537/home?from=page_100505&mod=TAB#place", "releaser": "娱闻少女",
+         "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/3193443435", "releaser": "圈太妹", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/2022990945", "releaser": "圈内狙击手", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/1809782810?is_all=1", "releaser": "全娱乐爆料", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5157190426?is_all=1", "releaser": "娱乐扒少", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/2125613987?is_all=1", "releaser": "圈内一把手 ", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005051948622644/home?from=page_100505&mod=TAB#place",
"releaser": "影视圈扒姐 ", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/2611791490", "releaser": "娱评八公", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/1652840683", "releaser": "追星", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5086098727?is_hot=1", "releaser": "闻娱教主", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5101787982?is_all=1", "releaser": "扒婆说", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5101844765?is_hot=1", "releaser": "星娱客 ", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005052115034114/home?from=page_100505&mod=TAB#place",
"releaser": "娱乐明星团 ", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/6473952993?is_hot=1", "releaser": "偶像日报", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5106602573?is_hot=1", "releaser": "八哥", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5909342713?", "releaser": "圈内教父", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/3200673035?", "releaser": "扒圈老鬼", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005055965621313/home?from=page_100505&mod=TAB#place", "releaser": "圈内师爷",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/1915749764?is_hot=1", "releaser": "迷妹速报", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1002061836328652/home?from=page_100206&mod=TAB#place", "releaser": "前线娱乐",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5896207859?is_hot=1", "releaser": "娱记者", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5717515328?is_hot=1", "releaser": "娱老汉", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005051795994180/home?from=page_100505&mod=TAB#place",
"releaser": "娱乐News", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5978818414?is_hot=1", "releaser": "娱圈蜀黍", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/2489917511?is_hot=1", "releaser": "芒果捞扒婆 ", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5279487569?is_hot=1", "releaser": "娱姐速报 ", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5106602573?is_hot=1", "releaser": "八哥 ", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5323541229?profile_ftype=1&is_all=1#_0", "releaser": "国内外白富美揭秘 ",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1003062512591982/home?from=page_100306&mod=TAB#place", "releaser": "圈少爷",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/2821843050?profile_ftype=1&is_all=1#_0", "releaser": "圈内老鬼",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/3028215832?profile_ftype=1&is_all=1#_0", "releaser": "娱扒爷",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5336756846?profile_ftype=1&is_all=1#_0", "releaser": "兔兔热议",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005051844235935/home?from=page_100505&mod=TAB#place",
"releaser": "娱乐圈外汉", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005052586409491/home?from=page_100505&mod=TAB#place",
"releaser": "娱乐圈吃瓜指南 ", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5255814135", "releaser": "八组兔区爆料", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/2871033210?is_hot=1", "releaser": "八组兔区热议 ", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005052813285937/home?from=page_100505&mod=TAB#place",
"releaser": "八组兔区娱乐圈", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005052831749482/home?from=page_100505&mod=TAB#place",
"releaser": "八组兔区揭秘", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/2709814831", "releaser": "娱大蜀黍", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5634795408", "releaser": "圈八戒", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5176743404", "releaser": "瓜瓜搬运机", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5039775130", "releaser": "娱乐揭秘蜀黍", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/7123521074", "releaser": "饭圈日报", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/1746658980", "releaser": "饭圈阿姨", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005052453653365/home?from=page_100505&mod=TAB#place", "releaser": "圈内星探",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/6311417880?profile_ftype=1&is_all=1#_0", "releaser": "星扒婆 ",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/1420816495?profile_ftype=1&is_all=1#_0", "releaser": "娱尾纹",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/1974754790", "releaser": "教父娱乐", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/1818950785?refer_flag=1028035010_&is_hot=1", "releaser": "扒圈有鱼",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/1893711543", "releaser": "娱乐有饭", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1002061653255165/home?from=page_100206&mod=TAB#place",
"releaser": "娱乐日爆社", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005052391322817/home?from=page_100505&mod=TAB#place", "releaser": "小娱乐家",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1003061994712500/home?from=page_100306&mod=TAB#place",
"releaser": "星扒客push", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5700087877", "releaser": "毒舌八卦", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/3779202361", "releaser": "西皮娱乐", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/1632619962", "releaser": "瓜组新鲜事", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005052103460752/home?from=page_100505&mod=TAB#place", "releaser": "娱嬷嬷 ",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/5874584452", "releaser": "吃瓜鹅每日搬", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005052397961280/home?from=page_100505&mod=TAB#place", "releaser": "娱大白",
"platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005053246379064/home?from=page_100505&mod=TAB#place",
"releaser": "娱乐圈扒姐 ", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/u/1830483711", "releaser": "娱乐女记", "platform": "weibo"},
{"releaserUrl": "https://weibo.com/p/1005053847401640/home?from=page_100505&mod=TAB#place",
"releaser": "吃瓜爆料每日搬 ", "platform": "weibo"},
{"releaserUrl": "https://www.douban.com/people/hot_tag",
"releaser": "hot_tag", "platform": "douban"},
{"releaserUrl": "https://www.douban.com/people/new_tag",
"releaser": "new_tag", "platform": "douban"}
     ]
     extra_dic = {
         "department_tags":["策略组"],
...