update

2f3b73ca · litaolemo · 288613fc · 2f3b73ca · 2f3b73ca · 2f3b73ca
Commit 2f3b73ca authored Jul 27, 2020 by litaolemo
10 changed files
--- a/crawler_sys/framework/platform_redis_register.py
+++ b/crawler_sys/framework/platform_redis_register.py
@@ -12,10 +12,12 @@ Due to the data-type used in redis is set so I changed the word from list to set


 platform_redis_set_reg = {
-    'toutiao': 'toutiao_url_set',
-    '腾讯视频': 'v_qq_url_set',
-    'youku': 'youku_url_set',
-    'iqiyi': 'iqiyi_url_set',
+    'toutiao': 'toutiao_url_hash',
+    '腾讯视频': 'v_qq_url_hash',
+    'youku': 'youku_url_hash',
+    'iqiyi': 'iqiyi_url_hash',
+    'weibo': 'weibo_url_hash',
+    'douban': 'douban_url_hash',
    }



--- a/crawler_sys/framework/redis_interact.py
+++ b/crawler_sys/framework/redis_interact.py
@@ -4,22 +4,12 @@ Created on Wed Jun  6 18:18:09 2018

 @author: hanye
 """
-import redis
+import redis, json
 from crawler_sys.framework.platform_redis_register import get_redis_list_name
 from crawler_sys.framework.es_crawler import scan_crawler_url_register

-rds = redis.StrictRedis(host='192.168.17.60', port=6379, db=0)
+rds = redis.StrictRedis(host='154.8.190.251', port=6379, db=19)

-def url_reformer(platform, url):
-    """
-    to reform url according to platform, in the future.
-    Say, a url of http://www.toutiao.com/group/1234567890123456789
-    as a string is different from http://www.365yg.com/u/1234567890123456789,
-    but they point to the same resource. They should be reformed
-    to one unique url before pushing into redis for futher crawling.
-    """
-    reformed_url = url
-    return reformed_url

 def feed_url_into_redis(dict_Lst, platform,
                        release_time_lower_bdr=None,
@@ -32,35 +22,15 @@ def feed_url_into_redis(dict_Lst, platform,
    is not given when call this function, all urls will be
    pushed into redis.
    """
-    redis_list_name = get_redis_list_name(platform, batch_str)
-    if redis_list_name is None:
-        print('Failed to get correct redis list name '
-              'in platform_redis_register for platform: '
-              % platform)
-        return (None, None)
-    else:
-        print('Feeding url into redis list %s ...' % redis_list_name)
-        url_counter = 0
    for data_dict in dict_Lst:
        try:
-                url = data_dict['url']
-                url_reformed = url_reformer(platform, url)
-                if release_time_lower_bdr is None:
-                    sadd_c = rds.sadd(redis_list_name, url_reformed)
-                    url_counter += sadd_c
-                else:
-                    url_release_time = data_dict['release_time']
-                    if url_release_time >= release_time_lower_bdr:
-                        sadd_c = rds.sadd(redis_list_name, url_reformed)
-                        url_counter += sadd_c
+            doc_id = data_dict['doc_id']
+            sadd_c = rds.lpush(doc_id, json.dumps(data_dict))
+            rds.expire(doc_id,259200)
        except:
-                print('Failed to push url into redis, '
-                      'might because of lack of url field '
-                      'or lack of release_time field, or '
-                      'has wrong typed release_time value. '
-                      'The failed data dict is: \n %s' % data_dict)
-        print('Pushed %d urls into redis' % url_counter)
-        return (redis_list_name, url_counter)
+            print('Failed to push data into redis')
+    print('Pushed data into redis')
+    return True

 def pull_url_from_es(platform, release_time_lower_bdr=None):
    """

--- a/crawler_sys/framework/update_data_in_target_releasers_multi_process_by_date_from_redis.py
+++ b/crawler_sys/framework/update_data_in_target_releasers_multi_process_by_date_from_redis.py
@@ -16,7 +16,7 @@ Data in es will be update when run this program once.
 """
 from crawler.crawler_sys.site_crawler_by_redis import (crawler_toutiao, crawler_v_qq, crawler_tudou, crawler_haokan,
                                                       crawler_tencent_news,
-                                                       crawler_wangyi_news, crawler_kwai, crawler_douyin,toutiao_article,crawler_weibo)
+                                                       crawler_wangyi_news, crawler_kwai, crawler_douyin,toutiao_article,crawler_weibo,crawler_douban)
 import sys
 from crawler.crawler_sys.utils.output_results import output_result
 import argparse, copy, datetime, time
@@ -46,14 +46,12 @@ parser.add_argument('-n', '--max_page', default=30, type=int,
                          'must be an int value, default to 30.'))
 parser.add_argument('-f', '--output_file_path', default='', type=str,
                    help=('Specify output file path, default None.'))
-parser.add_argument('-r', '--push_to_redis', default='False', type=str,
+parser.add_argument('-r', '--push_to_redis', default=False, type=lambda s: str(s).lower() in ('true', '1', 'yes'),
                    help=('Write urls to redis or not, default to True'))
 parser.add_argument('-w', '--output_to_es_raw', default='True', type=str,
                    help=('Write data into es or not, default to True'))
 parser.add_argument('-index', '--es_index', default='crawler-data-raw', type=str,
                    help=('assign a es_index to write into, default to crawler-data-raw'))
-parser.add_argument('-doc', '--doc_type', default='doc', type=str,
-                    help=('assign a doc to write into, default to doc'))
 parser.add_argument('-g', '--output_to_es_register', default='True', type=str,
                    help=('Write data into es or not, default to True'))
 parser.add_argument('-processes', '--processes_num', default=8, type=int,
@@ -84,7 +82,8 @@ platform_crawler_reg = {
        '抖音': crawler_douyin.Crawler_douyin,
        "网易新闻": crawler_wangyi_news.Crawler_wangyi_news,
        "kwai": crawler_kwai.Crawler_kwai,
-        "weibo": crawler_weibo.Crawler_weibo
+        "weibo": crawler_weibo.Crawler_weibo,
+        "douban":crawler_douban.Crawler_douban
 }


@@ -212,6 +211,7 @@ def single_thead(processe,name):
                    if not count_has:
                        releaser_body["mssage"] = "爬取失败,请检查账号"
                        rds_1.hset("error",releaser_body["platform"] + "/" +releaser_body["releaserUrl"],json.dumps(releaser_body))
+
                    if data_list != []:
                        output_result(result_Lst=data_list,
                                      platform=platform,
@@ -219,7 +219,6 @@ def single_thead(processe,name):
                                      filepath=None,
                                      output_to_es_raw=output_to_es_raw,
                                      es_index=es_index,
-                                      doc_type=doc_type,
                                      output_to_es_register=output_to_es_register)
                        print(len(data_list))
                        data_list.clear()
@@ -249,6 +248,7 @@ def start_crawler(processe,name):
        # # t.setDaemon(False) #
        t.start()

+
 if __name__ == "__main__":
    executor = ProcessPoolExecutor(max_workers=processes_num)
    futures = []

--- a/crawler_sys/site_crawler_by_redis/crawler_weibo.py
+++ b/crawler_sys/site_crawler_by_redis/crawler_weibo.py
@@ -21,7 +21,7 @@ from crawler.crawler_sys.utils.util_logging import logged
 from crawler.crawler_sys.proxy_pool.func_get_proxy_form_kuaidaili import get_proxy
 from crawler.crawler_sys.utils.html_to_str import dehtml
 from write_data_into_es.func_get_releaser_id import *
-
+from write_data_into_es.func_cal_doc_id import cal_doc_id

 class Crawler_weibo():
    def __init__(self, timeout=None, platform='weibo'):
@@ -65,6 +65,20 @@ class Crawler_weibo():
    def get_releaser_id(self, releaserUrl):
        return get_releaser_id(platform=self.platform, releaserUrl=releaserUrl)

+    @staticmethod
+    def get_img(data):
+        """Return image urls from data["pics"]: each pic's "large" url, else its own "url", else skipped."""
+        img_list = []
+        for one in data.get("pics") or []:
+            try:
+                img_list.append(one["large"]["url"])
+            except Exception as e:
+                # Report the miss, then fall back to the pic's own url only when it has one.
+                print("add img error %s" % e)
+                if isinstance(one, dict) and "url" in one:
+                    img_list.append(one["url"])
+        return img_list
+
    def releaser_page(self, releaserUrl,
                      output_to_file=False, filepath=None,
                      output_to_es_raw=False,
@@ -75,7 +89,7 @@ class Crawler_weibo():
                      doc_type=None, proxies_num=None):
        print('Processing releaserUrl %s' % releaserUrl)
        result_Lst = []
-        releaser_id, containerid = self.get_releaser_id(releaserUrl)
+        releaser_id = self.get_releaser_id(releaserUrl)
        # xsrf_token,url_extr = self.get_weibo_info(releaser_id)
        headers = {
            "accept": "application/json, text/plain, */*",
@@ -131,6 +145,9 @@ class Crawler_weibo():
                                mid = mblog.get("mid")
                                forward_text = ""
                                forward_user = ""
+                                doc_id = cal_doc_id(platform=res_dic["platform"], url=res_dic["url"], data_dict=res_dic,
+                                                    doc_id_type="all-time-url")
+
                                if one.get("source") == "绿洲":
                                    text_type = "绿洲"
                                elif mblog.get("retweeted_status"):
@@ -156,7 +173,9 @@ class Crawler_weibo():
                                    "forward_text": forward_text,
                                    "mid": mid,
                                    "releaserUrl": "https://www.weibo.com/u/%s" % releaser_id,
-                                    "releaser_id_str": "weibo_%s" % releaser_id
+                                    "releaser_id_str": "weibo_%s" % releaser_id,
+                                    "img_list":self.get_img(mblog),
+                                    "doc_id":doc_id
                                }
                                yield res_dic
                            except Exception as e:
@@ -189,7 +208,7 @@ if __name__ == '__main__':
    # releaserUrl = 'http://v.qq.com/vplus/cfa34d96d1b6609f1dccdea65b26b83d'
    url_list = [
        # "https://weibo.com/u/1764615662",
-        # "https://weibo.com/u/3662247177",
+        "https://weibo.com/u/3662247177",
        # "https://weibo.com/u/2378564111",
        # "https://weibo.com/u/2983578965",
        # "https://weibo.com/u/3938976579",
@@ -198,13 +217,14 @@ if __name__ == '__main__':
        # "https://weibo.com/p/1005055471534537/home?from=page_100505&mod=TAB&is_hot=1#place",

    ]
-    # res = test.releaser_page(url, output_to_es_raw=True,
-    #                     es_index='crawler-data-raw',
-    #                      releaser_page_num_max=400,proxies_num=0)
-    # for r in res:
-    #     print(r)
-    for u in url_list:
-        test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_raw=False,
+    for url in url_list:
+        res = test.releaser_page(url, output_to_es_raw=True,
                            es_index='crawler-data-raw',
-                                   doc_type='doc', releaser_page_num_max=4000)
+                             releaser_page_num_max=400,proxies_num=0)
+        for r in res:
+            print(r)
+    # for u in url_list:
+    #     test.releaser_page_by_time(1590940800000, 1595468554268, u, output_to_es_raw=False,
+    #                                es_index='crawler-data-raw',
+    #                                doc_type='doc', releaser_page_num_max=4000)
    # test.get_single_page(4524055937468233)
--- a/crawler_sys/site_crawler_test/crawler_douban.py
+++ b/crawler_sys/site_crawler_test/crawler_douban.py
@@ -27,8 +27,10 @@ from crawler.crawler_sys.utils.trans_strtime_to_timestamp import weibo_parse_tim
 # from crawler.crawler_sys.utils.util_logging import logged
 # from crawler.crawler_sys.proxy_pool.func_get_proxy_form_kuaidaili import get_proxy
 from crawler.crawler_sys.utils.html_to_str import dehtml
-
+from bs4 import BeautifulSoup
 from write_data_into_es.func_get_releaser_id import *
+from write_data_into_es.func_cal_doc_id import cal_doc_id
+


 class Crawler_douban():
@@ -70,17 +72,23 @@ class Crawler_douban():
                # content = dehtml(page_json["content"])
                if page_json.get('localized_message'):
                    continue
+
+                # content_html = """<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>Title</title></head><body>%s</body></html>""" % page_json["content"]
+                # bs = BeautifulSoup(content_html, "html.parser")
+                # content = bs.textarea.get_text()
                content = page_json["content"]
                repost_count = trans_play_count(page_json["reshares_count"])
                comment_count = trans_play_count(page_json["comments_count"])
                favorite_count = trans_play_count(page_json["like_count"])
                collection_count = trans_play_count(page_json["collections_count"])
+                img_list = re.findall('img src=".*?"',content)
                dic = {
                    "content":content,
                    "repost_count":repost_count,
                    "comment_count":comment_count,
                    "favorite_count":favorite_count,
                    "collection_count":collection_count,
+                    "img_list":img_list,
                }
                return dic
            except Exception as e:
@@ -148,6 +156,8 @@ class Crawler_douban():
                    for one in page_dic:
                        releaser_id = one["author"]["id"]
                        mid = one["id"]
+                        doc_id = cal_doc_id(platform=res_dic["platform"], url=res_dic["url"], data_dict=res_dic,doc_id_type="all-time-url")
+
                        try:
                            res_dic = {
                                "release_time": trans_strtime_to_timestamp(one["create_time"]),
@@ -160,10 +170,14 @@ class Crawler_douban():
                                "releaserUrl": "https://www.douban.com/people/%s" % releaser_id,
                                "releaser_id_str": "douban_%s" % releaser_id,
                                'video_img':one["cover_url"],
-                                "mid":mid
+                                "mid":mid,
+                                "platform":"douban",
+                                "doc_id":doc_id
                            }
+
                            res_dic.update(self.get_single_page(mid,proxies_num))
                            print(res_dic)
+
                            yield res_dic
                        except Exception as e:
                            print(one)

--- a/crawler_sys/site_crawler_test/crawler_weibo.py
+++ b/crawler_sys/site_crawler_test/crawler_weibo.py
@@ -192,8 +192,13 @@ class Crawler_weibo():
                                    "forward_text":forward_text,
                                    "mid":mid,
                                    "releaserUrl":"https://www.weibo.com/u/%s" % releaser_id,
-                                    "releaser_id_str":"weibo_%s" % releaser_id
+                                    "releaser_id_str":"weibo_%s" % releaser_id,
+                                    "platform":"weibo"
                                }
+                                # from write_data_into_es.func_cal_doc_id import cal_doc_id
+                                # id = cal_doc_id(platform=res_dic["platform"], url=res_dic["url"], data_dict=res_dic,
+                                #                 doc_id_type="all-time-url")
+                                # print(id)
                                yield res_dic
                            except Exception as e:
                                print(mblog)

--- a/crawler_sys/utils/output_results.py
+++ b/crawler_sys/utils/output_results.py
@@ -68,6 +68,7 @@ def hot_words_output_result(result_Lst,output_index="short-video-hotwords"):
        if eror_dic['errors'] is True:
            print(eror_dic)

+
 def output_result(result_Lst, platform,
                  output_to_file=False, filepath=None,
                  output_to_es_raw=False,
@@ -91,10 +92,8 @@ def output_result(result_Lst, platform,

    # feed url into redis
    if push_to_redis:
-        redis_list_name, url_counter = feed_url_into_redis(
-            result_Lst, platform,
-            batch_str=batch_str,
-            release_time_lower_bdr=release_time_lower_bdr)
+        feed_url_into_redis(
+            result_Lst, platform)

    # output into file according to passed in parameters
    if output_to_file is True and filepath is not None:
@@ -144,6 +143,7 @@ def get_ill_encoded_str_posi(UnicodeEncodeError_msg):
        pass
    return posi_nums

+
 def bulk_write_into_es(dict_Lst,
                       index,
                       construct_id=False,
@@ -275,7 +275,6 @@ def scan_redis_to_crawl():
            break


-
 def remove_fetched_url_from_redis(remove_interval=10):
    time.sleep(remove_interval)
    cur = 0

--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ absl-py==0.9.0
 kdl==0.2.15
 redis==3.5.3
 elasticsearch==7.8.0
+qiniu==7.2.8
\ No newline at end of file
--- a/write_data_into_es/func_get_releaser_id.py
+++ b/write_data_into_es/func_get_releaser_id.py
@@ -275,6 +275,13 @@ def weibo(releaserUrl,**kwargs):
    except:
        return None

+
+def douban(releaserUrl,**kwargs):
+    """Return the id following "people/" in releaserUrl, or None when the URL has no such segment."""
+    found = re.findall(r"people/(.*)", releaserUrl)
+    return found[0] if found else None
+
+
 plantform_func = {
    "toutiao": toutiao,
    "西瓜":toutiao,
@@ -289,7 +296,8 @@ plantform_func = {
    "抖音":douyin,
    "weixin":weixin,
    "weibo":weibo,
-    "pearvideo":pearvideo
+    "pearvideo":pearvideo,
+    "douban":douban
 }



--- a/write_data_into_es/target_releaser_add.py
+++ b/write_data_into_es/target_releaser_add.py
@@ -273,8 +273,98 @@ if __name__ == "__main__":
         {"releaserUrl": "https://weibo.com/p/1005055634795408/home?from=page_100505&mod=TAB#place", "releaser": "圈八戒 ",
          "platform": "weibo"},
         {"releaserUrl": "https://weibo.com/u/6511173721", "releaser": "圈内课代表", "platform": "weibo"},
-         {"releaserUrl": "https://weibo.com/p/1005055471534537/home?from=page_100505&mod=TAB&is_hot=1#place",
-          "releaser": "娱闻少女", "platform": "weibo"}
+         {"releaserUrl": "https://weibo.com/p/1005055471534537/home?from=page_100505&mod=TAB#place", "releaser": "娱闻少女",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/3193443435", "releaser": "圈太妹", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/2022990945", "releaser": "圈内狙击手", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/1809782810?is_all=1", "releaser": "全娱乐爆料", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5157190426?is_all=1", "releaser": "娱乐扒少", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/2125613987?is_all=1", "releaser": "圈内一把手 ", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005051948622644/home?from=page_100505&mod=TAB#place",
+          "releaser": "影视圈扒姐 ", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/2611791490", "releaser": "娱评八公", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/1652840683", "releaser": "追星", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5086098727?is_hot=1", "releaser": "闻娱教主", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5101787982?is_all=1", "releaser": "扒婆说", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5101844765?is_hot=1", "releaser": "星娱客 ", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005052115034114/home?from=page_100505&mod=TAB#place",
+          "releaser": "娱乐明星团 ", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/6473952993?is_hot=1", "releaser": "偶像日报", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5106602573?is_hot=1", "releaser": "八哥", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5909342713?", "releaser": "圈内教父", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/3200673035?", "releaser": "扒圈老鬼", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005055965621313/home?from=page_100505&mod=TAB#place", "releaser": "圈内师爷",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/1915749764?is_hot=1", "releaser": "迷妹速报", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1002061836328652/home?from=page_100206&mod=TAB#place", "releaser": "前线娱乐",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5896207859?is_hot=1", "releaser": "娱记者", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5717515328?is_hot=1", "releaser": "娱老汉", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005051795994180/home?from=page_100505&mod=TAB#place",
+          "releaser": "娱乐News", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5978818414?is_hot=1", "releaser": "娱圈蜀黍", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/2489917511?is_hot=1", "releaser": "芒果捞扒婆 ", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5279487569?is_hot=1", "releaser": "娱姐速报 ", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5106602573?is_hot=1", "releaser": "八哥 ", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5323541229?profile_ftype=1&is_all=1#_0", "releaser": "国内外白富美揭秘 ",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1003062512591982/home?from=page_100306&mod=TAB#place", "releaser": "圈少爷",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/2821843050?profile_ftype=1&is_all=1#_0", "releaser": "圈内老鬼",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/3028215832?profile_ftype=1&is_all=1#_0", "releaser": "娱扒爷",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5336756846?profile_ftype=1&is_all=1#_0", "releaser": "兔兔热议",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005051844235935/home?from=page_100505&mod=TAB#place",
+          "releaser": "娱乐圈外汉", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005052586409491/home?from=page_100505&mod=TAB#place",
+          "releaser": "娱乐圈吃瓜指南 ", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5255814135", "releaser": "八组兔区爆料", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/2871033210?is_hot=1", "releaser": "八组兔区热议 ", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005052813285937/home?from=page_100505&mod=TAB#place",
+          "releaser": "八组兔区娱乐圈", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005052831749482/home?from=page_100505&mod=TAB#place",
+          "releaser": "八组兔区揭秘", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/2709814831", "releaser": "娱大蜀黍", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5634795408", "releaser": "圈八戒", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5176743404", "releaser": "瓜瓜搬运机", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5039775130", "releaser": "娱乐揭秘蜀黍", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/7123521074", "releaser": "饭圈日报", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/1746658980", "releaser": "饭圈阿姨", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005052453653365/home?from=page_100505&mod=TAB#place", "releaser": "圈内星探",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/6311417880?profile_ftype=1&is_all=1#_0", "releaser": "星扒婆 ",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/1420816495?profile_ftype=1&is_all=1#_0", "releaser": "娱尾纹",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/1974754790", "releaser": "教父娱乐", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/1818950785?refer_flag=1028035010_&is_hot=1", "releaser": "扒圈有鱼",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/1893711543", "releaser": "娱乐有饭", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1002061653255165/home?from=page_100206&mod=TAB#place",
+          "releaser": "娱乐日爆社", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005052391322817/home?from=page_100505&mod=TAB#place", "releaser": "小娱乐家",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1003061994712500/home?from=page_100306&mod=TAB#place",
+          "releaser": "星扒客push", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5700087877", "releaser": "毒舌八卦", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/3779202361", "releaser": "西皮娱乐", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/1632619962", "releaser": "瓜组新鲜事", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005052103460752/home?from=page_100505&mod=TAB#place", "releaser": "娱嬷嬷 ",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/5874584452", "releaser": "吃瓜鹅每日搬", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005052397961280/home?from=page_100505&mod=TAB#place", "releaser": "娱大白",
+          "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005053246379064/home?from=page_100505&mod=TAB#place",
+          "releaser": "娱乐圈扒姐 ", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/u/1830483711", "releaser": "娱乐女记", "platform": "weibo"},
+         {"releaserUrl": "https://weibo.com/p/1005053847401640/home?from=page_100505&mod=TAB#place",
+          "releaser": "吃瓜爆料每日搬 ", "platform": "weibo"},
+        {"releaserUrl": "https://www.douban.com/people/hot_tag",
+         "releaser": "hot_tag", "platform": "douban"},
+        {"releaserUrl": "https://www.douban.com/people/new_tag",
+         "releaser": "new_tag", "platform": "douban"}
    ]
    extra_dic = {
            "department_tags":["策略组"],