Commit ea7d0c06 authored by litaolemo's avatar litaolemo

update

parent 2f3b73ca
# crawler # crawler
1. 部署在BJ-GM-Prod-Cos-faiss001/srv/apps/ 1. 部署在BJ-GM-Prod-Cos-faiss001/srv/apps/
2. 创建虚拟环境 conda activate crawler_env/conda deactivate 2. 切换权限 sudo su - gmuser
\ No newline at end of file 3. source /root/anaconda3/bin/activate
4. 激活/退出虚拟环境 conda activate crawler_env / conda deactivate
\ No newline at end of file
sudo su - gmuser
source /root/anaconda3/bin/activate
conda activate crawler_env
\ No newline at end of file
...@@ -14,9 +14,11 @@ Data in es will be updated when this program is run once. ...@@ -14,9 +14,11 @@ Data in es will be updated when this program is run once.
@author: hanye @author: hanye
""" """
from crawler.crawler_sys.site_crawler_by_redis import (crawler_toutiao, crawler_v_qq, crawler_tudou, crawler_haokan, from crawler.crawler_sys.site_crawler_by_redis import (crawler_toutiao,
crawler_tencent_news, # crawler_v_qq, crawler_tudou, crawler_haokan,
crawler_wangyi_news, crawler_kwai, crawler_douyin,toutiao_article,crawler_weibo,crawler_douban) # crawler_tencent_news,
# crawler_wangyi_news, crawler_kwai, crawler_douyin,
toutiao_article,crawler_weibo,crawler_douban)
import sys import sys
from crawler.crawler_sys.utils.output_results import output_result from crawler.crawler_sys.utils.output_results import output_result
import argparse, copy, datetime, time import argparse, copy, datetime, time
...@@ -69,19 +71,19 @@ args = parser.parse_args() ...@@ -69,19 +71,19 @@ args = parser.parse_args()
platform_crawler_reg = { platform_crawler_reg = {
'toutiao': crawler_toutiao.Crawler_toutiao, 'toutiao': crawler_toutiao.Crawler_toutiao,
'toutiao_article': toutiao_article.Crawler_toutiao_article, 'toutiao_article': toutiao_article.Crawler_toutiao_article,
'腾讯视频': crawler_v_qq.Crawler_v_qq, # '腾讯视频': crawler_v_qq.Crawler_v_qq,
# 'iqiyi': crawler_iqiyi.Crawler_iqiyi, # 'iqiyi': crawler_iqiyi.Crawler_iqiyi,
# 'youku': crawler_youku.Crawler_youku, # 'youku': crawler_youku.Crawler_youku,
'new_tudou': crawler_tudou.Crawler_tudou, # 'new_tudou': crawler_tudou.Crawler_tudou,
'haokan': crawler_haokan.Crawler_haokan, # 'haokan': crawler_haokan.Crawler_haokan,
'腾讯新闻': crawler_tencent_news.Crawler_Tencent_News, # '腾讯新闻': crawler_tencent_news.Crawler_Tencent_News,
# 'miaopai': crawler_miaopai.Crawler_miaopai, # 'miaopai': crawler_miaopai.Crawler_miaopai,
# 'pearvideo': crawler_pear.Crawler_pear, # 'pearvideo': crawler_pear.Crawler_pear,
# 'bilibili': crawler_bilibili.Crawler_bilibili, # 'bilibili': crawler_bilibili.Crawler_bilibili,
# 'Mango': crawler_mango, # 'Mango': crawler_mango,
'抖音': crawler_douyin.Crawler_douyin, # '抖音': crawler_douyin.Crawler_douyin,
"网易新闻": crawler_wangyi_news.Crawler_wangyi_news, # "网易新闻": crawler_wangyi_news.Crawler_wangyi_news,
"kwai": crawler_kwai.Crawler_kwai, # "kwai": crawler_kwai.Crawler_kwai,
"weibo": crawler_weibo.Crawler_weibo, "weibo": crawler_weibo.Crawler_weibo,
"douban":crawler_douban.Crawler_douban "douban":crawler_douban.Crawler_douban
} }
......
...@@ -5,4 +5,9 @@ absl-py==0.9.0 ...@@ -5,4 +5,9 @@ absl-py==0.9.0
kdl==0.2.15 kdl==0.2.15
redis==3.5.3 redis==3.5.3
elasticsearch==7.8.0 elasticsearch==7.8.0
qiniu==7.2.8 qiniu==7.2.8
\ No newline at end of file aiohttp==3.6.2
bs4==4.9.1
selenium==3.141.0
fontTools==4.13.0
numpy==1.19.1
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment