Commit ea7d0c06 authored by litaolemo's avatar litaolemo

update

parent 2f3b73ca
# crawler
1. 部署在服务器 BJ-GM-Prod-Cos-faiss001 的 /srv/apps/ 目录下
2. 创建虚拟环境 conda activate crawler_env/conda deactivate
\ No newline at end of file
2. 切换权限 sudo su - gmuser
3. source /root/anaconda3/bin/activate
4. 激活虚拟环境 conda activate crawler_env(退出虚拟环境时执行 conda deactivate)
\ No newline at end of file
sudo su - gmuser
source /root/anaconda3/bin/activate
conda activate crawler_env
\ No newline at end of file
......@@ -14,9 +14,11 @@ Data in es will be update when run this program once.
@author: hanye
"""
from crawler.crawler_sys.site_crawler_by_redis import (crawler_toutiao, crawler_v_qq, crawler_tudou, crawler_haokan,
crawler_tencent_news,
crawler_wangyi_news, crawler_kwai, crawler_douyin,toutiao_article,crawler_weibo,crawler_douban)
from crawler.crawler_sys.site_crawler_by_redis import (crawler_toutiao,
# crawler_v_qq, crawler_tudou, crawler_haokan,
# crawler_tencent_news,
# crawler_wangyi_news, crawler_kwai, crawler_douyin,
toutiao_article,crawler_weibo,crawler_douban)
import sys
from crawler.crawler_sys.utils.output_results import output_result
import argparse, copy, datetime, time
......@@ -69,19 +71,19 @@ args = parser.parse_args()
platform_crawler_reg = {
'toutiao': crawler_toutiao.Crawler_toutiao,
'toutiao_article': toutiao_article.Crawler_toutiao_article,
'腾讯视频': crawler_v_qq.Crawler_v_qq,
# '腾讯视频': crawler_v_qq.Crawler_v_qq,
# 'iqiyi': crawler_iqiyi.Crawler_iqiyi,
# 'youku': crawler_youku.Crawler_youku,
'new_tudou': crawler_tudou.Crawler_tudou,
'haokan': crawler_haokan.Crawler_haokan,
'腾讯新闻': crawler_tencent_news.Crawler_Tencent_News,
# 'new_tudou': crawler_tudou.Crawler_tudou,
# 'haokan': crawler_haokan.Crawler_haokan,
# '腾讯新闻': crawler_tencent_news.Crawler_Tencent_News,
# 'miaopai': crawler_miaopai.Crawler_miaopai,
# 'pearvideo': crawler_pear.Crawler_pear,
# 'bilibili': crawler_bilibili.Crawler_bilibili,
# 'Mango': crawler_mango,
'抖音': crawler_douyin.Crawler_douyin,
"网易新闻": crawler_wangyi_news.Crawler_wangyi_news,
"kwai": crawler_kwai.Crawler_kwai,
# '抖音': crawler_douyin.Crawler_douyin,
# "网易新闻": crawler_wangyi_news.Crawler_wangyi_news,
# "kwai": crawler_kwai.Crawler_kwai,
"weibo": crawler_weibo.Crawler_weibo,
"douban":crawler_douban.Crawler_douban
}
......
......@@ -5,4 +5,9 @@ absl-py==0.9.0
kdl==0.2.15
redis==3.5.3
elasticsearch==7.8.0
qiniu==7.2.8
\ No newline at end of file
qiniu==7.2.8
aiohttp==3.6.2
beautifulsoup4==4.9.1
selenium==3.141.0
fontTools==4.13.0
numpy==1.19.1
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment