Commit 21dc7f75 authored by litaolemo's avatar litaolemo

update

parent eb27a82b
......@@ -2,17 +2,17 @@
## 发布者页爬虫
1. 部署在BJ-GM-Prod-Cos-faiss001/srv/apps/ crontab -e
2. 切换权限 sudo su - gmuser
3. source /root/anaconda3/bin/activate
4. 创建虚拟环境 conda activate crawler_env/conda deactivate
5. 抓取程序 nohup python /srv/apps/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process_by_date_from_redis.py > /data/log/fect_task.log &
6. 写入抓取url程序 python /srv/apps/crawler/crawler_sys/framework/write_releasers_to_redis.py -p weibo -d 1 -proxies 2
7.
2. sudo su - gmuser
3. workon litao
4. 抓取程序 nohup python /srv/apps/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process_by_date_from_redis.py > /data/log/fect_task.log &
5. 写入抓取url程序 python /srv/apps/crawler/crawler_sys/framework/write_releasers_to_redis.py -p weibo -d 1 -proxies 2
## 搜索页爬虫
python /srv/apps/crawler/crawler_sys/framework/search_page_single_process.py
## 数据周报
服务器 airflow002
1. 切换权限 sudo su - gmuser
2. source /root/anaconda3/bin/activate
2. source /srv/envs/esmm/bin/activate
3. python crawler/crawler_sys/utils/get_query_result.py
/opt/spark/bin/spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.executorEnv.LD_LIBRARY_PATH="/opt/java/jdk1.8.0_181/jre/lib/amd64/server:/opt/cloudera/parcels/CDH-5.16.1-1.cdh5.16.1.p0.3/lib64" --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/crawler/tasks/crawler_week_report.py
......@@ -2,7 +2,7 @@
"""
Created on Wed Feb 20 16:08:17 2019
@author: zhouyujiang
@author: litao
发送有附件的 邮件
......@@ -131,8 +131,7 @@ def send_file_email(file_path, data_str, email_group=[],
if __name__ == '__main__':
send_file_email("",'2019-02-21',
sender="litao@igengmei.com",email_group=["litao@igengmei.com"],email_msg_body_str="test",title_str="test",cc_group=["litao@igengmei.com"])
send_file_email("",'',sender="litao@igengmei.com",email_group=["litao@igengmei.com"],email_msg_body_str="test",title_str="test",cc_group=["litao@igengmei.com"],file="/srv/apps/meta_base_code/tmp.log")
......
......@@ -10,6 +10,5 @@ aiohttp==3.6.2
selenium==3.141.0
fontTools==4.13.0
numpy==1.19.1
pandas==1.0.5
pymysql==0.10.0
qiniu==7.1.4
\ No newline at end of file
#!/bin/bash
# Launch the crawler URL-writing tasks (one per platform) in the background.
# Intended to be run as gmuser (e.g. from cron), with the crawler_env conda
# environment providing the Python dependencies.
source /root/anaconda3/bin/activate
conda activate crawler_env
# Bug fix: both tasks previously redirected with `>` to the SAME file
# (/data/log/crawler/write_task.log), so the two concurrent background
# processes clobbered each other's output. Each platform now gets its own log.
python /srv/apps/crawler/crawler_sys/framework/write_releasers_to_redis.py -p weibo -d 1 -proxies 5 > /data/log/crawler/write_task_weibo.log &
python /srv/apps/crawler/crawler_sys/framework/write_releasers_to_redis.py -p douban -d 1 -proxies 5 > /data/log/crawler/write_task_douban.log &
\ No newline at end of file
#!/bin/bash
# Daily restart of the crawler fetch and backend-processing tasks.
# Run this script AS gmuser (e.g. from gmuser's crontab).
source /root/anaconda3/bin/activate
conda activate crawler_env
# NOTE(review): the original script ran `sudo su - gmuser` here, which spawns
# an interactive login shell and blocks the script (fatal under cron). It has
# been removed; switch user before invoking this script instead.
# Stop the currently running tasks with SIGTERM. `pkill -f` matches against
# the full command line and is robust, unlike the previous
# `ps aux | grep ... | cut -c 9-15` pipeline, which breaks when the PID
# column shifts (e.g. PIDs wider than the hard-coded character range).
pkill -15 -f update_data_in_target_releasers_multi_process_by_date_from_redis
pkill -15 -f cal_ni_and_put_to_backend
# Yesterday's date, used to tag the log files (YYYYMMDD).
current_date=`date -d "-1 day" "+%Y%m%d"`
python /srv/apps/crawler/crawler_sys/framework/update_data_in_target_releasers_multi_process_by_date_from_redis.py > /data/log/crawler/write_task_${current_date}_.log &
# Bug fix: this task previously logged to /data/log/crawler/.log — an empty
# basename (a hidden file) that was almost certainly unintended.
python /srv/apps/crawler/crawler_sys/scheduler/cal_ni_and_put_to_backend.py > /data/log/crawler/cal_ni_${current_date}.log &
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment