Commit dc9ebe26 authored by zhangmeng

Add documentation

parent d18bba96
@@ -16,3 +16,8 @@ python /srv/apps/crawler/crawler_sys/framework/search_page_single_process.py
2. source /srv/envs/esmm/bin/activate
3. python crawler/crawler_sys/utils/get_query_result.py
/opt/spark/bin/spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.executorEnv.LD_LIBRARY_PATH="/opt/java/jdk1.8.0_181/jre/lib/amd64/server:/opt/cloudera/parcels/CDH-5.16.1-1.cdh5.16.1.p0.3/lib64" --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/crawler/tasks/crawler_week_report.py
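For orientation only, here is a hedged sketch of what a PySpark driver launched by the spark-submit command above could look like; it is not the actual crawler_week_report.py. The only grounding is the command itself (yarn client mode, mysql-connector-java on --jars), so the JDBC URL, table, credentials and column name are placeholders.

```python
# Hypothetical sketch, not the real crawler_week_report.py.
from pyspark.sql import SparkSession

# Resources (memory, executors, queue) come from the spark-submit flags above.
spark = (SparkSession.builder
         .appName("crawler_week_report")
         .enableHiveSupport()
         .getOrCreate())

# mysql-connector-java on --jars suggests the report reads MySQL over JDBC.
# All connection details below are placeholders.
df = (spark.read.format("jdbc")
      .option("url", "jdbc:mysql://<mysql-host>:3306/<db>")
      .option("dbtable", "<table>")
      .option("user", "<user>")
      .option("password", "<password>")
      .option("driver", "com.mysql.jdbc.Driver")
      .load())

df.groupBy("<some_column>").count().show()   # placeholder aggregation

spark.stop()
```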
# Xiaohongshu crawling process
1. Pull the code from the litao branch on GitHub onto the spider-prod-001 server.
2. Set up the environment: activate the env -> start python -> import sys -> check sys.path -> cd into the site-packages directory listed there -> vim mypath.pth -> set its contents to your own project path. With that in place, running the code no longer fails with "crawler package not found" (see the sketch after this list).
3. To export Xiaohongshu data as a txt file, use the script under the temfile directory in maintenance; replace the email and password with your own before running it.
\ No newline at end of file
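A minimal sketch of step 2, assuming the repository was cloned to /srv/apps/crawler and the esmm env is active: it drops a mypath.pth file into the env's site-packages so that `import crawler` resolves. The checkout path is an assumption; adjust it to your own.

```python
# Sketch: register the project root on sys.path via a .pth file.
# Assumes the esmm env is active and the repo lives at /srv/apps/crawler.
import os
import sysconfig

project_root = "/srv/apps/crawler"                # assumed checkout location
site_packages = sysconfig.get_paths()["purelib"]  # site-packages of the active env

pth_path = os.path.join(site_packages, "mypath.pth")
with open(pth_path, "w") as f:
    f.write(project_root + "\n")                  # one absolute path per line

print("wrote", pth_path)   # afterwards `python -c "import crawler"` should succeed
```

.pth files are read by the site module at interpreter startup, which is why the import error disappears on the next run.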