Commit d8cf717f authored by 赵威

add script

parent 4cdb7529
# Launch the answer word-vector Spark job on YARN in the background.
#
# The job is started from the project root; abort instead of submitting from
# the wrong working directory if that directory is unavailable.
# NOTE(review): the Python entry point presumably resolves project-local
# packages relative to the current directory -- confirm against answer.py.
cd /srv/apps/strategy_embedding/ || exit 1

# NOTE(review): spark.storage.memoryFraction and spark.shuffle.memoryFraction
# are legacy memory settings; on Spark >= 1.6 they presumably have no effect
# unless legacy memory mode is enabled -- confirm against the cluster version.
#
# stdout and stderr are both sent to the log file so that driver-side errors
# are kept even when the script is not started from a terminal.
nohup spark-submit \
    --master yarn \
    --deploy-mode client \
    --queue root.strategy \
    --driver-memory 16g \
    --executor-memory 1g \
    --executor-cores 1 \
    --num-executors 70 \
    --conf spark.default.parallelism=100 \
    --conf spark.storage.memoryFraction=0.5 \
    --conf spark.shuffle.memoryFraction=0.3 \
    --conf spark.locality.wait=0 \
    --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar \
    /srv/apps/strategy_embedding/word_vector/answer.py \
    > ~/answer_item2vec.log 2>&1 &
......@@ -9,8 +9,7 @@ sys.path.append(os.path.realpath("."))
import random
from gensim.models import Word2Vec, word2vec
from utils.date import (get_ndays_before_no_minus,
get_ndays_before_with_format)
from utils.date import (get_ndays_before_no_minus, get_ndays_before_with_format)
from utils.es import get_online_ids
from utils.files import DATA_PATH, MODEL_PATH
from utils.spark import get_spark
......@@ -35,7 +34,7 @@ def get_answer_click_data(spark, start, end):
where action = 'page_view'
AND partition_date BETWEEN '{}' AND '{}'
AND page_name='answer_detail'
AND page_stay>=1
AND page_stay>=2
AND cl_id is not null
AND cl_id != ''
AND business_id is not null
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment