#!/bin/bash
#
# ESMM (DeepCvrMTL) batch job. In order, it:
#   1. removes files left in the per-dataset directories and old "201*"
#      checkpoint directories,
#   2. runs Feature_pipline/data2ffm.py,
#   3. prints a duplicate-ratio figure labelled "Bayes Error Rate" from tr.csv,
#   4. splits tr/va/native/nearby CSVs into chunks, runs get_tfrecord.py on
#      each dataset directory, concatenates the *.tfrecord files into one file
#      per dataset and removes the chunk files,
#   5. runs DeepCvrMTL.py once with --task_type=train and twice with
#      --task_type=infer (native, nearby),
#   6. runs sort_and_2sql.py and finally send_mail.py.
# Wall-clock time is printed between the major stages.
#
# NOTE(review): the steps are intentionally NOT run under `set -e`. The
# original script carried on after a failing step (and still reached
# send_mail.py); that best-effort flow is preserved. Failures are reported on
# stderr instead. Confirm whether the job should abort on the first error.

set -u
set -o pipefail

readonly PYTHON_PATH=/home/gaoyazhe/miniconda3/bin/python
readonly MODEL_PATH=/srv/apps/ffm-baseline/eda/esmm
readonly DATA_PATH=/home/gmuser/esmm_data

# "<dataset>:<number of chunks the CSV is divided into>".
readonly DATASETS=(tr:15 va:5 native:15 nearby:5)

# Hyper-parameters shared by the train run and both infer runs.
readonly MODEL_ARGS=(
  --ctr_task_wgt=0.3
  --learning_rate=0.0001
  --deep_layers=256,128
  --dropout=0.8,0.5
  --optimizer=Adam
  --num_epochs=1
  --embedding_size=16
  --batch_size=1024
  --field_size=8
  --feature_size=2000
  --l2_reg=0.005
  --log_steps=100
  --num_threads=36
  "--model_dir=${DATA_PATH}/model_ckpt/DeepCvrMTL/"
)

#######################################
# Prints a warning on stderr.
# Arguments: $* - message
#######################################
warn() {
  printf 'warn: %s\n' "$*" >&2
}

#######################################
# Runs a command and reports a failure on stderr without stopping the job.
# Arguments: $@ - command and its arguments
# Returns:   the exit status of the command
#######################################
run_step() {
  local rc=0
  "$@" || rc=$?
  if (( rc != 0 )); then
    warn "step failed (exit ${rc}): $*"
  fi
  return "${rc}"
}

#######################################
# Prints a label followed by the current local time.
# Globals:   current, timeStamp, currentTimeStamp (written)
# Arguments: $1 - label printed before the time
# Outputs:   two lines on stdout: the label, then "YYYY-mm-dd HH:MM:SS"
#######################################
log_time() {
  local nanos
  echo "$1"
  current=$(date "+%Y-%m-%d %H:%M:%S")
  timeStamp=$(date -d "${current}" +%s)
  nanos=$(date "+%N")
  # %N is zero-padded; force base 10 so a leading 0 is not parsed as octal.
  # currentTimeStamp (epoch milliseconds) is not read by this script; it is
  # still set because the original script set it.
  # shellcheck disable=SC2034
  currentTimeStamp=$(( timeStamp * 1000 + 10#${nanos} / 1000000 ))
  echo "${current}"
}

#######################################
# Prints the "Bayes Error Rate" percentage derived from tr.csv.
# Column 5 (tab-separated) is split on commas: all_sample counts the distinct
# concatenations of sub-fields 2,3,4; uniq_feat counts the values of
# sub-field 4 that occur exactly once. The printed figure is
# (all_sample - uniq_feat) * 100 / all_sample, using integer division.
# Globals:   DATA_PATH (read)
# Outputs:   one line on stdout, or a warning on stderr when there is no data
#######################################
report_bayes_error_rate() {
  local csv="${DATA_PATH}/tr.csv"
  local all_sample uniq_feat repe_feat

  all_sample=$(
    awk -F '\t' '{print $5}' "${csv}" \
      | awk -F ',' '{print $2$3$4}' \
      | sort \
      | uniq \
      | wc -l
  )
  uniq_feat=$(
    awk -F '\t' '{print $5}' "${csv}" \
      | awk -F ',' '{print $4}' \
      | sort \
      | uniq -u \
      | wc -l
  )

  # Avoid a division by zero when tr.csv is missing or empty.
  if (( all_sample == 0 )); then
    warn "no samples found in ${csv}; skipping Bayes Error Rate"
    return 1
  fi

  repe_feat=$(( all_sample - uniq_feat ))
  printf 'Bayes Error Rate: %d%%\n' "$(( repe_feat * 100 / all_sample ))"
}

#######################################
# Splits ${DATA_PATH}/<name>.csv into ${DATA_PATH}/<name>/<name>_NNNN.csv.
# Globals:   DATA_PATH (read)
# Arguments: $1 - dataset name
#            $2 - divisor used to derive the lines-per-chunk value
# Returns:   non-zero when the CSV is missing/empty or split fails
#######################################
split_into_chunks() {
  local name=$1 chunks=$2
  local csv="${DATA_PATH}/${name}.csv"
  local total per_chunk

  if [[ ! -s "${csv}" ]]; then
    warn "${csv} is missing or empty; nothing to split"
    return 1
  fi

  total=$(wc -l < "${csv}")
  per_chunk=$(( total / chunks ))
  # split rejects "-l 0", which the integer division yields for short files.
  if (( per_chunk < 1 )); then
    per_chunk=1
  fi

  split -l "${per_chunk}" -d -a 4 --additional-suffix=.csv \
    -- "${csv}" "${DATA_PATH}/${name}/${name}_"
}

#######################################
# Invokes DeepCvrMTL.py with the shared hyper-parameters.
# Globals:   PYTHON_PATH, MODEL_PATH, MODEL_ARGS (read)
# Arguments: $1 - value for --data_dir
#            $2 - value for --task_type
#######################################
run_model() {
  local data_dir=$1 task_type=$2
  "${PYTHON_PATH}" "${MODEL_PATH}/Model_pipline/DeepCvrMTL.py" \
    "${MODEL_ARGS[@]}" \
    "--data_dir=${data_dir}" \
    "--task_type=${task_type}"
}

main() {
  local spec name

  log_time "start time"

  echo "rm leave tfrecord"
  for spec in "${DATASETS[@]}"; do
    name=${spec%%:*}
    # ${VAR:?} aborts instead of expanding to "/<name>/*" if the path is empty.
    rm -f -- "${DATA_PATH:?}/${name}"/*
  done
  rm -rf -- "${DATA_PATH:?}"/model_ckpt/DeepCvrMTL/201*

  echo "data2ffm"
  # NOTE(review): infer.log is truncated here and again by each infer run
  # below, so only the last writer's output survives. Kept as in the original;
  # confirm whether the later runs should append instead.
  run_step "${PYTHON_PATH}" "${MODEL_PATH}/Feature_pipline/data2ffm.py" \
    > "${DATA_PATH}/infer.log"

  report_bayes_error_rate

  echo "split data"
  for spec in "${DATASETS[@]}"; do
    split_into_chunks "${spec%%:*}" "${spec##*:}"
  done

  echo "csv to tfrecord"
  for spec in "${DATASETS[@]}"; do
    name=${spec%%:*}
    run_step "${PYTHON_PATH}" "${MODEL_PATH}/Feature_pipline/get_tfrecord.py" \
      "--input_dir=${DATA_PATH}/${name}/" \
      "--output_dir=${DATA_PATH}/${name}/"
  done

  # Merge the per-chunk tfrecords into one file per dataset.
  for spec in "${DATASETS[@]}"; do
    name=${spec%%:*}
    cat -- "${DATA_PATH}/${name}"/*.tfrecord \
      > "${DATA_PATH}/${name}/${name}.tfrecord"
  done

  # Drop the chunk files ("<name>_*"); the merged "<name>.tfrecord" remains.
  for spec in "${DATASETS[@]}"; do
    name=${spec%%:*}
    rm -f -- "${DATA_PATH:?}/${name}/${name}"_*
  done

  log_time "data transform time"

  echo "train..."
  run_step run_model "${DATA_PATH}" train

  log_time "train time"

  echo "infer native..."
  run_step run_model "${DATA_PATH}/native" infer > "${DATA_PATH}/infer.log"

  echo "infer nearby..."
  run_step run_model "${DATA_PATH}/nearby" infer > "${DATA_PATH}/infer.log"

  echo "sort and 2sql"
  run_step "${PYTHON_PATH}" "${MODEL_PATH}/Model_pipline/sort_and_2sql.py"

  log_time "infer and sort and 2sql time"

  # Last command: its exit status becomes the script's exit status.
  run_step "${PYTHON_PATH}" "${MODEL_PATH}/Model_pipline/send_mail.py"
}

main "$@"