Commit d6a35a2e authored by 张彦钊's avatar 张彦钊

fix sql bug

parent 3ac71436
import xlearn as xl
from config import *
from diaryCandidateSet import get_eachCityDiaryTop2000
print("Start training")
ffm_model = xl.create_ffm()
ffm_model.setTrain(DIRECTORY_PATH + "train{0}-{1}.csv".format(DATA_START_DATE, VALIDATION_DATE))
ffm_model.setValidate(DIRECTORY_PATH + "validation{0}.csv".format(VALIDATION_DATE))
param = {'task': 'binary', 'lr': lr, 'lambda': l2_lambda, 'metric': 'auc'}
def train():
print("Start training")
ffm_model = xl.create_ffm()
ffm_model.setTrain(DIRECTORY_PATH + "train{0}-{1}.csv".format(DATA_START_DATE, VALIDATION_DATE))
ffm_model.setValidate(DIRECTORY_PATH + "validation{0}.csv".format(VALIDATION_DATE))
ffm_model.fit(param, DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE, lr, l2_lambda))
param = {'task': 'binary', 'lr': lr, 'lambda': l2_lambda, 'metric': 'auc'}
ffm_model.fit(param, DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE, lr, l2_lambda))
print("predicting")
ffm_model.setTest(DIRECTORY_PATH + "test{0}.csv".format(TEST_DATE))
ffm_model.setSigmoid()
ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE, lr, l2_lambda),
DIRECTORY_PATH + "testset{0}_output_model_{1}-{2}_lr{3}_lambda{4}.txt".format(TEST_DATE,
DATA_START_DATE,
DATA_END_DATE, lr,
l2_lambda))
print("predicting")
ffm_model.setTest(DIRECTORY_PATH + "test{0}.csv".format(TEST_DATE))
ffm_model.setSigmoid()
ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE, lr, l2_lambda),
DIRECTORY_PATH + "testset{0}_output_model_{1}-{2}_lr{3}_lambda{4}.txt".format(TEST_DATE,
DATA_START_DATE,
DATA_END_DATE, lr,
l2_lambda))
print('---------------candidates--------------')
get_eachCityDiaryTop2000()
(1) 在服务器上创建nvwa隔离环境:
ssh -A gaoyazhe@bastion.prod.gengmei
rank-compute01
virtualenv -p /usr/bin/python3 nvwa
source nvwa/bin/activate
(2) 在nvwa中安装xlearn:
#参考链接:http://xlearn-doc.readthedocs.io/en/latest/install.html
cd nvwa
git clone https://github.com/aksnzhy/xlearn.git
cd xlearn
./build.sh
(3) 在nvwa中安装相关工具包:
pip install pymysql
pip install numpy
sudo apt-get install python3-dev
pip install pandas
问题:
#安装pandas出错:Python.h: No such file or directory
#解决方案:sudo apt-get install python3-dev
pip install scipy
pip install scikit-learn
pip uninstall numpy
pip install numpy==1.14.5
问题:
#import sklearn出错:RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility
#解决方案:pip uninstall numpy;pip install numpy==1.14.5
(4) requirements:
pymysql==0.9.2
numpy==1.14.5
pandas==0.22.0
scipy==1.1.0
scikit-learn==0.19.2
\ No newline at end of file
from processData import *
from diaryTraining import *
from diaryCandidateSet import get_eachCityDiaryTop2000
# Chain data fetching, feature transformation and model training together.
if __name__ == "__main__":
    # Feed the output of feature_en() straight into ffm_transform().
    ffm_transform(feature_en())
    train()
    print('---------------prepare candidates--------------')
    get_eachCityDiaryTop2000()
......@@ -3,6 +3,7 @@ from utils import con_sql
def fetch_user_profile(device_id):
    """Look up the city recorded for a device in ``data_feed_click``.

    Args:
        device_id: Device identifier to filter on. It is interpolated into the
            SQL text as a quoted string literal.

    Returns:
        A ``(user_profile, is_empty)`` tuple: whatever ``con_sql`` returns for
        the query, and that object's ``.empty`` attribute (presumably a pandas
        DataFrame -- confirm against ``utils.con_sql``).
    """
    # TODO: a device_id in this table may correspond to several city_ids;
    # the query keeps only one row (``limit 1``).
    # NOTE(review): device_id is formatted directly into the SQL string, so a
    # value containing a quote can change the query. con_sql only receives the
    # finished SQL text here; if it can accept bound parameters, switch to a
    # parameterized query.
    sql = "select device_id,city_id from " \
          "data_feed_click where device_id = '{0}' limit 1".format(device_id)
    user_profile = con_sql(sql)
    return user_profile, user_profile.empty
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment