Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
d6a35a2e
Commit
d6a35a2e
authored
Aug 08, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix sql bug
parent
3ac71436
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
67 additions
and
20 deletions
+67
-20
diaryTraining.py
diaryTraining.py
+19
-19
predictDiary.py
predictDiary.py
+0
-0
readme.txt
readme.txt
+33
-0
train.py
train.py
+13
-0
userProfile.py
userProfile.py
+2
-1
No files found.
diaryTraining.py
View file @
d6a35a2e
import
xlearn
as
xl
from
config
import
*
from
diaryCandidateSet
import
get_eachCityDiaryTop2000
print
(
"Start training"
)
ffm_model
=
xl
.
create_ffm
()
ffm_model
.
setTrain
(
DIRECTORY_PATH
+
"train{0}-{1}.csv"
.
format
(
DATA_START_DATE
,
VALIDATION_DATE
))
ffm_model
.
setValidate
(
DIRECTORY_PATH
+
"validation{0}.csv"
.
format
(
VALIDATION_DATE
))
param
=
{
'task'
:
'binary'
,
'lr'
:
lr
,
'lambda'
:
l2_lambda
,
'metric'
:
'auc'
}
def
train
():
print
(
"Start training"
)
ffm_model
=
xl
.
create_ffm
()
ffm_model
.
setTrain
(
DIRECTORY_PATH
+
"train{0}-{1}.csv"
.
format
(
DATA_START_DATE
,
VALIDATION_DATE
))
ffm_model
.
setValidate
(
DIRECTORY_PATH
+
"validation{0}.csv"
.
format
(
VALIDATION_DATE
))
ffm_model
.
fit
(
param
,
DIRECTORY_PATH
+
"model_{0}-{1}_lr{2}_lambda{3}.out"
.
format
(
DATA_START_DATE
,
DATA_END_DATE
,
lr
,
l2_lambda
))
param
=
{
'task'
:
'binary'
,
'lr'
:
lr
,
'lambda'
:
l2_lambda
,
'metric'
:
'auc'
}
ffm_model
.
fit
(
param
,
DIRECTORY_PATH
+
"model_{0}-{1}_lr{2}_lambda{3}.out"
.
format
(
DATA_START_DATE
,
DATA_END_DATE
,
lr
,
l2_lambda
))
print
(
"predicting"
)
ffm_model
.
setTest
(
DIRECTORY_PATH
+
"test{0}.csv"
.
format
(
TEST_DATE
))
ffm_model
.
setSigmoid
()
ffm_model
.
predict
(
DIRECTORY_PATH
+
"model_{0}-{1}_lr{2}_lambda{3}.out"
.
format
(
DATA_START_DATE
,
DATA_END_DATE
,
lr
,
l2_lambda
),
DIRECTORY_PATH
+
"testset{0}_output_model_{1}-{2}_lr{3}_lambda{4}.txt"
.
format
(
TEST_DATE
,
DATA_START_DATE
,
DATA_END_DATE
,
lr
,
l2_lambda
))
print
(
"predicting"
)
ffm_model
.
setTest
(
DIRECTORY_PATH
+
"test{0}.csv"
.
format
(
TEST_DATE
))
ffm_model
.
setSigmoid
()
ffm_model
.
predict
(
DIRECTORY_PATH
+
"model_{0}-{1}_lr{2}_lambda{3}.out"
.
format
(
DATA_START_DATE
,
DATA_END_DATE
,
lr
,
l2_lambda
),
DIRECTORY_PATH
+
"testset{0}_output_model_{1}-{2}_lr{3}_lambda{4}.txt"
.
format
(
TEST_DATE
,
DATA_START_DATE
,
DATA_END_DATE
,
lr
,
l2_lambda
))
print
(
'---------------candidates--------------'
)
get_eachCityDiaryTop2000
()
userDiaryPredict
.py
→
predictDiary
.py
View file @
d6a35a2e
File moved
readme.txt
0 → 100644
View file @
d6a35a2e
(1) 在服务器上创建nvwa隔离环境:
ssh -A gaoyazhe@bastion.prod.gengmei
rank-compute01
virtualenv -p /usr/bin/python3 nvwa
source nvwa/bin/activate
(2) 在nvwa中安装xlearn:
#参考链接:http://xlearn-doc.readthedocs.io/en/latest/install.html
cd nvwa
git clone https://github.com/aksnzhy/xlearn.git
cd xlearn
./build.sh
(3) 在nvwa中安装相关工具包:
pip install pymysql
pip install numpy
sudo apt-get install python3-dev
pip install pandas
问题:
#安装pandas出错:Python.h: No such file or directory
#解决方案:sudo apt-get install python3-dev
pip install scipy
pip install scikit-learn
pip uninstall numpy
pip install numpy==1.14.5
问题:
#import sklearn出错:RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility
#解决方案:pip uninstall numpy;pip install numpy==1.14.5
(4) requirements:
pymysql=='0.9.2'
numpy=='1.14.5'
pandas=='0.22.0'
scipy=='1.1.0'
sklearn=='0.19.2'
\ No newline at end of file
train.py
0 → 100644
View file @
d6a35a2e
from
processData
import
*
from
diaryTraining
import
*
from
diaryCandidateSet
import
get_eachCityDiaryTop2000
# 把数据获取、特征转换、模型训练的模型串联在一起
if
__name__
==
"__main__"
:
data_fe
=
feature_en
()
ffm_transform
(
data_fe
)
train
()
print
(
'---------------prepare candidates--------------'
)
get_eachCityDiaryTop2000
()
userProfile.py
View file @
d6a35a2e
...
...
@@ -3,6 +3,7 @@ from utils import con_sql
def
fetch_user_profile
(
device_id
):
# TODO sql语句中的device_id可能对应多个city_id
sql
=
"select device_id,city_id from data_feed_click limit 1"
sql
=
"select device_id,city_id from "
\
"data_feed_click where device_id = '{0}' limit 1"
.
format
(
device_id
)
user_profile
=
con_sql
(
sql
)
return
user_profile
,
user_profile
.
empty
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment