Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
8b0682b3
Commit
8b0682b3
authored
Aug 06, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix bugs
parent
6b6f8d78
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
5 deletions
+13
-5
diary-training.py
diary-training.py
+13
-5
No files found.
diary-training.py
View file @
8b0682b3
...
@@ -32,8 +32,8 @@ exposure = exposure.rename(columns={0:"cid",1:"device_id",2:"time"})
...
@@ -32,8 +32,8 @@ exposure = exposure.rename(columns={0:"cid",1:"device_id",2:"time"})
print
(
"成功获取曝光表里的数据"
)
print
(
"成功获取曝光表里的数据"
)
# 求曝光表和点击表的差集合
# 求曝光表和点击表的差集合
exposure
.
append
(
click
)
exposure
=
exposure
.
append
(
click
)
exposure
.
append
(
click
)
exposure
=
exposure
.
append
(
click
)
subset
=
click
.
columns
.
tolist
()
subset
=
click
.
columns
.
tolist
()
exposure
=
exposure
.
drop_duplicates
(
subset
=
subset
,
keep
=
False
)
exposure
=
exposure
.
drop_duplicates
(
subset
=
subset
,
keep
=
False
)
print
(
"成功完成曝光表和点击表的差集合"
)
print
(
"成功完成曝光表和点击表的差集合"
)
...
@@ -66,6 +66,8 @@ print(data.head(2))
...
@@ -66,6 +66,8 @@ print(data.head(2))
data
=
shuffle
(
data
)
data
=
shuffle
(
data
)
print
(
"start ffm transform"
)
print
(
"start ffm transform"
)
# ffm 格式转换函数、类
# ffm 格式转换函数、类
class
FFMFormatPandas
:
class
FFMFormatPandas
:
def
__init__
(
self
):
def
__init__
(
self
):
...
@@ -123,6 +125,7 @@ class FFMFormatPandas:
...
@@ -123,6 +125,7 @@ class FFMFormatPandas:
t
=
df
.
dtypes
.
to_dict
()
t
=
df
.
dtypes
.
to_dict
()
return
pd
.
Series
({
idx
:
self
.
transform_row_
(
row
,
t
)
for
idx
,
row
in
df
.
iterrows
()})
return
pd
.
Series
({
idx
:
self
.
transform_row_
(
row
,
t
)
for
idx
,
row
in
df
.
iterrows
()})
ffm_train
=
FFMFormatPandas
()
ffm_train
=
FFMFormatPandas
()
data
=
ffm_train
.
fit_transform
(
data
,
y
=
'y'
)
data
=
ffm_train
.
fit_transform
(
data
,
y
=
'y'
)
print
(
"done transform ffm"
)
print
(
"done transform ffm"
)
...
@@ -131,6 +134,8 @@ data = pd.read_csv("/home/zhangyanzhao/data.csv",header=None)
...
@@ -131,6 +134,8 @@ data = pd.read_csv("/home/zhangyanzhao/data.csv",header=None)
print
(
"数据集大小"
)
print
(
"数据集大小"
)
print
(
data
.
shape
)
print
(
data
.
shape
)
print
(
data
.
head
(
2
))
print
(
data
.
head
(
2
))
'''
n = np.rint(data.shape[0]/8)
n = np.rint(data.shape[0]/8)
m = np.rint(data.shape[0]*(3/8))
m = np.rint(data.shape[0]*(3/8))
# 1/8的数据集用来做测试集
# 1/8的数据集用来做测试集
...
@@ -148,19 +153,22 @@ train = data.loc[m+1:]
...
@@ -148,19 +153,22 @@ train = data.loc[m+1:]
print("训练集大小")
print("训练集大小")
print(train.shape)
print(train.shape)
train.to_csv("/home/zhangyanzhao/train.csv",index = False,header = None)
train.to_csv("/home/zhangyanzhao/train.csv",index = False,header = None)
'''
print
(
"start training"
)
print
(
"start training"
)
ffm_model
=
xl
.
create_ffm
()
ffm_model
=
xl
.
create_ffm
()
ffm_model
.
setTrain
(
"/home/zhangyanzhao/
train
.csv"
)
ffm_model
.
setTrain
(
"/home/zhangyanzhao/
data
.csv"
)
ffm_model
.
setValidate
(
"/home/zhangyanzhao/validation.csv"
)
#
ffm_model.setValidate("/home/zhangyanzhao/validation.csv")
param
=
{
'task'
:
'binary'
,
'lr'
:
0.
2
,
param
=
{
'task'
:
'binary'
,
'lr'
:
0.
05
,
'lambda'
:
0.002
,
'metric'
:
'auc'
}
'lambda'
:
0.002
,
'metric'
:
'auc'
}
ffm_model
.
fit
(
param
,
'/home/zhangyanzhao/model.out'
)
ffm_model
.
fit
(
param
,
'/home/zhangyanzhao/model.out'
)
'''
ffm_model.setTest("/home/zhangyanzhao/test.csv")
ffm_model.setTest("/home/zhangyanzhao/test.csv")
ffm_model.setSigmoid()
ffm_model.setSigmoid()
ffm_model.predict("/home/zhangyanzhao/model.out", "/home/zhangyanzhao/output.txt")
ffm_model.predict("/home/zhangyanzhao/model.out", "/home/zhangyanzhao/output.txt")
'''
print
(
"end"
)
print
(
"end"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment