Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
7a48b8e5
Commit
7a48b8e5
authored
Jan 07, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改ffm转化函数,改成features累计相加
parent
1d7d0e0c
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
54 additions
and
30 deletions
+54
-30
get_tfrecord.py
eda/esmm/Feature_pipline/get_tfrecord.py
+18
-18
ffm.py
tensnsorflow/ffm.py
+22
-11
test.py
tensnsorflow/test.py
+14
-1
No files found.
eda/esmm/Feature_pipline/get_tfrecord.py
View file @
7a48b8e5
...
...
@@ -70,24 +70,24 @@ def gen_tfrecords(in_file):
#"feat_vals": tf.train.Feature(float_list=tf.train.FloatList(value=feat_vals))})
#3 handle the special fields separately
for
f
,
(
fname
,
def_id
)
in
UMH_Fileds
.
items
():
if
f
in
ffv
[:,
0
]:
mask
=
np
.
array
(
f
==
ffv
[:,
0
])
feat_ids
=
ffv
[
mask
,
1
]
feat_vals
=
ffv
[
mask
,
2
]
else
:
feat_ids
=
np
.
array
([
def_id
])
feat_vals
=
np
.
array
([
1.0
])
feature
.
update
({
fname
+
"ids"
:
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
feat_ids
.
astype
(
np
.
int
))),
fname
+
"vals"
:
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
feat_vals
.
astype
(
np
.
float
)))})
for
f
,
(
fname
,
def_id
)
in
Ad_Fileds
.
items
():
if
f
in
ffv
[:,
0
]:
mask
=
np
.
array
(
f
==
ffv
[:,
0
])
feat_ids
=
ffv
[
mask
,
1
]
else
:
feat_ids
=
np
.
array
([
def_id
])
feature
.
update
({
fname
+
"ids"
:
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
feat_ids
.
astype
(
np
.
int
)))})
#
for f, (fname, def_id) in UMH_Fileds.items():
#
if f in ffv[:,0]:
#
mask = np.array(f == ffv[:,0])
#
feat_ids = ffv[mask,1]
#
feat_vals= ffv[mask,2]
#
else:
#
feat_ids = np.array([def_id])
#
feat_vals = np.array([1.0])
#
feature.update({fname+"ids": tf.train.Feature(int64_list=tf.train.Int64List(value=feat_ids.astype(np.int))),
#
fname+"vals": tf.train.Feature(float_list=tf.train.FloatList(value=feat_vals.astype(np.float)))})
#
for f, (fname, def_id) in Ad_Fileds.items():
#
if f in ffv[:,0]:
#
mask = np.array(f == ffv[:,0])
#
feat_ids = ffv[mask,1]
#
else:
#
feat_ids = np.array([def_id])
#
feature.update({fname+"ids": tf.train.Feature(int64_list=tf.train.Int64List(value=feat_ids.astype(np.int)))})
# serialized to Example
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
feature
))
...
...
tensnsorflow/ffm.py
View file @
7a48b8e5
...
...
@@ -38,6 +38,10 @@ class multiFFMFormatPandas:
self
.
y
=
None
def
fit
(
self
,
df
,
y
=
None
):
b
=
df
.
dtypes
c
=
list
(
b
.
values
)
d
=
tuple
(
df
.
dtypes
.
to_dict
())
f
=
dict
(
zip
(
d
,
c
))
self
.
y
=
y
df_ffm
=
df
[
df
.
columns
.
difference
([
self
.
y
])]
if
self
.
field_index_
is
None
:
...
...
@@ -49,17 +53,24 @@ class multiFFMFormatPandas:
if
self
.
feature_index_
is
None
:
self
.
feature_index_
=
dict
()
for
col
in
df
.
columns
:
self
.
feature_index_
[
col
]
=
1
last_idx
=
1
vals
=
df
[
col
]
.
unique
()
for
val
in
vals
:
if
pd
.
isnull
(
val
):
continue
name
=
'{}_{}'
.
format
(
col
,
val
)
if
name
not
in
self
.
feature_index_
:
self
.
feature_index_
[
name
]
=
last_idx
last_idx
+=
1
last_idx
=
1
l
=
list
(
df
.
columns
)
l
.
remove
(
y
)
for
col
in
l
:
if
f
[
col
]
==
"O"
:
vals
=
df
[
col
]
.
unique
()
for
val
in
vals
:
if
pd
.
isnull
(
val
):
continue
name
=
'{}_{}'
.
format
(
col
,
val
)
if
name
not
in
self
.
feature_index_
:
self
.
feature_index_
[
name
]
=
last_idx
last_idx
+=
1
else
:
self
.
feature_index_
[
col
]
=
last_idx
last_idx
+=
1
print
(
"last_idx"
)
print
(
last_idx
)
return
self
def
fit_transform
(
self
,
df
,
y
=
None
,
n
=
50000
,
processes
=
4
):
...
...
tensnsorflow/test.py
View file @
7a48b8e5
...
...
@@ -75,6 +75,19 @@ def get_cid():
if __name__ == "__main__":
    get_cid()

    # Serialize every row of train_values, paired with the label at the same
    # index in train_labels, into a single TFRecord file.
    # NOTE(review): train_values / train_labels are not defined in the visible
    # part of this file -- presumably NumPy arrays built earlier; confirm.
    #
    # The `with` block guarantees the writer is closed even when building or
    # writing a record raises, instead of relying on a trailing close() call
    # that is skipped on error.
    with tf.python_io.TFRecordWriter('csv_train.tfrecords') as writer:
        # range() rather than xrange(): xrange does not exist on Python 3.
        for i in range(train_values.shape[0]):
            # tobytes() is the supported name for the deprecated tostring().
            image_raw = train_values[i].tobytes()

            # build example protobuf
            example = tf.train.Example(features=tf.train.Features(feature={
                'image_raw': tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image_raw])),
                'label': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[train_labels[i]])),
            }))
            writer.write(record=example.SerializeToString())
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment