Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
a7d512a3
Commit
a7d512a3
authored
Jan 15, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ffm转化累计相加
parent
7a5e54d0
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
21 deletions
+22
-21
data2ffm.py
eda/esmm/Feature_pipline/data2ffm.py
+22
-21
No files found.
eda/esmm/Feature_pipline/data2ffm.py
View file @
a7d512a3
...
...
@@ -38,6 +38,10 @@ class multiFFMFormatPandas:
self
.
y
=
None
def
fit
(
self
,
df
,
y
=
None
):
b
=
df
.
dtypes
c
=
list
(
b
.
values
)
d
=
tuple
(
df
.
dtypes
.
to_dict
())
f
=
dict
(
zip
(
d
,
c
))
self
.
y
=
y
df_ffm
=
df
[
df
.
columns
.
difference
([
self
.
y
])]
if
self
.
field_index_
is
None
:
...
...
@@ -49,19 +53,24 @@ class multiFFMFormatPandas:
if
self
.
feature_index_
is
None
:
self
.
feature_index_
=
dict
()
for
col
in
df
.
columns
:
self
.
feature_index_
[
col
]
=
1
last_idx
=
1
vals
=
list
(
df
[
col
]
.
unique
())
vals
.
append
(
"na"
)
vals
=
set
(
vals
)
for
val
in
vals
:
if
pd
.
isnull
(
val
):
continue
name
=
'{}_{}'
.
format
(
col
,
val
)
if
name
not
in
self
.
feature_index_
:
self
.
feature_index_
[
name
]
=
last_idx
last_idx
+=
1
last_idx
=
1
l
=
list
(
df
.
columns
)
l
.
remove
(
y
)
for
col
in
l
:
if
f
[
col
]
==
"O"
:
vals
=
list
(
df
[
col
]
.
unique
())
vals
.
append
(
"na"
)
vals
=
set
(
vals
)
for
val
in
vals
:
if
pd
.
isnull
(
val
):
continue
name
=
'{}_{}'
.
format
(
col
,
val
)
if
name
not
in
self
.
feature_index_
:
self
.
feature_index_
[
name
]
=
last_idx
last_idx
+=
1
else
:
self
.
feature_index_
[
col
]
=
last_idx
last_idx
+=
1
return
self
def
fit_transform
(
self
,
df
,
y
=
None
,
n
=
50000
,
processes
=
4
):
...
...
@@ -131,14 +140,6 @@ class multiFFMFormatPandas:
return
pd
.
Series
({
idx
:
self
.
transform_row_
(
row
,
t
)
for
idx
,
row
in
df
.
iterrows
()})
# NOTE: per the original author's comment, this method is not part of the
# original class; it was added to check whether a user is present in the
# training data set.
def is_feature_index_exist(self, name):
    """Return True if *name* is a key of ``self.feature_index_``.

    Args:
        name: Feature key to look up. Presumably a ``"<column>_<value>"``
            string as built during fitting -- confirm against callers.

    Returns:
        bool: ``True`` when ``name`` is present in ``self.feature_index_``,
        ``False`` otherwise.
    """
    # The membership test already yields a bool; no if/else needed.
    return name in self.feature_index_
def
get_data
():
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
sql
=
"select max(stat_date) from esmm_train_data"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment