Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
gm_strategy_cvr
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
gm_strategy_cvr
Commits
430956d8
Commit
430956d8
authored
Jul 20, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add prediction
parent
aeb4ca60
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
79 additions
and
27 deletions
+79
-27
requirements.txt
requirements.txt
+2
-0
__init__.py
src/__init__.py
+0
-0
main.py
src/main.py
+15
-6
fe.py
src/models/esmm/fe.py
+5
-2
input_fn.py
src/models/esmm/input_fn.py
+16
-11
model.py
src/models/esmm/model.py
+41
-8
No files found.
requirements.txt
View file @
430956d8
tensorflow
==1.15.2
keras
==2.3.1
scikit-learn
==0.23.1
redis
==2.10.5
src/__init__.py
0 → 100644
View file @
430956d8
src/main.py
View file @
430956d8
import
time
from
datetime
import
datetime
from
pathlib
import
Path
import
tensorflow
as
tf
...
...
@@ -6,12 +8,13 @@ from sklearn.model_selection import train_test_split
from
models.esmm.fe
import
(
click_feature_engineering
,
device_feature_engineering
,
diary_feature_engineering
,
join_features
,
read_csv_data
)
from
models.esmm.input_fn
import
build_features
,
esmm_input_fn
from
models.esmm.model
import
esmm_model_fn
,
model_export
from
models.esmm.model
import
esmm_model_fn
,
model_export
,
model_predict
tf
.
compat
.
v1
.
enable_eager_execution
()
def
main
():
time_begin
=
time
.
time
()
device_df
,
diary_df
,
click_df
,
conversion_df
=
read_csv_data
(
Path
(
"~/Desktop/cvr_data/"
))
device_df
=
device_feature_engineering
(
device_df
)
diary_df
=
diary_feature_engineering
(
diary_df
)
...
...
@@ -27,12 +30,18 @@ def main():
model_path
=
str
(
Path
(
"~/Desktop/models/"
)
.
expanduser
())
model
=
tf
.
estimator
.
Estimator
(
model_fn
=
esmm_model_fn
,
params
=
params
,
model_dir
=
model_path
)
model
.
train
(
input_fn
=
lambda
:
esmm_input_fn
(
train_df
,
shuffle
=
True
),
steps
=
5000
)
model
.
evaluate
(
input_fn
=
lambda
:
esmm_input_fn
(
val_df
,
False
),
steps
=
5000
)
model_export
(
model
,
all_features
,
model_path
)
print
(
"train"
)
model
.
train
(
input_fn
=
lambda
:
esmm_input_fn
(
train_df
.
sample
(
100000
),
shuffle
=
True
),
steps
=
5000
)
model
.
evaluate
(
input_fn
=
lambda
:
esmm_input_fn
(
val_df
.
sample
(
100000
),
False
),
steps
=
5000
)
save_path
=
model_export
(
model
,
all_features
,
model_path
)
predictions
=
model
.
predict
(
input_fn
=
lambda
:
esmm_input_fn
(
test_df
,
False
))
print
(
next
(
iter
(
predictions
)))
# predictions = model.predict(input_fn=lambda: esmm_input_fn(test_df, False))
# print(next(iter(predictions)))
model_predict
(
test_df
.
sample
(
300
),
save_path
)
total_time
=
(
time
.
time
()
-
time_begin
)
/
60
print
(
"cost {:.2f} mins at {}"
.
format
(
total_time
,
datetime
.
now
()))
if
__name__
==
"__main__"
:
...
...
src/models/esmm/fe.py
View file @
430956d8
...
...
@@ -126,8 +126,8 @@ def join_features(device_df, diary_df, cc_df):
df
[
"fd2"
]
=
df
[
"first_demands"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
df
[
"fd3"
]
=
df
[
"first_demands"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
df
[
"sd1"
]
=
df
[
"second_demands"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
0
))
df
[
"sd
1
"
]
=
df
[
"second_demands"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
df
[
"sd
2
"
]
=
df
[
"second_demands"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
df
[
"sd
2
"
]
=
df
[
"second_demands"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
df
[
"sd
3
"
]
=
df
[
"second_demands"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
df
[
"fs1"
]
=
df
[
"first_solutions"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
0
))
df
[
"fs2"
]
=
df
[
"first_solutions"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
df
[
"fs3"
]
=
df
[
"first_solutions"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
...
...
@@ -154,5 +154,8 @@ def join_features(device_df, diary_df, cc_df):
"second_solutions"
,
"first_positions_x"
,
"first_positions_y"
,
"first_positions"
,
"second_positions_x"
,
"second_positions_y"
,
"second_positions"
,
"projects_x"
,
"projects_y"
,
"projects"
]
# for col in drop_columns:
# if col in df.columns:
# df.drop(col, inplace=True, axis=1)
df
.
drop
(
drop_columns
,
inplace
=
True
,
axis
=
1
)
return
df
src/models/esmm/input_fn.py
View file @
430956d8
...
...
@@ -13,21 +13,26 @@ def build_features(df):
categorical_columns
=
[
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
"device_fd"
,
"content_fd"
,
"fd1"
,
"fd2"
,
"fd3"
"device_fd"
,
"content_fd"
,
"fd1"
,
"fd2"
,
"fd3"
,
"device_sd"
,
"content_sd"
,
"sd1"
,
"sd2"
,
"sd3"
,
"device_fs"
,
"content_fs"
,
"fs1"
,
"fs2"
,
"fs3"
,
"device_ss"
,
"content_ss"
,
"ss1"
,
"ss2"
,
"ss3"
,
"device_fp"
,
"content_fp"
,
"fp1"
,
"fp2"
,
"fp3"
,
"device_sp"
,
"content_sp"
,
"sp1"
,
"sp2"
,
"sp3"
,
"device_p"
,
"content_p"
,
"p1"
,
"p2"
,
"p3"
]
categorical_ignore_columns
=
[]
categorical_features
=
[]
for
col
in
categorical_columns
:
if
col
==
"card_id"
:
categorical_features
.
append
(
fc
.
embedding_column
(
fc
.
categorical_column_with_hash_bucket
(
col
,
20000
,
dtype
=
tf
.
int64
),
dimension
=
int
(
df
[
col
]
.
size
**
0.25
)))
elif
col
==
"device_id"
:
categorical_features
.
append
(
fc
.
embedding_column
(
fc
.
categorical_column_with_hash_bucket
(
col
,
200000
),
dimension
=
int
(
df
[
col
]
.
size
**
0.25
)))
else
:
categorical_features
.
append
(
fc
.
indicator_column
(
fc
.
categorical_column_with_vocabulary_list
(
col
,
create_vocabulary_list
(
df
,
col
))))
if
col
not
in
categorical_ignore_columns
:
if
col
==
"card_id"
:
categorical_features
.
append
(
fc
.
embedding_column
(
fc
.
categorical_column_with_hash_bucket
(
col
,
20000
,
dtype
=
tf
.
int64
),
dimension
=
int
(
df
[
col
]
.
size
**
0.25
)))
elif
col
==
"device_id"
:
categorical_features
.
append
(
fc
.
embedding_column
(
fc
.
categorical_column_with_hash_bucket
(
col
,
200000
),
dimension
=
int
(
df
[
col
]
.
size
**
0.25
)))
else
:
categorical_features
.
append
(
fc
.
indicator_column
(
fc
.
categorical_column_with_vocabulary_list
(
col
,
create_vocabulary_list
(
df
,
col
))))
all_features
=
(
numeric_features
+
categorical_features
)
return
all_features
...
...
src/models/esmm/model.py
View file @
430956d8
...
...
@@ -70,12 +70,45 @@ def esmm_model_fn(features, labels, mode, params):
return
res
def model_export(model, feature_columns, save_path):
    """Export ``model`` as a SavedModel and return the export directory.

    A parsing serving-input receiver is built from ``feature_columns`` so the
    exported model accepts serialized ``tf.train.Example`` protos.

    Args:
        model: Object exposing ``export_saved_model`` (the caller in main.py
            passes a ``tf.estimator.Estimator``).
        feature_columns: Feature columns handed to
            ``fc.make_parse_example_spec`` to derive the parsing spec.
        save_path: Base directory passed to ``export_saved_model``.

    Returns:
        The export directory as a ``str``.
    """
    feature_spec = fc.make_parse_example_spec(feature_columns)
    serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
        feature_spec
    )

    # Export exactly once; a second call would write another SavedModel copy.
    export_dir = model.export_saved_model(save_path, serving_input_fn)

    # The export path may come back as bytes; str(x, encoding=...) raises
    # TypeError on a plain str, so decode only when needed.
    if isinstance(export_dir, bytes):
        return export_dir.decode("utf-8")
    return str(export_dir)
def _int64_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by an int64 list.

    The value is placed in a one-element list, so the resulting feature
    always carries exactly one entry.
    """
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)
def _float_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by a float list.

    The value is placed in a one-element list, so the resulting feature
    always carries exactly one entry.
    """
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)
def _bytes_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by a bytes list.

    The value is placed in a one-element list, so the resulting feature
    always carries exactly one entry.
    """
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def model_predict(inputs, model_path):
    """Run a SavedModel over the rows of ``inputs`` and print the predictions.

    Every row is turned into a serialized ``tf.train.Example``; the whole
    batch is then fed to the predictor under the ``"examples"`` key.

    Args:
        inputs: Table iterated with ``iterrows()`` (a pandas DataFrame in the
            caller shown in main.py). Columns named ``click_label`` and
            ``conversion_label`` are skipped.
        model_path: Path handed to ``tf.contrib.predictor.from_saved_model``.

    Returns:
        The object returned by the predictor (it is also printed).
    """
    predict_fn = tf.contrib.predictor.from_saved_model(model_path)

    # Sets give O(1) membership tests inside the per-cell loop below.
    label_columns = {"click_label", "conversion_label"}
    int_columns = {
        "active_type", "active_days", "card_id", "is_pure_author",
        "is_have_reply", "is_have_pure_reply", "content_level",
        "topic_num", "favor_num", "vote_num",
    }
    float_columns = {"one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr"}

    examples = []
    for _, row in inputs.iterrows():
        features = {}
        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for col, value in row.items():
            if col in label_columns:
                continue
            if col in int_columns:
                # iterrows() does not preserve column dtypes, so an integer
                # column can arrive as a float; coerce before building the
                # int64 feature.
                features[col] = _int64_feature(int(value))
            elif col in float_columns:
                features[col] = _float_feature(float(value))
            else:
                # Every remaining column is sent as a UTF-8 encoded string.
                features[col] = _bytes_feature(str(value).encode(encoding="utf-8"))
        example = tf.train.Example(features=tf.train.Features(feature=features))
        examples.append(example.SerializeToString())

    predictions = predict_fn({"examples": examples})
    print(predictions)
    return predictions
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment