Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
gm_strategy_cvr
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
gm_strategy_cvr
Commits
1d9857f8
Commit
1d9857f8
authored
Jul 29, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update steps
parent
932e5762
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
8 deletions
+10
-8
main.py
src/main.py
+7
-8
input_fn.py
src/models/esmm/input_fn.py
+2
-0
model.py
src/models/esmm/model.py
+1
-0
No files found.
src/main.py
View file @
1d9857f8
...
@@ -29,14 +29,14 @@ def main():
...
@@ -29,14 +29,14 @@ def main():
device_df
,
diary_df
,
click_df
,
conversion_df
=
diary_fe
.
read_csv_data
(
Path
(
"/srv/apps/node2vec_git/cvr_data/"
))
device_df
,
diary_df
,
click_df
,
conversion_df
=
diary_fe
.
read_csv_data
(
Path
(
"/srv/apps/node2vec_git/cvr_data/"
))
# print(diary_df.sample(1))
# print(diary_df.sample(1))
device_df
=
device_fe
.
device_feature_engineering
(
device_df
)
device_df
=
device_fe
.
device_feature_engineering
(
device_df
)
print
(
device_df
.
sample
(
1
))
#
print(device_df.sample(1))
diary_df
=
diary_fe
.
diary_feature_engineering
(
diary_df
)
diary_df
=
diary_fe
.
diary_feature_engineering
(
diary_df
)
print
(
diary_df
.
sample
(
1
))
#
print(diary_df.sample(1))
cc_df
=
diary_fe
.
click_feature_engineering
(
click_df
,
conversion_df
)
cc_df
=
diary_fe
.
click_feature_engineering
(
click_df
,
conversion_df
)
print
(
cc_df
.
sample
(
1
))
#
print(cc_df.sample(1))
df
=
diary_fe
.
join_features
(
device_df
,
diary_df
,
cc_df
)
df
=
diary_fe
.
join_features
(
device_df
,
diary_df
,
cc_df
)
print
(
df
.
sample
(
1
))
#
print(df.sample(1))
print
(
df
.
dtypes
)
#
print(df.dtypes)
train_df
,
test_df
=
train_test_split
(
df
,
test_size
=
0.2
)
train_df
,
test_df
=
train_test_split
(
df
,
test_size
=
0.2
)
train_df
,
val_df
=
train_test_split
(
train_df
,
test_size
=
0.2
)
train_df
,
val_df
=
train_test_split
(
train_df
,
test_size
=
0.2
)
...
@@ -53,8 +53,7 @@ def main():
...
@@ -53,8 +53,7 @@ def main():
estimator_config
=
tf
.
estimator
.
RunConfig
(
session_config
=
session_config
)
estimator_config
=
tf
.
estimator
.
RunConfig
(
session_config
=
session_config
)
model
=
tf
.
estimator
.
Estimator
(
model_fn
=
esmm_model_fn
,
params
=
params
,
model_dir
=
model_path
,
config
=
estimator_config
)
model
=
tf
.
estimator
.
Estimator
(
model_fn
=
esmm_model_fn
,
params
=
params
,
model_dir
=
model_path
,
config
=
estimator_config
)
# TODO 50000
train_spec
=
tf
.
estimator
.
TrainSpec
(
input_fn
=
lambda
:
esmm_input_fn
(
train_df
,
shuffle
=
True
),
max_steps
=
50000
)
train_spec
=
tf
.
estimator
.
TrainSpec
(
input_fn
=
lambda
:
esmm_input_fn
(
train_df
,
shuffle
=
True
),
max_steps
=
20000
)
eval_spec
=
tf
.
estimator
.
EvalSpec
(
input_fn
=
lambda
:
esmm_input_fn
(
val_df
,
shuffle
=
False
))
eval_spec
=
tf
.
estimator
.
EvalSpec
(
input_fn
=
lambda
:
esmm_input_fn
(
val_df
,
shuffle
=
False
))
tf
.
estimator
.
train_and_evaluate
(
model
,
train_spec
,
eval_spec
)
tf
.
estimator
.
train_and_evaluate
(
model
,
train_spec
,
eval_spec
)
...
@@ -74,7 +73,7 @@ def main():
...
@@ -74,7 +73,7 @@ def main():
predict_fn
=
tf
.
contrib
.
predictor
.
from_saved_model
(
save_path
)
predict_fn
=
tf
.
contrib
.
predictor
.
from_saved_model
(
save_path
)
print
(
"=============================="
)
print
(
"==============================
==============================
"
)
# device_id = "861601036552944"
# device_id = "861601036552944"
# diary_ids = [
# diary_ids = [
# "16195283", "16838351", "17161073", "17297878", "17307484", "17396235", "16418737", "16995481", "17312201", "12237988"
# "16195283", "16838351", "17161073", "17297878", "17307484", "17396235", "16418737", "16995481", "17312201", "12237988"
...
...
src/models/esmm/input_fn.py
View file @
1d9857f8
...
@@ -5,6 +5,7 @@ from .utils import create_boundaries, create_vocabulary_list
...
@@ -5,6 +5,7 @@ from .utils import create_boundaries, create_vocabulary_list
def
build_features
(
df
):
def
build_features
(
df
):
# TODO
int_columns
=
[
"active_days"
,
"topic_num"
,
"favor_num"
,
"vote_num"
]
int_columns
=
[
"active_days"
,
"topic_num"
,
"favor_num"
,
"vote_num"
]
float_columns
=
[
"one_ctr"
,
"three_ctr"
,
"seven_ctr"
,
"fifteen_ctr"
]
float_columns
=
[
"one_ctr"
,
"three_ctr"
,
"seven_ctr"
,
"fifteen_ctr"
]
numeric_features
=
[]
numeric_features
=
[]
...
@@ -14,6 +15,7 @@ def build_features(df):
...
@@ -14,6 +15,7 @@ def build_features(df):
else
:
else
:
numeric_features
.
append
(
fc
.
bucketized_column
(
fc
.
numeric_column
(
col
),
boundaries
=
create_boundaries
(
df
,
col
)))
numeric_features
.
append
(
fc
.
bucketized_column
(
fc
.
numeric_column
(
col
),
boundaries
=
create_boundaries
(
df
,
col
)))
# TODO
categorical_columns
=
[
categorical_columns
=
[
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
"price_sensitive_history"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
...
...
src/models/esmm/model.py
View file @
1d9857f8
...
@@ -99,6 +99,7 @@ def model_predict_diary(device_id, diary_ids, device_dict, diary_dict, predict_f
...
@@ -99,6 +99,7 @@ def model_predict_diary(device_id, diary_ids, device_dict, diary_dict, predict_f
time_1
=
timeit
.
default_timer
()
time_1
=
timeit
.
default_timer
()
device_info
,
diary_lst
,
diary_ids_res
=
device_diary_fe
(
device_id
,
diary_ids
,
device_dict
,
diary_dict
)
device_info
,
diary_lst
,
diary_ids_res
=
device_diary_fe
(
device_id
,
diary_ids
,
device_dict
,
diary_dict
)
print
(
"predict check: "
+
str
(
len
(
diary_lst
))
+
" "
+
str
(
len
(
diary_ids_res
)))
print
(
"predict check: "
+
str
(
len
(
diary_lst
))
+
" "
+
str
(
len
(
diary_ids_res
)))
# TODO
int_columns
=
[
int_columns
=
[
"active_type"
,
"active_days"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
"active_type"
,
"active_days"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
"topic_num"
,
"favor_num"
,
"vote_num"
"topic_num"
,
"favor_num"
,
"vote_num"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment