Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
gm_strategy_cvr
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
gm_strategy_cvr
Commits
72ecf50f
Commit
72ecf50f
authored
Jul 21, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add printer
parent
a08d32a5
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
27 deletions
+32
-27
main.py
src/main.py
+31
-26
model.py
src/models/esmm/model.py
+1
-1
No files found.
src/main.py
View file @
72ecf50f
...
...
@@ -20,36 +20,36 @@ from models.esmm.model import esmm_model_fn, model_export, model_predict
def
main
():
time_begin
=
time
.
time
()
device_df
,
diary_df
,
click_df
,
conversion_df
=
read_csv_data
(
Path
(
"~/data/cvr_data/"
))
# print(diary_df.sample(1))
device_df
=
device_feature_engineering
(
device_df
)
# print(device_df.sample(1))
diary_df
=
diary_feature_engineering
(
diary_df
)
# print(diary_df.sample(1))
cc_df
=
click_feature_engineering
(
click_df
,
conversion_df
)
df
=
join_features
(
device_df
,
diary_df
,
cc_df
)
#
device_df, diary_df, click_df, conversion_df = read_csv_data(Path("~/data/cvr_data/"))
#
#
print(diary_df.sample(1))
#
device_df = device_feature_engineering(device_df)
#
#
print(device_df.sample(1))
#
diary_df = diary_feature_engineering(diary_df)
#
#
print(diary_df.sample(1))
#
cc_df = click_feature_engineering(click_df, conversion_df)
#
df = join_features(device_df, diary_df, cc_df)
train_df
,
test_df
=
train_test_split
(
df
,
test_size
=
0.2
)
train_df
,
val_df
=
train_test_split
(
train_df
,
test_size
=
0.2
)
#
train_df, test_df = train_test_split(df, test_size=0.2)
#
train_df, val_df = train_test_split(train_df, test_size=0.2)
all_features
=
build_features
(
df
)
#
all_features = build_features(df)
params
=
{
"feature_columns"
:
all_features
,
"hidden_units"
:
[
64
,
32
],
"learning_rate"
:
0.1
}
model_path
=
str
(
Path
(
"~/data/model_tmp/"
)
.
expanduser
())
if
os
.
path
.
exists
(
model_path
):
shutil
.
rmtree
(
model_path
)
model
=
tf
.
estimator
.
Estimator
(
model_fn
=
esmm_model_fn
,
params
=
params
,
model_dir
=
model_path
)
#
params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1}
#
model_path = str(Path("~/data/model_tmp/").expanduser())
#
if os.path.exists(model_path):
#
shutil.rmtree(model_path)
#
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path)
print
(
"train"
)
model
.
train
(
input_fn
=
lambda
:
esmm_input_fn
(
train_df
,
shuffle
=
True
),
steps
=
5000
)
metrics
=
model
.
evaluate
(
input_fn
=
lambda
:
esmm_input_fn
(
val_df
,
False
),
steps
=
5000
)
print
(
"metrics: "
+
str
(
metrics
))
#
print("train")
#
model.train(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), steps=5000)
#
metrics = model.evaluate(input_fn=lambda: esmm_input_fn(val_df, False), steps=5000)
#
print("metrics: " + str(metrics))
model_export_path
=
str
(
Path
(
"~/data/models/"
)
.
expanduser
())
save_path
=
model_export
(
model
,
all_features
,
model_export_path
)
print
(
"save to: "
+
save_path
)
#
model_export_path = str(Path("~/data/models/").expanduser())
#
save_path = model_export(model, all_features, model_export_path)
#
print("save to: " + save_path)
predict_fn
=
tf
.
contrib
.
predictor
.
from_saved_model
(
save_path
)
#
predict_fn = tf.contrib.predictor.from_saved_model(save_path)
# for i in range(10):
# test_300 = test_df.sample(300)
...
...
@@ -63,13 +63,18 @@ def main():
df
=
get_device_df_from_redis
()
df2
=
get_diary_df_from_redis
()
redis_device_df
=
device_feature_engineering
(
df
)
redis_diary_df
=
diary_feature_engineering
(
df2
,
from_redis
=
True
)
time_1
=
timeit
.
default_timer
()
res
=
join_device_diary
(
device_id
,
diary_ids
,
redis_device_df
,
redis_diary_df
)
print
(
len
(
res
))
print
(
res
.
sample
(
1
),
"
\n
"
)
print
(
res
.
sample
(
1
))
model_predict
(
res
,
predict_fn
)
# model_predict(res, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
# print("prediction total cost {:.5f}s".format(total_1))
total_time
=
(
time
.
time
()
-
time_begin
)
/
60
print
(
"cost {:.2f} mins at {}"
.
format
(
total_time
,
datetime
.
now
()))
...
...
src/models/esmm/model.py
View file @
72ecf50f
...
...
@@ -113,6 +113,6 @@ def model_predict(inputs, predict_fn):
time_1
=
timeit
.
default_timer
()
predictions
=
predict_fn
({
"examples"
:
examples
})
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
print
(
"prediction cost {:.5f}
s"
.
format
(
total_1
))
print
(
"prediction cost {:.5f}s"
.
format
(
total_1
))
# print(predictions)
return
predictions
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment