Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
gm_strategy_cvr
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
gm_strategy_cvr
Commits
1a32da74
Commit
1a32da74
authored
Aug 14, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
retrain
parent
883c544f
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
154 additions
and
154 deletions
+154
-154
diary_model.py
src/models/esmm/diary_model.py
+16
-16
diary_fe.py
src/models/esmm/fe/diary_fe.py
+32
-32
tractate_fe.py
src/models/esmm/fe/tractate_fe.py
+32
-32
tractate_model.py
src/models/esmm/tractate_model.py
+16
-16
online_prediction.py
src/online_prediction.py
+2
-2
train_diary.py
src/train_diary.py
+56
-56
No files found.
src/models/esmm/diary_model.py
View file @
1a32da74
...
...
@@ -72,22 +72,6 @@ _int_columns = [
"sixty_browse_user_num"
,
"ninety_browse_user_num"
,
"history_browse_user_num"
,
"one_vote_pure_rate"
,
"three_vote_pure_rate"
,
"seven_vote_pure_rate"
,
"fifteen_vote_pure_rate"
,
"thirty_vote_pure_rate"
,
"sixty_vote_pure_rate"
,
"ninety_vote_pure_rate"
,
"history_vote_pure_rate"
,
"one_reply_pure_rate"
,
"three_reply_pure_rate"
,
"seven_reply_pure_rate"
,
"fifteen_reply_pure_rate"
,
"thirty_reply_pure_rate"
,
"sixty_reply_pure_rate"
,
"ninety_reply_pure_rate"
,
"history_reply_pure_rate"
,
]
_float_columns
=
[
"one_ctr"
,
...
...
@@ -98,6 +82,22 @@ _float_columns = [
"sixty_ctr"
,
"ninety_ctr"
,
"history_ctr"
,
# "one_vote_pure_rate",
# "three_vote_pure_rate",
# "seven_vote_pure_rate",
# "fifteen_vote_pure_rate",
# "thirty_vote_pure_rate",
# "sixty_vote_pure_rate",
# "ninety_vote_pure_rate",
# "history_vote_pure_rate",
# "one_reply_pure_rate",
# "three_reply_pure_rate",
# "seven_reply_pure_rate",
# "fifteen_reply_pure_rate",
# "thirty_reply_pure_rate",
# "sixty_reply_pure_rate",
# "ninety_reply_pure_rate",
# "history_reply_pure_rate",
]
_categorical_columns
=
[
"device_id"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"device_fd"
,
...
...
src/models/esmm/fe/diary_fe.py
View file @
1a32da74
...
...
@@ -70,22 +70,22 @@ DIARY_COLUMNS = [
"sixty_browse_user_num"
,
"ninety_browse_user_num"
,
"history_browse_user_num"
,
"one_vote_pure_rate"
,
"three_vote_pure_rate"
,
"seven_vote_pure_rate"
,
"fifteen_vote_pure_rate"
,
"thirty_vote_pure_rate"
,
"sixty_vote_pure_rate"
,
"ninety_vote_pure_rate"
,
"history_vote_pure_rate"
,
"one_reply_pure_rate"
,
"three_reply_pure_rate"
,
"seven_reply_pure_rate"
,
"fifteen_reply_pure_rate"
,
"thirty_reply_pure_rate"
,
"sixty_reply_pure_rate"
,
"ninety_reply_pure_rate"
,
"history_reply_pure_rate"
,
#
"one_vote_pure_rate",
#
"three_vote_pure_rate",
#
"seven_vote_pure_rate",
#
"fifteen_vote_pure_rate",
#
"thirty_vote_pure_rate",
#
"sixty_vote_pure_rate",
#
"ninety_vote_pure_rate",
#
"history_vote_pure_rate",
#
"one_reply_pure_rate",
#
"three_reply_pure_rate",
#
"seven_reply_pure_rate",
#
"fifteen_reply_pure_rate",
#
"thirty_reply_pure_rate",
#
"sixty_reply_pure_rate",
#
"ninety_reply_pure_rate",
#
"history_reply_pure_rate",
"one_ctr"
,
"three_ctr"
,
"seven_ctr"
,
...
...
@@ -173,22 +173,22 @@ FLOAT_COLUMNS = [
"sixty_ctr"
,
"ninety_ctr"
,
"history_ctr"
,
"one_vote_pure_rate"
,
"three_vote_pure_rate"
,
"seven_vote_pure_rate"
,
"fifteen_vote_pure_rate"
,
"thirty_vote_pure_rate"
,
"sixty_vote_pure_rate"
,
"ninety_vote_pure_rate"
,
"history_vote_pure_rate"
,
"one_reply_pure_rate"
,
"three_reply_pure_rate"
,
"seven_reply_pure_rate"
,
"fifteen_reply_pure_rate"
,
"thirty_reply_pure_rate"
,
"sixty_reply_pure_rate"
,
"ninety_reply_pure_rate"
,
"history_reply_pure_rate"
,
#
"one_vote_pure_rate",
#
"three_vote_pure_rate",
#
"seven_vote_pure_rate",
#
"fifteen_vote_pure_rate",
#
"thirty_vote_pure_rate",
#
"sixty_vote_pure_rate",
#
"ninety_vote_pure_rate",
#
"history_vote_pure_rate",
#
"one_reply_pure_rate",
#
"three_reply_pure_rate",
#
"seven_reply_pure_rate",
#
"fifteen_reply_pure_rate",
#
"thirty_reply_pure_rate",
#
"sixty_reply_pure_rate",
#
"ninety_reply_pure_rate",
#
"history_reply_pure_rate",
]
CATEGORICAL_COLUMNS
=
[
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
...
...
src/models/esmm/fe/tractate_fe.py
View file @
1a32da74
...
...
@@ -62,22 +62,22 @@ TRACTATE_COLUMNS = [
"sixty_browse_user_num"
,
"ninety_browse_user_num"
,
"history_browse_user_num"
,
"one_vote_pure_rate"
,
"three_vote_pure_rate"
,
"seven_vote_pure_rate"
,
"fifteen_vote_pure_rate"
,
"thirty_vote_pure_rate"
,
"sixty_vote_pure_rate"
,
"ninety_vote_pure_rate"
,
"history_vote_pure_rate"
,
"one_reply_pure_rate"
,
"three_reply_pure_rate"
,
"seven_reply_pure_rate"
,
"fifteen_reply_pure_rate"
,
"thirty_reply_pure_rate"
,
"sixty_reply_pure_rate"
,
"ninety_reply_pure_rate"
,
"history_reply_pure_rate"
,
#
"one_vote_pure_rate",
#
"three_vote_pure_rate",
#
"seven_vote_pure_rate",
#
"fifteen_vote_pure_rate",
#
"thirty_vote_pure_rate",
#
"sixty_vote_pure_rate",
#
"ninety_vote_pure_rate",
#
"history_vote_pure_rate",
#
"one_reply_pure_rate",
#
"three_reply_pure_rate",
#
"seven_reply_pure_rate",
#
"fifteen_reply_pure_rate",
#
"thirty_reply_pure_rate",
#
"sixty_reply_pure_rate",
#
"ninety_reply_pure_rate",
#
"history_reply_pure_rate",
"one_ctr"
,
"three_ctr"
,
"seven_ctr"
,
...
...
@@ -157,22 +157,22 @@ FLOAT_COLUMNS = [
"sixty_ctr"
,
"ninety_ctr"
,
"history_ctr"
,
"one_vote_pure_rate"
,
"three_vote_pure_rate"
,
"seven_vote_pure_rate"
,
"fifteen_vote_pure_rate"
,
"thirty_vote_pure_rate"
,
"sixty_vote_pure_rate"
,
"ninety_vote_pure_rate"
,
"history_vote_pure_rate"
,
"one_reply_pure_rate"
,
"three_reply_pure_rate"
,
"seven_reply_pure_rate"
,
"fifteen_reply_pure_rate"
,
"thirty_reply_pure_rate"
,
"sixty_reply_pure_rate"
,
"ninety_reply_pure_rate"
,
"history_reply_pure_rate"
,
#
"one_vote_pure_rate",
#
"three_vote_pure_rate",
#
"seven_vote_pure_rate",
#
"fifteen_vote_pure_rate",
#
"thirty_vote_pure_rate",
#
"sixty_vote_pure_rate",
#
"ninety_vote_pure_rate",
#
"history_vote_pure_rate",
#
"one_reply_pure_rate",
#
"three_reply_pure_rate",
#
"seven_reply_pure_rate",
#
"fifteen_reply_pure_rate",
#
"thirty_reply_pure_rate",
#
"sixty_reply_pure_rate",
#
"ninety_reply_pure_rate",
#
"history_reply_pure_rate",
]
CATEGORICAL_COLUMNS
=
[
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
...
...
src/models/esmm/tractate_model.py
View file @
1a32da74
...
...
@@ -73,22 +73,22 @@ _float_columns = [
"sixty_ctr"
,
"ninety_ctr"
,
"history_ctr"
,
"one_vote_pure_rate"
,
"three_vote_pure_rate"
,
"seven_vote_pure_rate"
,
"fifteen_vote_pure_rate"
,
"thirty_vote_pure_rate"
,
"sixty_vote_pure_rate"
,
"ninety_vote_pure_rate"
,
"history_vote_pure_rate"
,
"one_reply_pure_rate"
,
"three_reply_pure_rate"
,
"seven_reply_pure_rate"
,
"fifteen_reply_pure_rate"
,
"thirty_reply_pure_rate"
,
"sixty_reply_pure_rate"
,
"ninety_reply_pure_rate"
,
"history_reply_pure_rate"
,
#
"one_vote_pure_rate",
#
"three_vote_pure_rate",
#
"seven_vote_pure_rate",
#
"fifteen_vote_pure_rate",
#
"thirty_vote_pure_rate",
#
"sixty_vote_pure_rate",
#
"ninety_vote_pure_rate",
#
"history_vote_pure_rate",
#
"one_reply_pure_rate",
#
"three_reply_pure_rate",
#
"seven_reply_pure_rate",
#
"fifteen_reply_pure_rate",
#
"thirty_reply_pure_rate",
#
"sixty_reply_pure_rate",
#
"ninety_reply_pure_rate",
#
"history_reply_pure_rate",
]
_categorical_columns
=
[
"device_id"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"show_tag_id"
,
...
...
src/online_prediction.py
View file @
1a32da74
...
...
@@ -45,13 +45,13 @@ if __name__ == "__main__":
diary_save_path
=
get_essm_model_save_path
(
"diary"
)
if
not
diary_save_path
:
diary_save_path
=
"/home/gmuser/data/models/diary/1597
050209
"
diary_save_path
=
"/home/gmuser/data/models/diary/1597
379800
"
print
(
diary_save_path
+
"!!!!!!!!!!!!!!!!!!!!!!!!!!!"
)
diary_predict_fn
=
tf
.
contrib
.
predictor
.
from_saved_model
(
diary_save_path
)
tractate_save_path
=
get_essm_model_save_path
(
"tractate"
)
if
not
tractate_save_path
:
tractate_save_path
=
"/home/gmuser/data/models/tractate/159
6509299
"
tractate_save_path
=
"/home/gmuser/data/models/tractate/159
7378202
"
print
(
tractate_save_path
+
"!!!!!!!!!!!!!!!!!!!!!!!!!!!"
)
tractate_predict_fn
=
tf
.
contrib
.
predictor
.
from_saved_model
(
tractate_save_path
)
...
...
src/train_diary.py
View file @
1a32da74
...
...
@@ -9,8 +9,7 @@ from pathlib import Path
import
tensorflow
as
tf
from
sklearn.model_selection
import
train_test_split
from
models.esmm.diary_model
import
(
PREDICTION_ALL_COLUMNS
,
model_predict_diary
)
from
models.esmm.diary_model
import
PREDICTION_ALL_COLUMNS
,
model_predict_diary
from
models.esmm.fe
import
click_fe
,
device_fe
,
diary_fe
,
fe
from
models.esmm.input_fn
import
esmm_input_fn
from
models.esmm.model
import
esmm_model_fn
,
model_export
...
...
@@ -24,70 +23,71 @@ def main():
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
#
diary_train_columns = set(diary_fe.INT_COLUMNS + diary_fe.FLOAT_COLUMNS + diary_fe.CATEGORICAL_COLUMNS)
#
print("features: " + str(len(diary_train_columns)))
#
diary_predict_columns = set(PREDICTION_ALL_COLUMNS)
#
print(diary_predict_columns.difference(diary_train_columns))
#
print(diary_train_columns.difference(diary_predict_columns))
#
assert diary_predict_columns == diary_train_columns
#
#
dataset_path = Path("~/data/cvr_data").expanduser() # local
#
dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server
#
diary_df, diary_click_df, diary_conversion_df = diary_fe.read_csv_data(dataset_path)
#
#
print(diary_df.sample(1))
#
diary_df = diary_fe.diary_feature_engineering(diary_df)
#
#
print(diary_df.sample(1))
#
device_df = device_fe.read_csv_data(dataset_path)
#
#
print(diary_df.sample(1))
#
device_df = device_fe.device_feature_engineering(device_df, "diary")
#
#
print(device_df.sample(1))
#
cc_df = click_fe.click_feature_engineering(diary_click_df, diary_conversion_df)
#
#
print(cc_df.sample(1))
#
df = diary_fe.join_features(device_df, diary_df, cc_df)
#
#
print(df.sample(1))
#
#
print(df.dtypes)
#
train_df, test_df = train_test_split(df, test_size=0.2)
#
train_df, val_df = train_test_split(train_df, test_size=0.2)
#
all_features = fe.build_features(df, diary_fe.INT_COLUMNS, diary_fe.FLOAT_COLUMNS, diary_fe.CATEGORICAL_COLUMNS)
#
params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1}
#
model_path = str(Path("~/data/model_tmp/diary/").expanduser())
#
if os.path.exists(model_path):
#
shutil.rmtree(model_path)
#
session_config = tf.compat.v1.ConfigProto()
#
session_config.gpu_options.allow_growth = True
#
session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
#
estimator_config = tf.estimator.RunConfig(session_config=session_config)
#
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
#
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
#
eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
#
res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
#
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
#
print(res[0])
#
print("ctr_auc: " + str(res[0]["ctr_auc"]))
#
print("ctcvr_auc: " + str(res[0]["ctcvr_auc"]))
#
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
#
model_export_path = str(Path("~/data/models/diary").expanduser())
#
save_path = model_export(model, all_features, model_export_path)
#
print("save to: " + save_path)
#
set_essm_model_save_path("diary", save_path)
#
print("============================================================")
diary_train_columns
=
set
(
diary_fe
.
INT_COLUMNS
+
diary_fe
.
FLOAT_COLUMNS
+
diary_fe
.
CATEGORICAL_COLUMNS
)
print
(
"features: "
+
str
(
len
(
diary_train_columns
)))
diary_predict_columns
=
set
(
PREDICTION_ALL_COLUMNS
)
print
(
diary_predict_columns
.
difference
(
diary_train_columns
))
print
(
diary_train_columns
.
difference
(
diary_predict_columns
))
assert
diary_predict_columns
==
diary_train_columns
# dataset_path = Path("~/data/cvr_data").expanduser() # local
dataset_path
=
Path
(
"/srv/apps/node2vec_git/cvr_data/"
)
# server
diary_df
,
diary_click_df
,
diary_conversion_df
=
diary_fe
.
read_csv_data
(
dataset_path
)
# print(diary_df.sample(1))
diary_df
=
diary_fe
.
diary_feature_engineering
(
diary_df
)
# print(diary_df.sample(1))
device_df
=
device_fe
.
read_csv_data
(
dataset_path
)
# print(diary_df.sample(1))
device_df
=
device_fe
.
device_feature_engineering
(
device_df
,
"diary"
)
# print(device_df.sample(1))
cc_df
=
click_fe
.
click_feature_engineering
(
diary_click_df
,
diary_conversion_df
)
# print(cc_df.sample(1))
df
=
diary_fe
.
join_features
(
device_df
,
diary_df
,
cc_df
)
# print(df.sample(1))
# print(df.dtypes)
train_df
,
test_df
=
train_test_split
(
df
,
test_size
=
0.2
)
train_df
,
val_df
=
train_test_split
(
train_df
,
test_size
=
0.2
)
all_features
=
fe
.
build_features
(
df
,
diary_fe
.
INT_COLUMNS
,
diary_fe
.
FLOAT_COLUMNS
,
diary_fe
.
CATEGORICAL_COLUMNS
)
params
=
{
"feature_columns"
:
all_features
,
"hidden_units"
:
[
64
,
32
],
"learning_rate"
:
0.1
}
model_path
=
str
(
Path
(
"~/data/model_tmp/diary/"
)
.
expanduser
())
if
os
.
path
.
exists
(
model_path
):
shutil
.
rmtree
(
model_path
)
session_config
=
tf
.
compat
.
v1
.
ConfigProto
()
session_config
.
gpu_options
.
allow_growth
=
True
session_config
.
gpu_options
.
per_process_gpu_memory_fraction
=
0.9
estimator_config
=
tf
.
estimator
.
RunConfig
(
session_config
=
session_config
)
model
=
tf
.
estimator
.
Estimator
(
model_fn
=
esmm_model_fn
,
params
=
params
,
model_dir
=
model_path
,
config
=
estimator_config
)
train_spec
=
tf
.
estimator
.
TrainSpec
(
input_fn
=
lambda
:
esmm_input_fn
(
train_df
,
shuffle
=
True
),
max_steps
=
50000
)
eval_spec
=
tf
.
estimator
.
EvalSpec
(
input_fn
=
lambda
:
esmm_input_fn
(
val_df
,
shuffle
=
False
))
res
=
tf
.
estimator
.
train_and_evaluate
(
model
,
train_spec
,
eval_spec
)
print
(
"@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
)
print
(
res
[
0
])
print
(
"ctr_auc: "
+
str
(
res
[
0
][
"ctr_auc"
]))
print
(
"ctcvr_auc: "
+
str
(
res
[
0
][
"ctcvr_auc"
]))
print
(
"@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
)
model_export_path
=
str
(
Path
(
"~/data/models/diary"
)
.
expanduser
())
save_path
=
model_export
(
model
,
all_features
,
model_export_path
)
print
(
"save to: "
+
save_path
)
set_essm_model_save_path
(
"diary"
,
save_path
)
print
(
"============================================================"
)
# save_path = str(Path("~/Desktop/models/1596012827").expanduser()) # local
# save_path = "/home/gmuser/data/models/diary/1596083349" # server
# tf.saved_model.load
save_path
=
get_essm_model_save_path
(
"diary"
)
# save_path = get_essm_model_save_path("diary")
# print("load path: " + save_path)
predict_fn
=
tf
.
contrib
.
predictor
.
from_saved_model
(
save_path
)
device_dict
=
device_fe
.
get_device_dict_from_redis
()
diary_dict
=
diary_fe
.
get_diary_dict_from_redis
()
print
(
"redis data: "
+
str
(
len
(
device_dict
))
+
" "
+
str
(
len
(
diary_dict
)))
print
(
"redis data: "
+
str
(
len
(
device_dict
))
+
" "
+
str
(
len
(
diary_dict
)))
device_ids
=
list
(
device_dict
.
keys
())[:
20
]
diary_ids
=
list
(
diary_dict
.
keys
())
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment