Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
gm_strategy_cvr
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
gm_strategy_cvr
Commits
b362c073
Commit
b362c073
authored
Aug 21, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
service feature
parent
52f719f1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
166 additions
and
22 deletions
+166
-22
pyrightconfig.json
pyrightconfig.json
+3
-0
diary_model.py
src/models/esmm/diary_model.py
+52
-5
device_fe.py
src/models/esmm/fe/device_fe.py
+39
-8
diary_fe.py
src/models/esmm/fe/diary_fe.py
+68
-7
train_diary.py
src/train_diary.py
+4
-2
No files found.
pyrightconfig.json
0 → 100644
View file @
b362c073
{
"executionEnvironments"
:
[{
"root"
:
"src"
}]
}
src/models/esmm/diary_model.py
View file @
b362c073
...
...
@@ -87,6 +87,11 @@ _int_columns = [
"first_positions_num"
,
"second_positions_num"
,
"projects_num"
,
"is_related_service"
,
"effect_second_skip_num"
,
"business_second_skip_num"
,
"service_price"
,
"service_sold_num"
,
]
_float_columns
=
[
"one_ctr"
,
...
...
@@ -121,13 +126,55 @@ _float_columns = [
"sixty_browse_duration_avg"
,
"ninety_browse_duration_avg"
,
"history_browse_duration_avg"
,
"effect_second_skip_rate"
,
"business_second_skip_rate"
,
]
_categorical_columns
=
[
"device_id"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"device_fd"
,
"device_sd"
,
"device_fs"
,
"device_ss"
,
"device_fp"
,
"device_sp"
,
"device_p"
,
"content_fd"
,
"content_sd"
,
"content_fs"
,
"content_ss"
,
"content_fp"
,
"content_sp"
,
"content_p"
,
"fd1"
,
"fd2"
,
"fd3"
,
"sd1"
,
"sd2"
,
"sd3"
,
"fs1"
,
"fs2"
,
"fs3"
,
"ss1"
,
"ss2"
,
"ss3"
,
"fp1"
,
"fp2"
,
"fp3"
,
"sp1"
,
"sp2"
,
"sp3"
,
"p1"
,
"p2"
,
"p3"
,
"click_diary_id1"
,
"click_diary_id2"
,
"click_diary_id3"
,
"click_diary_id4"
,
"click_diary_id5"
"device_id"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"device_fd"
,
"device_sd"
,
"device_fs"
,
"device_ss"
,
"device_fp"
,
"device_sp"
,
"device_p"
,
"content_fd"
,
"content_sd"
,
"content_fs"
,
"content_ss"
,
"content_fp"
,
"content_sp"
,
"content_p"
,
"fd1"
,
"fd2"
,
"fd3"
,
"sd1"
,
"sd2"
,
"sd3"
,
"fs1"
,
"fs2"
,
"fs3"
,
"ss1"
,
"ss2"
,
"ss3"
,
"fp1"
,
"fp2"
,
"fp3"
,
"sp1"
,
"sp2"
,
"sp3"
,
"p1"
,
"p2"
,
"p3"
,
"click_diary_id1"
,
"click_diary_id2"
,
"click_diary_id3"
,
"click_diary_id4"
,
"click_diary_id5"
,
"service_city"
,
]
PREDICTION_ALL_COLUMNS
=
_int_columns
+
_float_columns
+
_categorical_columns
...
...
src/models/esmm/fe/device_fe.py
View file @
b362c073
import
pandas
as
pd
from
utils.cache
import
redis_db_client
# "channel_first", "city_first", "model_first",
DIARY_DEVICE_COLUMNS
=
[
"device_id"
,
"active_type"
,
"active_days"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"first_demands"
,
"second_demands"
,
"first_solutions"
,
"second_solutions"
,
"first_positions"
,
"second_positions"
,
"projects"
,
"click_diary_id1"
,
"click_diary_id2"
,
"click_diary_id3"
,
"click_diary_id4"
,
"click_diary_id5"
"device_id"
,
"active_type"
,
"active_days"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"first_demands"
,
"second_demands"
,
"first_solutions"
,
"second_solutions"
,
"first_positions"
,
"second_positions"
,
"projects"
,
"click_diary_id1"
,
"click_diary_id2"
,
"click_diary_id3"
,
"click_diary_id4"
,
"click_diary_id5"
,
]
TRACTATE_DEVICE_COLUMNS
=
[
"device_id"
,
"active_type"
,
"active_days"
,
"channel_first"
,
"city_first"
,
"model_first"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"first_demands"
,
"second_demands"
,
"first_solutions"
,
"second_solutions"
,
"first_positions"
,
"second_positions"
,
"projects"
,
"click_tractate_id1"
,
"click_tractate_id2"
,
"click_tractate_id3"
,
"click_tractate_id4"
,
"click_tractate_id5"
"device_id"
,
"active_type"
,
"active_days"
,
"channel_first"
,
"city_first"
,
"model_first"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"first_demands"
,
"second_demands"
,
"first_solutions"
,
"second_solutions"
,
"first_positions"
,
"second_positions"
,
"projects"
,
"click_tractate_id1"
,
"click_tractate_id2"
,
"click_tractate_id3"
,
"click_tractate_id4"
,
"click_tractate_id5"
,
]
...
...
src/models/esmm/fe/diary_fe.py
View file @
b362c073
...
...
@@ -124,6 +124,14 @@ DIARY_COLUMNS = [
"first_positions_num"
,
"second_positions_num"
,
"projects_num"
,
"is_related_service"
,
"effect_second_skip_num"
,
"business_second_skip_num"
,
"effect_second_skip_rate"
,
"business_second_skip_rate"
,
"service_price"
,
"service_sold_num"
,
"service_city"
,
]
INT_COLUMNS
=
[
"active_days"
,
...
...
@@ -201,6 +209,10 @@ INT_COLUMNS = [
"first_positions_num"
,
"second_positions_num"
,
"projects_num"
,
"effect_second_skip_num"
,
"business_second_skip_num"
,
"service_price"
,
"service_sold_num"
,
]
FLOAT_COLUMNS
=
[
"one_ctr"
,
...
...
@@ -235,14 +247,62 @@ FLOAT_COLUMNS = [
"sixty_browse_duration_avg"
,
"ninety_browse_duration_avg"
,
"history_browse_duration_avg"
,
"effect_second_skip_rate"
,
"business_second_skip_rate"
,
]
CATEGORICAL_COLUMNS
=
[
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
"device_fd"
,
"content_fd"
,
"fd1"
,
"fd2"
,
"fd3"
,
"device_sd"
,
"content_sd"
,
"sd1"
,
"sd2"
,
"sd3"
,
"device_fs"
,
"content_fs"
,
"fs1"
,
"fs2"
,
"fs3"
,
"device_ss"
,
"content_ss"
,
"ss1"
,
"ss2"
,
"ss3"
,
"device_fp"
,
"content_fp"
,
"fp1"
,
"fp2"
,
"fp3"
,
"device_sp"
,
"content_sp"
,
"sp1"
,
"sp2"
,
"sp3"
,
"device_p"
,
"content_p"
,
"p1"
,
"p2"
,
"p3"
,
"click_diary_id1"
,
"click_diary_id2"
,
"click_diary_id3"
,
"click_diary_id4"
,
"click_diary_id5"
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
"device_fd"
,
"content_fd"
,
"fd1"
,
"fd2"
,
"fd3"
,
"device_sd"
,
"content_sd"
,
"sd1"
,
"sd2"
,
"sd3"
,
"device_fs"
,
"content_fs"
,
"fs1"
,
"fs2"
,
"fs3"
,
"device_ss"
,
"content_ss"
,
"ss1"
,
"ss2"
,
"ss3"
,
"device_fp"
,
"content_fp"
,
"fp1"
,
"fp2"
,
"fp3"
,
"device_sp"
,
"content_sp"
,
"sp1"
,
"sp2"
,
"sp3"
,
"device_p"
,
"content_p"
,
"p1"
,
"p2"
,
"p3"
,
"click_diary_id1"
,
"click_diary_id2"
,
"click_diary_id3"
,
"click_diary_id4"
,
"click_diary_id5"
,
"is_related_service"
,
"service_city"
,
]
...
...
@@ -275,7 +335,7 @@ def get_diary_dict_from_redis():
if
""
in
tmp
[
col_name
]:
tmp
[
col_name
]
.
remove
(
""
)
tmp
[
col_name
+
"_num"
]
=
len
(
tmp
[
col_name
])
elif
col_name
in
[
"is_pure_author"
,
"is_have_pure_reply"
,
"is_have_reply"
]:
elif
col_name
in
[
"is_pure_author"
,
"is_have_pure_reply"
,
"is_have_reply"
,
"is_related_service"
]:
if
elem
==
"true"
:
tmp
[
col_name
]
=
1
else
:
...
...
@@ -316,6 +376,7 @@ def diary_feature_engineering(df):
diary_df
[
"is_pure_author"
]
=
diary_df
[
"is_pure_author"
]
.
astype
(
int
)
diary_df
[
"is_have_pure_reply"
]
=
diary_df
[
"is_have_pure_reply"
]
.
astype
(
int
)
diary_df
[
"is_have_reply"
]
=
diary_df
[
"is_have_reply"
]
.
astype
(
int
)
diary_df
[
"is_related_service"
]
=
diary_df
[
"is_related_service"
]
.
astype
(
int
)
diary_df
=
diary_df
[
DIARY_COLUMNS
]
...
...
src/train_diary.py
View file @
b362c073
...
...
@@ -61,7 +61,8 @@ def main():
estimator_config
=
tf
.
estimator
.
RunConfig
(
session_config
=
session_config
)
model
=
tf
.
estimator
.
Estimator
(
model_fn
=
esmm_model_fn
,
params
=
params
,
model_dir
=
model_path
,
config
=
estimator_config
)
train_spec
=
tf
.
estimator
.
TrainSpec
(
input_fn
=
lambda
:
esmm_input_fn
(
train_df
,
shuffle
=
True
),
max_steps
=
50000
)
# TODO 50000
train_spec
=
tf
.
estimator
.
TrainSpec
(
input_fn
=
lambda
:
esmm_input_fn
(
train_df
,
shuffle
=
True
),
max_steps
=
15000
)
eval_spec
=
tf
.
estimator
.
EvalSpec
(
input_fn
=
lambda
:
esmm_input_fn
(
val_df
,
shuffle
=
False
))
res
=
tf
.
estimator
.
train_and_evaluate
(
model
,
train_spec
,
eval_spec
)
print
(
"@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
)
...
...
@@ -73,7 +74,8 @@ def main():
model_export_path
=
str
(
Path
(
"/data/files/models/diary"
)
.
expanduser
())
save_path
=
model_export
(
model
,
all_features
,
model_export_path
)
print
(
"save to: "
+
save_path
)
set_essm_model_save_path
(
"diary"
,
save_path
)
# TODO save
# set_essm_model_save_path("diary", save_path)
print
(
"============================================================"
)
# save_path = str(Path("~/Desktop/models/1596012827").expanduser()) # local
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment