Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
gm_strategy_cvr
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
gm_strategy_cvr
Commits
c60da982
Commit
c60da982
authored
Sep 15, 2020
by
赵威
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'fe' into 'offic'
Fe See merge request
!21
parents
e110acf5
a2df4105
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
114 additions
and
114 deletions
+114
-114
diary_model.py
src/models/esmm/diary_model.py
+26
-26
diary_fe.py
src/models/esmm/fe/diary_fe.py
+88
-88
No files found.
src/models/esmm/diary_model.py
View file @
c60da982
...
@@ -87,11 +87,11 @@ _int_columns = [
...
@@ -87,11 +87,11 @@ _int_columns = [
"first_positions_num"
,
"first_positions_num"
,
"second_positions_num"
,
"second_positions_num"
,
"projects_num"
,
"projects_num"
,
"is_related_service"
,
#
"is_related_service",
"effect_second_skip_num"
,
#
"effect_second_skip_num",
"business_second_skip_num"
,
#
"business_second_skip_num",
"service_price"
,
#
"service_price",
"service_sold_num"
,
#
"service_sold_num",
]
]
_float_columns
=
[
_float_columns
=
[
"one_ctr"
,
"one_ctr"
,
...
@@ -126,8 +126,8 @@ _float_columns = [
...
@@ -126,8 +126,8 @@ _float_columns = [
"sixty_browse_duration_avg"
,
"sixty_browse_duration_avg"
,
"ninety_browse_duration_avg"
,
"ninety_browse_duration_avg"
,
"history_browse_duration_avg"
,
"history_browse_duration_avg"
,
"effect_second_skip_rate"
,
#
"effect_second_skip_rate",
"business_second_skip_rate"
,
#
"business_second_skip_rate",
]
]
_categorical_columns
=
[
_categorical_columns
=
[
"device_id"
,
"device_id"
,
...
@@ -174,25 +174,25 @@ _categorical_columns = [
...
@@ -174,25 +174,25 @@ _categorical_columns = [
"click_diary_id3"
,
"click_diary_id3"
,
"click_diary_id4"
,
"click_diary_id4"
,
"click_diary_id5"
,
"click_diary_id5"
,
"service_id"
,
#
"service_id",
"service_city"
,
#
"service_city",
"recommend_service_id"
,
#
"recommend_service_id",
"recommend_service_city"
,
#
"recommend_service_city",
"recommend_service_price"
,
#
"recommend_service_price",
"device_fd2"
,
#
"device_fd2",
"device_sd2"
,
#
"device_sd2",
"device_fs2"
,
#
"device_fs2",
"device_ss2"
,
#
"device_ss2",
"device_fp2"
,
#
"device_fp2",
"device_sp2"
,
#
"device_sp2",
"device_p2"
,
#
"device_p2",
"device_fd3"
,
#
"device_fd3",
"device_sd3"
,
#
"device_sd3",
"device_fs3"
,
#
"device_fs3",
"device_ss3"
,
#
"device_ss3",
"device_fp3"
,
#
"device_fp3",
"device_sp3"
,
#
"device_sp3",
"device_p3"
,
#
"device_p3",
]
]
PREDICTION_ALL_COLUMNS
=
_int_columns
+
_float_columns
+
_categorical_columns
PREDICTION_ALL_COLUMNS
=
_int_columns
+
_float_columns
+
_categorical_columns
...
...
src/models/esmm/fe/diary_fe.py
View file @
c60da982
...
@@ -124,18 +124,18 @@ DIARY_COLUMNS = [
...
@@ -124,18 +124,18 @@ DIARY_COLUMNS = [
"first_positions_num"
,
"first_positions_num"
,
"second_positions_num"
,
"second_positions_num"
,
"projects_num"
,
"projects_num"
,
"is_related_service"
,
#
"is_related_service",
"effect_second_skip_num"
,
#
"effect_second_skip_num",
"business_second_skip_num"
,
#
"business_second_skip_num",
"effect_second_skip_rate"
,
#
"effect_second_skip_rate",
"business_second_skip_rate"
,
#
"business_second_skip_rate",
"service_id"
,
#
"service_id",
"service_price"
,
#
"service_price",
"service_sold_num"
,
#
"service_sold_num",
"service_city"
,
#
"service_city",
"recommend_service_id"
,
#
"recommend_service_id",
"recommend_service_city"
,
#
"recommend_service_city",
"recommend_service_price"
,
#
"recommend_service_price",
]
]
INT_COLUMNS
=
[
INT_COLUMNS
=
[
"active_days"
,
"active_days"
,
...
@@ -213,10 +213,10 @@ INT_COLUMNS = [
...
@@ -213,10 +213,10 @@ INT_COLUMNS = [
"first_positions_num"
,
"first_positions_num"
,
"second_positions_num"
,
"second_positions_num"
,
"projects_num"
,
"projects_num"
,
"effect_second_skip_num"
,
#
"effect_second_skip_num",
"business_second_skip_num"
,
#
"business_second_skip_num",
"service_price"
,
#
"service_price",
"service_sold_num"
,
#
"service_sold_num",
]
]
FLOAT_COLUMNS
=
[
FLOAT_COLUMNS
=
[
"one_ctr"
,
"one_ctr"
,
...
@@ -251,8 +251,8 @@ FLOAT_COLUMNS = [
...
@@ -251,8 +251,8 @@ FLOAT_COLUMNS = [
"sixty_browse_duration_avg"
,
"sixty_browse_duration_avg"
,
"ninety_browse_duration_avg"
,
"ninety_browse_duration_avg"
,
"history_browse_duration_avg"
,
"history_browse_duration_avg"
,
"effect_second_skip_rate"
,
#
"effect_second_skip_rate",
"business_second_skip_rate"
,
#
"business_second_skip_rate",
]
]
CATEGORICAL_COLUMNS
=
[
CATEGORICAL_COLUMNS
=
[
"device_id"
,
"device_id"
,
...
@@ -305,26 +305,26 @@ CATEGORICAL_COLUMNS = [
...
@@ -305,26 +305,26 @@ CATEGORICAL_COLUMNS = [
"click_diary_id3"
,
"click_diary_id3"
,
"click_diary_id4"
,
"click_diary_id4"
,
"click_diary_id5"
,
"click_diary_id5"
,
"is_related_service"
,
#
"is_related_service",
"service_id"
,
#
"service_id",
"service_city"
,
#
"service_city",
"recommend_service_id"
,
#
"recommend_service_id",
"recommend_service_city"
,
#
"recommend_service_city",
"recommend_service_price"
,
#
"recommend_service_price",
"device_fd2"
,
#
"device_fd2",
"device_sd2"
,
#
"device_sd2",
"device_fs2"
,
#
"device_fs2",
"device_ss2"
,
#
"device_ss2",
"device_fp2"
,
#
"device_fp2",
"device_sp2"
,
#
"device_sp2",
"device_p2"
,
#
"device_p2",
"device_fd3"
,
#
"device_fd3",
"device_sd3"
,
#
"device_sd3",
"device_fs3"
,
#
"device_fs3",
"device_ss3"
,
#
"device_ss3",
"device_fp3"
,
#
"device_fp3",
"device_sp3"
,
#
"device_sp3",
"device_p3"
,
#
"device_p3",
]
]
CROSS_COLUMNS
=
[
CROSS_COLUMNS
=
[
[
"device_fd"
,
"content_fd"
],
[
"device_fd"
,
"content_fd"
],
...
@@ -334,20 +334,20 @@ CROSS_COLUMNS = [
...
@@ -334,20 +334,20 @@ CROSS_COLUMNS = [
[
"device_fp"
,
"content_fp"
],
[
"device_fp"
,
"content_fp"
],
[
"device_sp"
,
"content_sp"
],
[
"device_sp"
,
"content_sp"
],
[
"device_p"
,
"content_p"
],
[
"device_p"
,
"content_p"
],
[
"device_fd2"
,
"content_fd"
],
#
["device_fd2", "content_fd"],
[
"device_sd2"
,
"content_sd"
],
#
["device_sd2", "content_sd"],
[
"device_fs2"
,
"content_fs"
],
#
["device_fs2", "content_fs"],
[
"device_ss2"
,
"content_ss"
],
#
["device_ss2", "content_ss"],
[
"device_fp2"
,
"content_fp"
],
#
["device_fp2", "content_fp"],
[
"device_sp2"
,
"content_sp"
],
#
["device_sp2", "content_sp"],
[
"device_p2"
,
"content_p"
],
#
["device_p2", "content_p"],
[
"device_fd3"
,
"content_fd"
],
#
["device_fd3", "content_fd"],
[
"device_sd3"
,
"content_sd"
],
#
["device_sd3", "content_sd"],
[
"device_fs3"
,
"content_fs"
],
#
["device_fs3", "content_fs"],
[
"device_ss3"
,
"content_ss"
],
#
["device_ss3", "content_ss"],
[
"device_fp3"
,
"content_fp"
],
#
["device_fp3", "content_fp"],
[
"device_sp3"
,
"content_sp"
],
#
["device_sp3", "content_sp"],
[
"device_p3"
,
"content_p"
],
#
["device_p3", "content_p"],
]
]
...
@@ -422,15 +422,15 @@ def diary_feature_engineering(df):
...
@@ -422,15 +422,15 @@ def diary_feature_engineering(df):
diary_df
[
"is_have_pure_reply"
]
=
diary_df
[
"is_have_pure_reply"
]
.
astype
(
int
)
diary_df
[
"is_have_pure_reply"
]
=
diary_df
[
"is_have_pure_reply"
]
.
astype
(
int
)
diary_df
[
"is_have_reply"
]
=
diary_df
[
"is_have_reply"
]
.
astype
(
int
)
diary_df
[
"is_have_reply"
]
=
diary_df
[
"is_have_reply"
]
.
astype
(
int
)
diary_df
[
"is_related_service"
]
=
diary_df
[
"is_related_service"
]
.
astype
(
int
)
#
diary_df["is_related_service"] = diary_df["is_related_service"].astype(int)
diary_df
[
"service_id"
]
=
diary_df
[
"service_id"
]
.
astype
(
str
)
#
diary_df["service_id"] = diary_df["service_id"].astype(str)
diary_df
[
"recommend_service_id"
]
=
diary_df
[
"recommend_service_id"
]
.
astype
(
str
)
#
diary_df["recommend_service_id"] = diary_df["recommend_service_id"].astype(str)
diary_df
[
"recommend_service_price"
]
=
diary_df
[
"recommend_service_price"
]
.
astype
(
str
)
#
diary_df["recommend_service_price"] = diary_df["recommend_service_price"].astype(str)
diary_df
[
"service_id"
]
=
diary_df
[
"service_id"
]
.
fillna
(
"-1"
)
#
diary_df["service_id"] = diary_df["service_id"].fillna("-1")
diary_df
[
"service_city"
]
=
diary_df
[
"service_city"
]
.
fillna
(
""
)
#
diary_df["service_city"] = diary_df["service_city"].fillna("")
diary_df
[
"recommend_service_id"
]
=
diary_df
[
"recommend_service_id"
]
.
fillna
(
"-1"
)
#
diary_df["recommend_service_id"] = diary_df["recommend_service_id"].fillna("-1")
diary_df
[
"recommend_service_city"
]
=
diary_df
[
"recommend_service_city"
]
.
fillna
(
""
)
#
diary_df["recommend_service_city"] = diary_df["recommend_service_city"].fillna("")
diary_df
=
diary_df
[
DIARY_COLUMNS
]
diary_df
=
diary_df
[
DIARY_COLUMNS
]
...
@@ -463,21 +463,21 @@ def join_features(device_df, diary_df, cc_df):
...
@@ -463,21 +463,21 @@ def join_features(device_df, diary_df, cc_df):
df
[
"device_sp"
]
=
df
[
"second_positions_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
0
))
df
[
"device_sp"
]
=
df
[
"second_positions_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
0
))
df
[
"device_p"
]
=
df
[
"projects_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
0
))
df
[
"device_p"
]
=
df
[
"projects_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
0
))
df
[
"device_fd2"
]
=
df
[
"first_demands_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
#
df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1))
df
[
"device_sd2"
]
=
df
[
"second_demands_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
#
df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1))
df
[
"device_fs2"
]
=
df
[
"first_solutions_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
#
df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1))
df
[
"device_ss2"
]
=
df
[
"second_solutions_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
#
df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1))
df
[
"device_fp2"
]
=
df
[
"first_positions_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
#
df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1))
df
[
"device_sp2"
]
=
df
[
"second_positions_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
#
df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1))
df
[
"device_p2"
]
=
df
[
"projects_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
1
))
#
df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1))
df
[
"device_fd3"
]
=
df
[
"first_demands_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
#
df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2))
df
[
"device_sd3"
]
=
df
[
"second_demands_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
#
df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2))
df
[
"device_fs3"
]
=
df
[
"first_solutions_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
#
df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2))
df
[
"device_ss3"
]
=
df
[
"second_solutions_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
#
df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2))
df
[
"device_fp3"
]
=
df
[
"first_positions_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
#
df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2))
df
[
"device_sp3"
]
=
df
[
"second_positions_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
#
df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2))
df
[
"device_p3"
]
=
df
[
"projects_x"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
2
))
#
df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2))
df
[
"content_fd"
]
=
df
[
"first_demands_y"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
0
))
df
[
"content_fd"
]
=
df
[
"first_demands_y"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
0
))
df
[
"content_sd"
]
=
df
[
"second_demands_y"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
0
))
df
[
"content_sd"
]
=
df
[
"second_demands_y"
]
.
apply
(
lambda
x
:
nth_element
(
x
,
0
))
...
@@ -566,20 +566,20 @@ def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
...
@@ -566,20 +566,20 @@ def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
device_info
[
"device_fp"
]
=
nth_element
(
device_fp
,
0
)
device_info
[
"device_fp"
]
=
nth_element
(
device_fp
,
0
)
device_info
[
"device_sp"
]
=
nth_element
(
device_sp
,
0
)
device_info
[
"device_sp"
]
=
nth_element
(
device_sp
,
0
)
device_info
[
"device_p"
]
=
nth_element
(
device_p
,
0
)
device_info
[
"device_p"
]
=
nth_element
(
device_p
,
0
)
device_info
[
"device_fd2"
]
=
nth_element
(
device_fd
,
1
)
#
device_info["device_fd2"] = nth_element(device_fd, 1)
device_info
[
"device_sd2"
]
=
nth_element
(
device_sd
,
1
)
#
device_info["device_sd2"] = nth_element(device_sd, 1)
device_info
[
"device_fs2"
]
=
nth_element
(
device_fs
,
1
)
#
device_info["device_fs2"] = nth_element(device_fs, 1)
device_info
[
"device_ss2"
]
=
nth_element
(
device_ss
,
1
)
#
device_info["device_ss2"] = nth_element(device_ss, 1)
device_info
[
"device_fp2"
]
=
nth_element
(
device_fp
,
1
)
#
device_info["device_fp2"] = nth_element(device_fp, 1)
device_info
[
"device_sp2"
]
=
nth_element
(
device_sp
,
1
)
#
device_info["device_sp2"] = nth_element(device_sp, 1)
device_info
[
"device_p2"
]
=
nth_element
(
device_p
,
1
)
#
device_info["device_p2"] = nth_element(device_p, 1)
device_info
[
"device_fd3"
]
=
nth_element
(
device_fd
,
2
)
#
device_info["device_fd3"] = nth_element(device_fd, 2)
device_info
[
"device_sd3"
]
=
nth_element
(
device_sd
,
2
)
#
device_info["device_sd3"] = nth_element(device_sd, 2)
device_info
[
"device_fs3"
]
=
nth_element
(
device_fs
,
2
)
#
device_info["device_fs3"] = nth_element(device_fs, 2)
device_info
[
"device_ss3"
]
=
nth_element
(
device_ss
,
2
)
#
device_info["device_ss3"] = nth_element(device_ss, 2)
device_info
[
"device_fp3"
]
=
nth_element
(
device_fp
,
2
)
#
device_info["device_fp3"] = nth_element(device_fp, 2)
device_info
[
"device_sp3"
]
=
nth_element
(
device_sp
,
2
)
#
device_info["device_sp3"] = nth_element(device_sp, 2)
device_info
[
"device_p3"
]
=
nth_element
(
device_p
,
2
)
#
device_info["device_p3"] = nth_element(device_p, 2)
diary_lst
=
[]
diary_lst
=
[]
diary_ids_res
=
[]
diary_ids_res
=
[]
for
id
in
diary_ids
:
for
id
in
diary_ids
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment