Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
gm_strategy_cvr
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
gm_strategy_cvr
Commits
be82b754
Commit
be82b754
authored
Jul 30, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update columns
parent
a23369bd
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
32 deletions
+28
-32
diary_fe.py
src/models/esmm/fe/diary_fe.py
+12
-18
tractate_fe.py
src/models/esmm/fe/tractate_fe.py
+14
-14
train_tractate.py
src/train_tractate.py
+2
-0
No files found.
src/models/esmm/fe/diary_fe.py
View file @
be82b754
...
...
@@ -12,6 +12,15 @@ DIARY_COLUMNS = [
"one_ctr"
,
"three_ctr"
,
"seven_ctr"
,
"fifteen_ctr"
,
"first_demands"
,
"second_demands"
,
"first_solutions"
,
"second_solutions"
,
"first_positions"
,
"second_positions"
,
"projects"
]
_int_columns
=
[
"active_days"
,
"topic_num"
,
"favor_num"
,
"vote_num"
]
_float_columns
=
[
"one_ctr"
,
"three_ctr"
,
"seven_ctr"
,
"fifteen_ctr"
]
_categorical_columns
=
[
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
"device_fd"
,
"content_fd"
,
"fd1"
,
"fd2"
,
"fd3"
,
"device_sd"
,
"content_sd"
,
"sd1"
,
"sd2"
,
"sd3"
,
"device_fs"
,
"content_fs"
,
"fs1"
,
"fs2"
,
"fs3"
,
"device_ss"
,
"content_ss"
,
"ss1"
,
"ss2"
,
"ss3"
,
"device_fp"
,
"content_fp"
,
"fp1"
,
"fp2"
,
"fp3"
,
"device_sp"
,
"content_sp"
,
"sp1"
,
"sp2"
,
"sp3"
,
"device_p"
,
"content_p"
,
"p1"
,
"p2"
,
"p3"
]
def
read_csv_data
(
dataset_path
):
...
...
@@ -152,31 +161,16 @@ def join_features(device_df, diary_df, cc_df):
def
build_features
(
df
):
# TODO
int_columns
=
[
"active_days"
,
"topic_num"
,
"favor_num"
,
"vote_num"
]
float_columns
=
[
"one_ctr"
,
"three_ctr"
,
"seven_ctr"
,
"fifteen_ctr"
]
numeric_features
=
[]
for
col
in
(
int_columns
+
float_columns
):
if
col
in
int_columns
:
for
col
in
(
_int_columns
+
_
float_columns
):
if
col
in
_
int_columns
:
numeric_features
.
append
(
fc
.
bucketized_column
(
fc
.
numeric_column
(
col
,
dtype
=
tf
.
int64
),
boundaries
=
create_boundaries
(
df
,
col
)))
else
:
numeric_features
.
append
(
fc
.
bucketized_column
(
fc
.
numeric_column
(
col
),
boundaries
=
create_boundaries
(
df
,
col
)))
# TODO
categorical_columns
=
[
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
"device_fd"
,
"content_fd"
,
"fd1"
,
"fd2"
,
"fd3"
,
"device_sd"
,
"content_sd"
,
"sd1"
,
"sd2"
,
"sd3"
,
"device_fs"
,
"content_fs"
,
"fs1"
,
"fs2"
,
"fs3"
,
"device_ss"
,
"content_ss"
,
"ss1"
,
"ss2"
,
"ss3"
,
"device_fp"
,
"content_fp"
,
"fp1"
,
"fp2"
,
"fp3"
,
"device_sp"
,
"content_sp"
,
"sp1"
,
"sp2"
,
"sp3"
,
"device_p"
,
"content_p"
,
"p1"
,
"p2"
,
"p3"
]
categorical_ignore_columns
=
[]
categorical_features
=
[]
for
col
in
categorical_columns
:
if
col
not
in
categorical_ignore_columns
:
for
col
in
_categorical_columns
:
if
col
==
"card_id"
:
categorical_features
.
append
(
fc
.
embedding_column
(
fc
.
categorical_column_with_hash_bucket
(
col
,
20000
,
dtype
=
tf
.
int64
),
...
...
src/models/esmm/fe/tractate_fe.py
View file @
be82b754
...
...
@@ -11,6 +11,16 @@ TRACTATE_COLUMNS = [
"first_demands"
,
"second_demands"
,
"first_solutions"
,
"second_solutions"
,
"first_positions"
,
"second_positions"
,
"projects"
]
_int_columns
=
[
"active_days"
,
"reply_num"
,
"reply_pure_num"
]
_float_columns
=
[
"one_ctr"
,
"three_ctr"
,
"seven_ctr"
,
"fifteen_ctr"
,
"thirty_ctr"
,
"sixty_ctr"
,
"ninety_ctr"
,
"history_ctr"
]
_categorical_columns
=
[
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
"show_tag_id"
,
"device_fd"
,
"content_fd"
,
"fd1"
,
"fd2"
,
"fd3"
,
"device_sd"
,
"content_sd"
,
"sd1"
,
"sd2"
,
"sd3"
,
"device_fs"
,
"content_fs"
,
"fs1"
,
"fs2"
,
"fs3"
,
"device_ss"
,
"content_ss"
,
"ss1"
,
"ss2"
,
"ss3"
,
"device_fp"
,
"content_fp"
,
"fp1"
,
"fp2"
,
"fp3"
,
"device_sp"
,
"content_sp"
,
"sp1"
,
"sp2"
,
"sp3"
,
"device_p"
,
"content_p"
,
"p1"
,
"p2"
,
"p3"
]
def
read_csv_data
(
dataset_path
):
tractate_df
=
pd
.
read_csv
(
dataset_path
.
joinpath
(
"tractate.csv"
),
sep
=
"|"
)
...
...
@@ -120,27 +130,20 @@ def join_features(device_df, tractate_df, cc_df):
"second_solutions"
,
"first_positions_x"
,
"first_positions_y"
,
"first_positions"
,
"second_positions_x"
,
"second_positions_y"
,
"second_positions"
,
"projects_x"
,
"projects_y"
,
"projects"
]
# for col in drop_columns:
# if col in df.columns:
# df.drop(col, inplace=True, axis=1)
df
.
drop
(
drop_columns
,
inplace
=
True
,
axis
=
1
)
return
df
def
build_features
(
df
):
# TODO
int_columns
=
[
"active_days"
,
"topic_num"
,
"favor_num"
,
"vote_num"
]
float_columns
=
[
"one_ctr"
,
"three_ctr"
,
"seven_ctr"
,
"fifteen_ctr"
]
numeric_features
=
[]
for
col
in
(
int_columns
+
float_columns
):
if
col
in
int_columns
:
for
col
in
(
_int_columns
+
_
float_columns
):
if
col
in
_
int_columns
:
numeric_features
.
append
(
fc
.
bucketized_column
(
fc
.
numeric_column
(
col
,
dtype
=
tf
.
int64
),
boundaries
=
create_boundaries
(
df
,
col
)))
else
:
numeric_features
.
append
(
fc
.
bucketized_column
(
fc
.
numeric_column
(
col
),
boundaries
=
create_boundaries
(
df
,
col
)))
# TODO
categorical_columns
=
[
_categorical_columns
=
[
"device_id"
,
"active_type"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"card_id"
,
"is_pure_author"
,
"is_have_reply"
,
"is_have_pure_reply"
,
"content_level"
,
"device_fd"
,
"content_fd"
,
"fd1"
,
"fd2"
,
"fd3"
,
"device_sd"
,
"content_sd"
,
"sd1"
,
"sd2"
,
"sd3"
,
"device_fs"
,
"content_fs"
,
...
...
@@ -148,11 +151,8 @@ def build_features(df):
"device_sp"
,
"content_sp"
,
"sp1"
,
"sp2"
,
"sp3"
,
"device_p"
,
"content_p"
,
"p1"
,
"p2"
,
"p3"
]
categorical_ignore_columns
=
[]
categorical_features
=
[]
for
col
in
categorical_columns
:
if
col
not
in
categorical_ignore_columns
:
for
col
in
_categorical_columns
:
if
col
==
"card_id"
:
categorical_features
.
append
(
fc
.
embedding_column
(
fc
.
categorical_column_with_hash_bucket
(
col
,
20000
,
dtype
=
tf
.
int64
),
...
...
src/train_tractate.py
View file @
be82b754
...
...
@@ -25,6 +25,8 @@ def main():
cc_df
=
click_fe
.
click_feature_engineering
(
tractate_click_df
,
tractate_conversion_df
)
df
=
tractate_fe
.
join_features
(
device_df
,
tractate_df
,
cc_df
)
for
i
in
df
.
columns
:
print
(
i
)
# print(df.dtypes)
train_df
,
test_df
=
train_test_split
(
df
,
test_size
=
0.2
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment