Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
gm_strategy_cvr
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
gm_strategy_cvr
Commits
d1b06803
Commit
d1b06803
authored
Jul 30, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
rename
parent
0cd940b7
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
112 additions
and
61 deletions
+112
-61
main_portrait.py
src/main_portrait.py
+2
-2
device_fe.py
src/models/esmm/device_fe.py
+2
-2
fe.py
src/models/esmm/fe.py
+0
-0
click_fe.py
src/models/esmm/fe/click_fe.py
+19
-0
diary_fe.py
src/models/esmm/fe/diary_fe.py
+0
-18
tractate_fe.py
src/models/esmm/fe/tractate_fe.py
+59
-0
model.py
src/models/esmm/model.py
+1
-1
tractate_fe.py
src/models/esmm/tractate_fe.py
+0
-33
train_diary.py
src/train_diary.py
+4
-5
train_tractate.py
src/train_tractate.py
+25
-0
No files found.
src/main_portrait.py
View file @
d1b06803
...
...
@@ -5,8 +5,8 @@ import time
import
tensorflow
as
tf
from
models.esmm
import
device_fe
as
device_fe
from
models.esmm
import
diary_fe
as
diary_fe
from
models.esmm
.fe
import
device_fe
as
device_fe
from
models.esmm
.fe
import
diary_fe
as
diary_fe
from
models.esmm.model
import
model_predict_diary
from
utils.cache
import
redis_client2
from
utils.grey
import
recommed_service_category_device_id_by_tail
...
...
src/models/esmm/device_fe.py
View file @
d1b06803
...
...
@@ -3,7 +3,7 @@ import pandas as pd
from
utils.cache
import
redis_db_client
# "channel_first", "city_first", "model_first",
D
IARY_D
EVICE_COLUMNS
=
[
DEVICE_COLUMNS
=
[
"device_id"
,
"active_type"
,
"active_days"
,
"past_consume_ability_history"
,
"potential_consume_ability_history"
,
"price_sensitive_history"
,
"first_demands"
,
"second_demands"
,
"first_solutions"
,
"second_solutions"
,
"first_positions"
,
"second_positions"
,
"projects"
...
...
@@ -67,4 +67,4 @@ def device_feature_engineering(df):
print
(
"device:"
)
print
(
nullseries
[
nullseries
>
0
])
print
(
device_df
.
shape
)
return
device_df
[
D
IARY_D
EVICE_COLUMNS
]
return
device_df
[
DEVICE_COLUMNS
]
src/models/esmm/fe.py
deleted
100644 → 0
View file @
0cd940b7
src/models/esmm/fe/click_fe.py
0 → 100644
View file @
d1b06803
import
pandas
as
pd
def click_feature_engineering(click_df, conversion_df):
    """Left-join click rows with conversion rows and label every click.

    The ``label`` column of each input is renamed (``click_label`` and
    ``conversion_label``), the frames are merged on ``cl_id`` / ``card_id``
    keeping every click row, and clicks without a matching conversion get
    ``conversion_label`` 0. A short null-count / shape report is printed.

    NOTE: both inputs are renamed in place, so the caller's frames lose
    their ``label`` column. Defensive copies were left commented out in
    the original - presumably to avoid duplicating large frames; confirm
    before changing.

    Args:
        click_df: DataFrame with ``cl_id``, ``card_id``, ``label`` and
            ``partition_date`` columns.
        conversion_df: DataFrame with the same columns as ``click_df``.

    Returns:
        The merged DataFrame without the suffixed ``partition_date`` columns.

    Raises:
        KeyError: if the merged frame lacks ``partition_date_x`` or
            ``partition_date_y`` (e.g. an input has no ``partition_date``).
    """
    click_df.rename(columns={"label": "click_label"}, inplace=True)
    conversion_df.rename(columns={"label": "conversion_label"}, inplace=True)

    cc_df = pd.merge(
        click_df,
        conversion_df,
        how="left",
        left_on=["cl_id", "card_id"],
        right_on=["cl_id", "card_id"],
    )
    cc_df.drop(["partition_date_x", "partition_date_y"], axis=1, inplace=True)

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on the selected column: the chained in-place form is deprecated and
    # does not update cc_df when pandas Copy-on-Write is active.
    cc_df["conversion_label"] = cc_df["conversion_label"].fillna(0)

    print("click:")
    nullseries = cc_df.isnull().sum()
    print(nullseries[nullseries > 0])
    print(cc_df.shape)
    return cc_df
src/models/esmm/diary_fe.py
→
src/models/esmm/
fe/
diary_fe.py
View file @
d1b06803
...
...
@@ -80,24 +80,6 @@ def diary_feature_engineering(df):
return
diary_df
[
DIARY_COLUMNS
]
def click_feature_engineering(click_df, conversion_df):
    """Left-join click rows with conversion rows and label every click.

    The ``label`` column of each input is renamed (``click_label`` and
    ``conversion_label``), the frames are merged on ``cl_id`` / ``card_id``
    keeping every click row, and clicks without a matching conversion get
    ``conversion_label`` 0. A short null-count / shape report is printed.

    NOTE: both inputs are renamed in place, so the caller's frames lose
    their ``label`` column. Defensive copies were left commented out in
    the original - presumably to avoid duplicating large frames; confirm
    before changing.

    Args:
        click_df: DataFrame with ``cl_id``, ``card_id``, ``label`` and
            ``partition_date`` columns.
        conversion_df: DataFrame with the same columns as ``click_df``.

    Returns:
        The merged DataFrame without the suffixed ``partition_date`` columns.

    Raises:
        KeyError: if the merged frame lacks ``partition_date_x`` or
            ``partition_date_y`` (e.g. an input has no ``partition_date``).
    """
    click_df.rename(columns={"label": "click_label"}, inplace=True)
    conversion_df.rename(columns={"label": "conversion_label"}, inplace=True)

    cc_df = pd.merge(
        click_df,
        conversion_df,
        how="left",
        left_on=["cl_id", "card_id"],
        right_on=["cl_id", "card_id"],
    )
    cc_df.drop(["partition_date_x", "partition_date_y"], axis=1, inplace=True)

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on the selected column: the chained in-place form is deprecated and
    # does not update cc_df when pandas Copy-on-Write is active.
    cc_df["conversion_label"] = cc_df["conversion_label"].fillna(0)

    print("click:")
    nullseries = cc_df.isnull().sum()
    print(nullseries[nullseries > 0])
    print(cc_df.shape)
    return cc_df
def
join_features
(
device_df
,
diary_df
,
cc_df
):
a
=
pd
.
merge
(
device_df
,
cc_df
,
how
=
"inner"
,
left_on
=
"device_id"
,
right_on
=
"cl_id"
)
df
=
pd
.
merge
(
a
,
diary_df
,
how
=
"inner"
,
left_on
=
"card_id"
,
right_on
=
"card_id"
)
...
...
src/models/esmm/fe/tractate_fe.py
0 → 100644
View file @
d1b06803
import
pandas
as
pd
# Columns kept, in this order, by tractate_feature_engineering().
TRACTATE_COLUMNS = [
    # identifier and scalar attributes
    "card_id",
    "is_pure_author",
    "is_have_pure_reply",
    "is_have_reply",
    "content_level",
    "show_tag_id",
    "reply_num",
    "reply_pure_num",
    # ctr columns
    "one_ctr",
    "three_ctr",
    "seven_ctr",
    "fifteen_ctr",
    "thirty_ctr",
    "sixty_ctr",
    "ninety_ctr",
    "history_ctr",
    # comma-separated columns that are split into lists
    "first_demands",
    "second_demands",
    "first_solutions",
    "second_solutions",
    "first_positions",
    "second_positions",
    "projects",
]
def read_csv_data(dataset_path):
    """Load the three pipe-separated tractate CSV files from a directory.

    Args:
        dataset_path: directory object exposing ``joinpath`` (e.g. a
            ``pathlib.Path``) containing ``tractate.csv``,
            ``tractate_click.csv`` and ``tractate_click_cvr.csv``.

    Returns:
        Tuple ``(tractate_df, click_df, conversion_df)`` of DataFrames,
        read in that order.
    """
    file_names = ("tractate.csv", "tractate_click.csv", "tractate_click_cvr.csv")
    tractate_df, click_df, conversion_df = (
        pd.read_csv(dataset_path.joinpath(file_name), sep="|")
        for file_name in file_names
    )
    return tractate_df, click_df, conversion_df
def get_tractate_from_redis():
    """Placeholder for fetching tractate features from redis (unimplemented).

    Intended return shape, per the original note:
    ``{diary_id: {first_demands: [], is_pure_author: 1}}``.
    NOTE(review): the key is spelled ``diary_id``; presumably a tractate
    id is meant - confirm when this is implemented.

    Returns:
        None for now; the body is still a stub.
    """
    return None
def tractate_feature_engineering(tractate_df):
    """Clean the raw tractate frame and keep only the model columns.

    Works on a copy of the input. The seven comma-separated columns are
    split into Python lists (any value that is not a list after the split,
    such as a missing value, becomes ``[]``), the three flag columns are
    cast to ``int``, and a null-count / shape report is printed.

    Args:
        tractate_df: raw tractate DataFrame; it is not modified.

    Returns:
        A DataFrame restricted to ``TRACTATE_COLUMNS``, in that order.
    """
    features = tractate_df.copy()

    multi_value_columns = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "first_positions",
        "second_positions",
        "projects",
    )
    for column in multi_value_columns:
        pieces = features[column].str.split(",")
        # Non-list results of the split (e.g. missing values) become [].
        features[column] = pieces.apply(
            lambda value: value if isinstance(value, list) else []
        )

    for column in ("is_pure_author", "is_have_pure_reply", "is_have_reply"):
        features[column] = features[column].astype(int)

    print("tractate:")
    null_counts = features.isnull().sum()
    print(null_counts[null_counts > 0])
    print(features.shape)
    return features[TRACTATE_COLUMNS]
def join_features(device_df, tractate_df, cc_df):
    """Placeholder for joining device, tractate and click features.

    Not implemented yet: the arguments are ignored and None is returned.
    """
    return None
def device_tractate_fe(device_id, tractate_ids, device_dict, tractate_dict):
    """Placeholder for building device/tractate features for prediction.

    Not implemented yet: the arguments are ignored and None is returned.
    """
    return None
src/models/esmm/model.py
View file @
d1b06803
...
...
@@ -6,7 +6,7 @@ from tensorflow import feature_column as fc
from
tensorflow.python.estimator.canned
import
head
as
head_lib
from
tensorflow.python.ops.losses
import
losses
from
.diary_fe
import
device_diary_fe
from
.
fe.
diary_fe
import
device_diary_fe
from
.utils
import
common_elements
,
nth_element
...
...
src/models/esmm/tractate_fe.py
deleted
100644 → 0
View file @
0cd940b7
import
pandas
as
pd
# No tractate feature columns are selected yet; the list is still empty.
TRACTATE_COLUMNS = []
def read_csv_data(dataset_path):
    """Load the three pipe-separated tractate CSV files from a directory.

    Args:
        dataset_path: directory object exposing ``joinpath`` (e.g. a
            ``pathlib.Path``) containing ``tractate.csv``,
            ``tractate_click.csv`` and ``tractate_click_cvr.csv``.

    Returns:
        Tuple ``(tractate_df, click_df, conversion_df)`` of DataFrames,
        read in that order.
    """
    loaded = [
        pd.read_csv(dataset_path.joinpath(file_name), sep="|")
        for file_name in (
            "tractate.csv",
            "tractate_click.csv",
            "tractate_click_cvr.csv",
        )
    ]
    tractate_df, click_df, conversion_df = loaded
    return tractate_df, click_df, conversion_df
def get_tractate_from_redis():
    """Placeholder for fetching tractate features from redis (unimplemented).

    Intended return shape, per the original note:
    ``{diary_id: {first_demands: [], is_pure_author: 1}}``.
    NOTE(review): the key is spelled ``diary_id``; presumably a tractate
    id is meant - confirm when this is implemented.

    Returns:
        None for now; the body is still a stub.
    """
    return None
def tractate_feature_engineering(df):
    """Unfinished tractate feature step: copies the input, returns None.

    Args:
        df: object exposing ``copy()`` (a DataFrame in practice); it is
            not modified.

    Returns:
        None - the copy is made but not yet processed or returned.
    """
    working_copy = df.copy()  # not used yet; kept as the starting point
    return None
def click_feature_engineering(click_df, conversion_df):
    """Placeholder for combining click and conversion data.

    Not implemented yet: the arguments are ignored and None is returned.
    """
    return None
def join_features(device_df, tractate_df, cc_df):
    """Placeholder for joining device, tractate and click features.

    Not implemented yet: the arguments are ignored and None is returned.
    """
    return None
def device_tractate_fe(device_id, tractate_ids, device_dict, tractate_dict):
    """Placeholder for building device/tractate features for prediction.

    Not implemented yet: the arguments are ignored and None is returned.
    """
    return None
src/
main
.py
→
src/
train_diary
.py
View file @
d1b06803
...
...
@@ -9,14 +9,13 @@ from pathlib import Path
import
tensorflow
as
tf
from
sklearn.model_selection
import
train_test_split
from
models.esmm
import
device_fe
as
device_fe
from
models.esmm
import
diary_fe
as
diary_fe
from
models.esmm.fe
import
device_fe
as
device_fe
from
models.esmm.fe
import
diary_fe
as
diary_fe
from
models.esmm.fe
import
click_fe
as
click_fe
from
models.esmm.diary_model
import
model_predict_diary
from
models.esmm.input_fn
import
build_features
,
esmm_input_fn
from
models.esmm.model
import
esmm_model_fn
,
model_export
# tf.compat.v1.enable_eager_execution()
def
main
():
time_begin
=
time
.
time
()
...
...
@@ -34,7 +33,7 @@ def main():
# print(device_df.sample(1))
diary_df
=
diary_fe
.
diary_feature_engineering
(
diary_df
)
# print(diary_df.sample(1))
cc_df
=
diary
_fe
.
click_feature_engineering
(
diary_click_df
,
diary_conversion_df
)
cc_df
=
click
_fe
.
click_feature_engineering
(
diary_click_df
,
diary_conversion_df
)
# print(cc_df.sample(1))
df
=
diary_fe
.
join_features
(
device_df
,
diary_df
,
cc_df
)
# print(df.sample(1))
...
...
src/train_tractate.py
0 → 100644
View file @
d1b06803
import
datetime
import
time
from
pathlib
import
Path
from
sklearn.model_selection
import
train_test_split
from
models.esmm.fe
import
device_fe
as
device_fe
from
models.esmm.fe
import
tractate_fe
as
tractate_fe
from
models.esmm.fe
import
click_fe
as
click_fe
def main():
    """Load the tractate and device CSV data and report the elapsed time.

    Reads the tractate, tractate-click and tractate-conversion frames via
    ``tractate_fe.read_csv_data`` and the device frame via
    ``device_fe.read_csv_data`` from ``~/data/cvr_data``, then prints the
    total run time in minutes together with the current timestamp. The
    loaded frames are not used further yet.
    """
    time_begin = time.time()

    data_path = Path("~/data/cvr_data").expanduser()  # local
    # data_path = Path("/srv/apps/node2vec_git/cvr_data/")  # server

    tractate_df, tractate_click_df, tractate_conversion_df = (
        tractate_fe.read_csv_data(data_path)
    )
    device_df = device_fe.read_csv_data(data_path)

    total_time = (time.time() - time_begin) / 60
    # The file does `import datetime` (the module), so the class has to be
    # qualified; a bare `datetime.now()` raises AttributeError.
    print(
        "total cost {:.2f} mins at {}".format(
            total_time, datetime.datetime.now()
        )
    )


if __name__ == "__main__":
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment