Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
gm_strategy_cvr
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
gm_strategy_cvr
Commits
f9fc2b16
Commit
f9fc2b16
authored
Jul 22, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
try predict
parent
68f0379a
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
310 additions
and
151 deletions
+310
-151
main.py
src/main.py
+80
-70
fe.py
src/models/esmm/fe.py
+189
-77
model.py
src/models/esmm/model.py
+41
-4
No files found.
src/main.py
View file @
f9fc2b16
...
@@ -12,9 +12,10 @@ import tensorflow as tf
...
@@ -12,9 +12,10 @@ import tensorflow as tf
from
sklearn.model_selection
import
train_test_split
from
sklearn.model_selection
import
train_test_split
from
models.esmm.fe
import
(
click_feature_engineering
,
device_feature_engineering
,
diary_feature_engineering
,
from
models.esmm.fe
import
(
click_feature_engineering
,
device_feature_engineering
,
diary_feature_engineering
,
get_device_df_from_redis
,
get_diary_df_from_redis
,
join_device_diary
,
join_features
,
read_csv_data
)
get_device_dict_from_redis
,
get_diary_dict_from_redis
,
join_device_diary
,
join_features
,
read_csv_data
)
from
models.esmm.input_fn
import
build_features
,
esmm_input_fn
from
models.esmm.input_fn
import
build_features
,
esmm_input_fn
from
models.esmm.model
import
esmm_model_fn
,
model_export
,
model_predict
from
models.esmm.model
import
(
esmm_model_fn
,
model_export
,
model_predict
,
model_predict2
)
# tf.compat.v1.enable_eager_execution()
# tf.compat.v1.enable_eager_execution()
...
@@ -66,9 +67,10 @@ def main():
...
@@ -66,9 +67,10 @@ def main():
# print("save to: " + save_path)
# print("save to: " + save_path)
save_path
=
"/home/gmuser/data/models/1595317247"
save_path
=
"/home/gmuser/data/models/1595317247"
# save_path = str(Path("~/Desktop/models/1595297428").expanduser())
predict_fn
=
tf
.
contrib
.
predictor
.
from_saved_model
(
save_path
)
predict_fn
=
tf
.
contrib
.
predictor
.
from_saved_model
(
save_path
)
# for i in range(
10
):
# for i in range(
5
):
# test_300 = test_df.sample(300)
# test_300 = test_df.sample(300)
# model_predict(test_300, predict_fn)
# model_predict(test_300, predict_fn)
...
@@ -78,73 +80,81 @@ def main():
...
@@ -78,73 +80,81 @@ def main():
# "16195283", "16838351", "17161073", "17297878", "17307484", "17396235", "16418737", "16995481", "17312201", "12237988"
# "16195283", "16838351", "17161073", "17297878", "17307484", "17396235", "16418737", "16995481", "17312201", "12237988"
# ]
# ]
df
=
get_device_df_from_redis
()
# df = get_device_df_from_redis()
df2
=
get_diary_df_from_redis
()
# df2 = get_diary_df_from_redis()
redis_device_df
=
device_feature_engineering
(
df
)
# redis_device_df = device_feature_engineering(df)
redis_diary_df
=
diary_feature_engineering
(
df2
,
from_redis
=
True
)
# redis_diary_df = diary_feature_engineering(df2, from_redis=True)
device_ids
=
list
(
redis_device_df
[
"device_id"
]
.
values
)[:
20
]
# device_ids = list(redis_device_df["device_id"].values)[:20]
diary_ids
=
list
(
redis_diary_df
[
"card_id"
]
.
values
)
# diary_ids = list(redis_diary_df["card_id"].values)
def
test1
():
device_dict
=
get_device_dict_from_redis
()
time_1
=
timeit
.
default_timer
()
diary_dict
=
get_diary_dict_from_redis
()
user1
=
join_device_diary
(
random
.
sample
(
device_ids
,
1
)[
0
],
random
.
sample
(
diary_ids
,
300
),
redis_device_df
,
redis_diary_df
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
device_ids
=
list
(
device_dict
.
keys
())[:
20
]
print
(
"join df cost {:.5f}s"
.
format
(
total_1
))
diary_ids
=
list
(
diary_dict
.
keys
())
time_1
=
timeit
.
default_timer
()
model_predict2
(
random
.
sample
(
device_ids
,
1
)[
0
],
random
.
sample
(
diary_ids
,
300
),
device_dict
,
diary_dict
,
predict_fn
)
model_predict
(
user1
,
predict_fn
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
# def test1():
print
(
"total prediction cost {:.5f}s"
.
format
(
total_1
),
"
\n
"
)
# time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
def
test2
():
# total_1 = (timeit.default_timer() - time_1)
time_1
=
timeit
.
default_timer
()
# print("join df cost {:.5f}s".format(total_1))
user1
=
join_device_diary
(
random
.
sample
(
device_ids
,
1
)[
0
],
random
.
sample
(
diary_ids
,
300
),
redis_device_df
,
redis_diary_df
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
# time_1 = timeit.default_timer()
print
(
"join df cost {:.5f}s"
.
format
(
total_1
))
# model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
time_1
=
timeit
.
default_timer
()
# print("total prediction cost {:.5f}s".format(total_1), "\n")
model_predict
(
user1
,
predict_fn
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
# def test2():
print
(
"total prediction cost {:.5f}s"
.
format
(
total_1
),
"
\n
"
)
# time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
def
test3
():
# total_1 = (timeit.default_timer() - time_1)
time_1
=
timeit
.
default_timer
()
# print("join df cost {:.5f}s".format(total_1))
user1
=
join_device_diary
(
random
.
sample
(
device_ids
,
1
)[
0
],
random
.
sample
(
diary_ids
,
300
),
redis_device_df
,
redis_diary_df
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
# time_1 = timeit.default_timer()
print
(
"join df cost {:.5f}s"
.
format
(
total_1
))
# model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
time_1
=
timeit
.
default_timer
()
# print("total prediction cost {:.5f}s".format(total_1), "\n")
model_predict
(
user1
,
predict_fn
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
# def test3():
print
(
"total prediction cost {:.5f}s"
.
format
(
total_1
),
"
\n
"
)
# time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
def
test4
():
# total_1 = (timeit.default_timer() - time_1)
time_1
=
timeit
.
default_timer
()
# print("join df cost {:.5f}s".format(total_1))
user1
=
join_device_diary
(
random
.
sample
(
device_ids
,
1
)[
0
],
random
.
sample
(
diary_ids
,
300
),
redis_device_df
,
redis_diary_df
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
# time_1 = timeit.default_timer()
print
(
"join df cost {:.5f}s"
.
format
(
total_1
))
# model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
time_1
=
timeit
.
default_timer
()
# print("total prediction cost {:.5f}s".format(total_1), "\n")
model_predict
(
user1
,
predict_fn
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
# def test4():
print
(
"total prediction cost {:.5f}s"
.
format
(
total_1
),
"
\n
"
)
# time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
def
test5
():
# total_1 = (timeit.default_timer() - time_1)
time_1
=
timeit
.
default_timer
()
# print("join df cost {:.5f}s".format(total_1))
user1
=
join_device_diary
(
random
.
sample
(
device_ids
,
1
)[
0
],
random
.
sample
(
diary_ids
,
300
),
redis_device_df
,
redis_diary_df
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
# time_1 = timeit.default_timer()
print
(
"join df cost {:.5f}s"
.
format
(
total_1
))
# model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
time_1
=
timeit
.
default_timer
()
# print("total prediction cost {:.5f}s".format(total_1), "\n")
model_predict
(
user1
,
predict_fn
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
# def test5():
print
(
"total prediction cost {:.5f}s"
.
format
(
total_1
),
"
\n
"
)
# time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
test1
()
# total_1 = (timeit.default_timer() - time_1)
test2
()
# print("join df cost {:.5f}s".format(total_1))
test3
()
test4
()
# time_1 = timeit.default_timer()
test5
()
# model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
# print("total prediction cost {:.5f}s".format(total_1), "\n")
# test1()
# test2()
# test3()
# test4()
# test5()
total_time
=
(
time
.
time
()
-
time_begin
)
/
60
total_time
=
(
time
.
time
()
-
time_begin
)
/
60
print
(
"total cost {:.2f} mins at {}"
.
format
(
total_time
,
datetime
.
now
()))
print
(
"total cost {:.2f} mins at {}"
.
format
(
total_time
,
datetime
.
now
()))
...
...
src/models/esmm/fe.py
View file @
f9fc2b16
import
timeit
import
pandas
as
pd
import
pandas
as
pd
from
utils.cache
import
redis_db_client
from
utils.cache
import
redis_db_client
...
@@ -15,24 +17,65 @@ def read_csv_data(dataset_path):
...
@@ -15,24 +17,65 @@ def read_csv_data(dataset_path):
return
device_df
,
diary_df
,
click_df
,
conversion_df
return
device_df
,
diary_df
,
click_df
,
conversion_df
def _get_data_from_redis(key):
    """Load a Redis hash into a pandas DataFrame.

    Every value of the hash stored at ``key`` is decoded as UTF-8 and split
    on ``"|"`` to form one row. The column names are read from the plain
    string stored at ``key + ":column"``, decoded and split the same way.

    Args:
        key: name of the Redis hash holding the rows.

    Returns:
        A ``pd.DataFrame`` with one row per hash value; all cells are strings.
    """
    raw_rows = redis_db_client.hgetall(key).values()
    rows = [str(raw, "utf-8").split("|") for raw in raw_rows]
    header = str(redis_db_client.get(key + ":column"), "utf-8").split("|")
    return pd.DataFrame(rows, columns=header)
def get_device_df_from_redis():
    """Return the table stored under the ``cvr:db:device`` Redis key as a DataFrame."""
    device_key = "cvr:db:device"
    return _get_data_from_redis(device_key)
def get_diary_df_from_redis():
    """Return the table stored under the ``cvr:db:content:diary`` Redis key as a DataFrame."""
    return _get_data_from_redis("cvr:db:content:diary")


def get_device_dict_from_redis():
    """Load every device row from Redis into a dict keyed by ``device_id``.

    The column names come from the string stored at ``cvr:db:device:column``
    and each value of the ``cvr:db:device`` hash is one ``"|"``-separated row.
    Both are decoded as UTF-8. The tag columns listed in ``list_columns`` are
    further split on ``","`` into lists; every other column is kept as the
    raw string.

    Returns:
        ``{device_id: {column_name: value}}``.

    Raises:
        IndexError: if a row has more fields than there are column names.
        KeyError: if the column list does not contain ``device_id``.
    """
    db_key = "cvr:db:device"
    column_key = db_key + ":column"
    columns = str(redis_db_client.get(column_key), "utf-8").split("|")

    # Built once instead of once per field; membership is all that is needed.
    list_columns = frozenset([
        "first_demands", "second_demands", "first_solutions", "second_solutions",
        "first_positions", "second_positions", "projects",
    ])

    res = {}
    for raw_row in redis_db_client.hgetall(db_key).values():
        row = {}
        for index, elem in enumerate(str(raw_row, "utf-8").split("|")):
            col_name = columns[index]
            row[col_name] = elem.split(",") if col_name in list_columns else elem
        res[row["device_id"]] = row
    return res
def get_diary_dict_from_redis():
    """Load every diary row from Redis into a dict keyed by ``card_id``.

    Column names are read from ``cvr:db:content:diary:column`` and rows from
    the values of the ``cvr:db:content:diary`` hash; both are UTF-8 decoded
    and split on ``"|"``. Per column:

    * tag columns (demands / solutions / positions / projects) are split on
      ``","`` into lists;
    * the three ``is_*`` flag columns become ``1`` when the field equals the
      string ``"true"`` and ``0`` otherwise;
    * everything else stays a string.

    Returns:
        ``{card_id: {column_name: value}}``.
    """
    db_key = "cvr:db:content:diary"
    header = str(redis_db_client.get(db_key + ":column"), "utf-8").split("|")

    list_columns = {
        "first_demands", "second_demands", "first_solutions", "second_solutions",
        "first_positions", "second_positions", "projects",
    }
    flag_columns = {"is_pure_author", "is_have_pure_reply", "is_have_reply"}

    diaries = {}
    for raw in redis_db_client.hgetall(db_key).values():
        record = {}
        for position, field in enumerate(str(raw, "utf-8").split("|")):
            name = header[position]
            if name in list_columns:
                record[name] = field.split(",")
            elif name in flag_columns:
                record[name] = 1 if field == "true" else 0
            else:
                record[name] = field
        diaries[record["card_id"]] = record
    return diaries
def
device_feature_engineering
(
df
):
def
device_feature_engineering
(
df
):
...
@@ -195,64 +238,133 @@ def join_features(device_df, diary_df, cc_df):
...
@@ -195,64 +238,133 @@ def join_features(device_df, diary_df, cc_df):
return
df
return
df
def join_device_diary(device_id, diary_ids, device_df, diary_df):
    """Join one device row with the selected diary rows and derive tag features.

    The device row (``device_df["device_id"] == device_id``) is left-merged
    with the diaries whose ``card_id`` is in ``diary_ids``. For each of the
    seven tag columns the merge yields ``<col>_x`` (device side) and
    ``<col>_y`` (diary side), from which these columns are derived:

    * ``device_<tag>``  - ``nth_element(<col>_x, 0)``
    * ``content_<tag>`` - ``nth_element(<col>_y, 0)``
    * ``<tag>1..3``     - ``nth_element(common_elements(<col>_x, <col>_y), 0..2)``

    The raw ``_x`` / ``_y`` columns and the intermediate intersection columns
    are dropped before returning.

    Args:
        device_id: id of the device to select from ``device_df``.
        diary_ids: iterable of ``card_id`` values to select from ``diary_df``.
        device_df: DataFrame with a ``device_id`` column and the tag columns.
        diary_df: DataFrame with a ``card_id`` column and the tag columns.

    Returns:
        The merged DataFrame with the derived feature columns.
    """
    # (merged column name, short tag) in the order derived columns are emitted.
    tag_columns = [
        ("first_demands", "fd"),
        ("second_demands", "sd"),
        ("first_solutions", "fs"),
        ("second_solutions", "ss"),
        ("first_positions", "fp"),
        ("second_positions", "sp"),
        ("projects", "p"),
    ]

    a_df = device_df.loc[device_df["device_id"] == device_id]
    # Explicit copy: the next line assigns a column, which must not target a
    # slice of the caller's diary_df (pandas chained-assignment hazard).
    b_df = diary_df.loc[diary_df["card_id"].isin(diary_ids)].copy()
    b_df["device_id"] = device_id
    df = pd.merge(a_df, b_df, how="left", on="device_id")

    # Device/diary intersection per tag column. Both sides use the SAME
    # column name; first_positions previously intersected first_positions_x
    # with second_positions_y.
    for column, _ in tag_columns:
        df[column] = df[[column + "_x", column + "_y"]].apply(lambda x: common_elements(*x), axis=1)

    for column, tag in tag_columns:
        df["device_" + tag] = df[column + "_x"].apply(lambda x: nth_element(x, 0))

    for column, tag in tag_columns:
        df["content_" + tag] = df[column + "_y"].apply(lambda x: nth_element(x, 0))

    for column, tag in tag_columns:
        for rank in range(3):
            # rank is bound as a default to avoid the late-binding closure pitfall.
            df[tag + str(rank + 1)] = df[column].apply(lambda x, rank=rank: nth_element(x, rank))

    drop_columns = []
    for column, _ in tag_columns:
        drop_columns.extend([column + "_x", column + "_y", column])
    df.drop(drop_columns, inplace=True, axis=1)
    return df
def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
    """Build dict-based features for one device and a list of candidate diaries.

    A shallow copy of the device record gets ``device_<tag>`` keys holding
    ``nth_element(<device tag list>, 0)``. Each diary found in ``diary_dict``
    is shallow-copied and gets:

    * ``content_<tag>`` - ``nth_element(<diary tag list>, 0)``. This is the
      key name ``model_predict2`` and ``join_device_diary`` use.
    * ``diary_<tag>``   - the same value under the previous key name.
    * ``<tag>1..3``     - ``nth_element(common_elements(device, diary), 0..2)``.

    Diary ids that are missing from ``diary_dict`` (or map to an empty dict)
    are skipped. The elapsed time is printed before returning.

    Args:
        device_id: key into ``device_dict``; a missing device yields ``{}``
            plus the derived ``device_*`` keys.
        diary_ids: iterable of keys into ``diary_dict``.
        device_dict: ``{device_id: record}``.
        diary_dict: ``{card_id: record}``.

    Returns:
        ``(device_info, diary_lst)`` - the augmented device record and the
        list of augmented diary records, in ``diary_ids`` order.
    """
    time_1 = timeit.default_timer()

    # (record key, short tag) in the order derived keys are emitted.
    tag_columns = (
        ("first_demands", "fd"),
        ("second_demands", "sd"),
        ("first_solutions", "fs"),
        ("second_solutions", "ss"),
        ("first_positions", "fp"),
        ("second_positions", "sp"),
        ("projects", "p"),
    )

    device_info = device_dict.get(device_id, {}).copy()
    device_tags = {column: device_info.get(column, []) for column, _ in tag_columns}
    for column, tag in tag_columns:
        device_info["device_" + tag] = nth_element(device_tags[column], 0)

    diary_lst = []
    for diary_id in diary_ids:  # renamed from `id`, which shadowed the builtin
        tmp = diary_dict.get(diary_id, {}).copy()
        if not tmp:
            continue

        diary_tags = {column: tmp.get(column, []) for column, _ in tag_columns}
        common_tags = {
            column: common_elements(device_tags[column], diary_tags[column])
            for column, _ in tag_columns
        }

        for column, tag in tag_columns:
            first = nth_element(diary_tags[column], 0)
            # The model's feature list names these content_*; diary_* alone
            # was silently ignored when the Example was serialised.
            tmp["content_" + tag] = first
            # Old key name kept so consumers written against it keep working.
            tmp["diary_" + tag] = first

        for column, tag in tag_columns:
            for rank in range(3):
                tmp[tag + str(rank + 1)] = nth_element(common_tags[column], rank)

        diary_lst.append(tmp)

    total_1 = (timeit.default_timer() - time_1)
    print("join device diary cost {:.5f}s".format(total_1))
    return device_info, diary_lst
src/models/esmm/model.py
View file @
f9fc2b16
import
timeit
import
timeit
import
numba
import
tensorflow
as
tf
import
tensorflow
as
tf
from
tensorflow
import
feature_column
as
fc
from
tensorflow
import
feature_column
as
fc
from
tensorflow.python.estimator.canned
import
head
as
head_lib
from
tensorflow.python.estimator.canned
import
head
as
head_lib
from
tensorflow.python.ops.losses
import
losses
from
tensorflow.python.ops.losses
import
losses
from
.fe
import
device_diary_fe
from
.utils
import
common_elements
,
nth_element
def
build_deep_layer
(
net
,
params
):
def
build_deep_layer
(
net
,
params
):
for
num_hidden_units
in
params
[
"hidden_units"
]:
for
num_hidden_units
in
params
[
"hidden_units"
]:
...
@@ -92,6 +94,41 @@ def _bytes_feature(value):
...
@@ -92,6 +94,41 @@ def _bytes_feature(value):
return
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
value
]))
return
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
value
]))
def model_predict2(device_id, diary_ids, device_dict, diary_dict, predict_fn):
    """Score candidate diaries for one device using dict-based features.

    Features come from ``device_diary_fe``. For each diary the device record
    and the diary record are merged (diary values win on key clashes), and
    every key listed in one of the three column lists is encoded as an int64,
    float or UTF-8 bytes feature of a ``tf.train.Example``. Keys in none of
    the lists are left out. The serialized examples are passed to
    ``predict_fn`` as ``{"examples": [...]}`` and the elapsed time is printed.

    Args:
        device_id: forwarded to ``device_diary_fe``.
        diary_ids: forwarded to ``device_diary_fe``.
        device_dict: forwarded to ``device_diary_fe``.
        diary_dict: forwarded to ``device_diary_fe``.
        predict_fn: callable accepting ``{"examples": list_of_bytes}``.

    Returns:
        Whatever ``predict_fn`` returns.
    """
    time_1 = timeit.default_timer()
    device_info, diary_lst = device_diary_fe(device_id, diary_ids, device_dict, diary_dict)

    int_columns = [
        "active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply",
        "content_level", "topic_num", "favor_num", "vote_num"
    ]
    float_columns = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr"]
    str_columns = [
        "device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history",
        "device_fd", "device_sd", "device_fs", "device_ss", "device_fp", "device_sp", "device_p", "content_fd",
        "content_sd", "content_fs", "content_ss", "content_fp", "content_sp", "content_p", "fd1", "fd2", "fd3",
        "sd1", "sd2", "sd3", "fs1", "fs2", "fs3", "ss1", "ss2", "ss3", "fp1", "fp2", "fp3", "sp1", "sp2", "sp3",
        "p1", "p2", "p3"
    ]

    def _as_int(raw):
        return _int64_feature(int(raw))

    def _as_float(raw):
        return _float_feature(float(raw))

    def _as_bytes(raw):
        return _bytes_feature(str(raw).encode(encoding="utf-8"))

    # Registered lowest priority first, so a name appearing in several lists
    # resolves int > float > str, matching an int/float/str if-elif chain.
    encoders = {}
    for names, encoder in ((str_columns, _as_bytes), (float_columns, _as_float), (int_columns, _as_int)):
        encoders.update(dict.fromkeys(names, encoder))

    examples = []
    for diary_info in diary_lst:
        merged = {**device_info, **diary_info}
        features = {name: encoders[name](raw) for name, raw in merged.items() if name in encoders}
        example = tf.train.Example(features=tf.train.Features(feature=features))
        examples.append(example.SerializeToString())

    predictions = predict_fn({"examples": examples})
    total_1 = (timeit.default_timer() - time_1)
    print("prediction cost {:.5f}s".format(total_1))
    return predictions
def
model_predict
(
inputs
,
predict_fn
):
def
model_predict
(
inputs
,
predict_fn
):
time_1
=
timeit
.
default_timer
()
time_1
=
timeit
.
default_timer
()
int_columns
=
[
int_columns
=
[
...
@@ -106,11 +143,11 @@ def model_predict(inputs, predict_fn):
...
@@ -106,11 +143,11 @@ def model_predict(inputs, predict_fn):
if
col
in
[
"click_label"
,
"conversion_label"
]:
if
col
in
[
"click_label"
,
"conversion_label"
]:
pass
pass
elif
col
in
int_columns
:
elif
col
in
int_columns
:
features
[
col
]
=
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
int
(
value
)]
))
features
[
col
]
=
_int64_feature
(
int
(
value
))
elif
col
in
float_columns
:
elif
col
in
float_columns
:
features
[
col
]
=
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
[
float
(
value
)]
))
features
[
col
]
=
_float_feature
(
float
(
value
))
else
:
else
:
features
[
col
]
=
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
str
(
value
)
.
encode
(
encoding
=
"utf-8"
)]
))
features
[
col
]
=
_bytes_feature
(
str
(
value
)
.
encode
(
encoding
=
"utf-8"
))
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
features
))
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
features
))
examples
.
append
(
example
.
SerializeToString
())
examples
.
append
(
example
.
SerializeToString
())
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
total_1
=
(
timeit
.
default_timer
()
-
time_1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment