Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
eaf4dc69
Commit
eaf4dc69
authored
Aug 05, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改特征工程文件
parent
003a8df1
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
4 deletions
+27
-4
feature_engineering.py
eda/esmm/Model_pipline/feature_engineering.py
+4
-3
train.py
eda/esmm/Model_pipline/train.py
+23
-1
No files found.
eda/esmm/Model_pipline/feature_engineering.py
View file @
eaf4dc69
...
...
@@ -245,7 +245,7 @@ def feature_engineer():
# TODO 上线后把下面train fliter 删除,因为最近一天的数据也要作为训练集
train
=
rdd
.
filter
(
lambda
x
:
x
[
0
]
!=
validate_date
)
.
map
(
train
=
rdd
.
map
(
lambda
x
:
(
x
[
1
],
x
[
2
],
x
[
3
],
x
[
4
],
x
[
5
],
x
[
6
],
x
[
7
],
x
[
8
],
x
[
9
],
x
[
10
],
x
[
11
],
x
[
12
],
x
[
13
],
x
[
14
],
x
[
15
],
x
[
16
],
x
[
17
],
x
[
18
]))
f
=
time
.
time
()
...
...
@@ -301,7 +301,8 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
"left join jerry_test.sixin_tag sixin on e.device_id = sixin.device_id "
\
"left join jerry_test.cart_tag cart on e.device_id = cart.device_id "
\
"left join jerry_test.knowledge k on feat.level2 = k.level2_id "
\
"left join jerry_test.search_doris doris on e.device_id = doris.device_id and e.stat_date = doris.get_date"
"left join jerry_test.search_doris doris on e.device_id = doris.device_id and e.stat_date = doris.get_date "
\
"where device_id = 'C33E2C8E-86E9-4C91-8458-526FB81E4C78'"
features
=
[
"ucity_id"
,
"ccity_name"
,
"device_type"
,
"manufacturer"
,
"channel"
,
"top"
,
"time"
,
"hospital_id"
,
...
...
@@ -386,6 +387,6 @@ if __name__ == '__main__':
local_path
=
"/home/gmuser/esmm/"
validate_date
,
value_map
,
app_list_map
,
leve2_map
,
leve3_map
=
feature_engineer
()
#
get_predict(validate_date, value_map, app_list_map, leve2_map, leve3_map)
get_predict
(
validate_date
,
value_map
,
app_list_map
,
leve2_map
,
leve3_map
)
spark
.
stop
()
eda/esmm/Model_pipline/train.py
View file @
eaf4dc69
...
...
@@ -51,6 +51,13 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
"app_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"level2_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"level3_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag1_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag2_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag3_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag4_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag5_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag6_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag7_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"search_tag2_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"search_tag3_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"uid"
:
tf
.
VarLenFeature
(
tf
.
string
),
...
...
@@ -115,6 +122,13 @@ def model_fn(features, labels, mode, params):
app_list
=
features
[
'app_list'
]
level2_list
=
features
[
'level2_list'
]
level3_list
=
features
[
'level3_list'
]
tag1_list
=
features
[
'tag1_list'
]
tag2_list
=
features
[
'tag2_list'
]
tag3_list
=
features
[
'tag3_list'
]
tag4_list
=
features
[
'tag4_list'
]
tag5_list
=
features
[
'tag5_list'
]
tag6_list
=
features
[
'tag6_list'
]
tag7_list
=
features
[
'tag7_list'
]
search_tag2_list
=
features
[
'search_tag2_list'
]
search_tag3_list
=
features
[
'search_tag3_list'
]
uid
=
features
[
'uid'
]
...
...
@@ -132,12 +146,20 @@ def model_fn(features, labels, mode, params):
app_id
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
app_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
level2
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
level2_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
level3
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
level3_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
tag1
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
tag1_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
tag2
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
tag2_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
tag3
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
tag3_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
tag4
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
tag4_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
tag5
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
tag5_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
tag6
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
tag6_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
tag7
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
tag7_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
search_tag2
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
search_tag2_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
search_tag3
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
search_tag3_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
# x_concat = tf.reshape(embedding_id,shape=[-1, common_dims]) # None * (F * K)
x_concat
=
tf
.
concat
([
tf
.
reshape
(
embedding_id
,
shape
=
[
-
1
,
common_dims
]),
app_id
,
level2
,
level3
,
search_tag2
,
search_tag3
],
axis
=
1
)
x_concat
=
tf
.
concat
([
tf
.
reshape
(
embedding_id
,
shape
=
[
-
1
,
common_dims
]),
app_id
,
level2
,
level3
,
tag1
,
tag2
,
tag3
,
tag4
,
tag5
,
tag6
,
tag7
,
search_tag2
,
search_tag3
],
axis
=
1
)
uid
=
tf
.
sparse
.
to_dense
(
uid
,
default_value
=
""
)
city
=
tf
.
sparse
.
to_dense
(
city
,
default_value
=
""
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment