Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
cf261c84
Commit
cf261c84
authored
5 years ago
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change test file
parent
c0f0b733
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
3 deletions
+14
-3
feature_engineering.py
eda/esmm/Model_pipline/feature_engineering.py
+14
-3
No files found.
eda/esmm/Model_pipline/feature_engineering.py
View file @
cf261c84
...
...
@@ -159,6 +159,9 @@ def feature_engineer():
sql
=
"select distinct recover_time from knowledge"
unique_values
.
extend
(
get_unique
(
db
,
sql
))
cid_star
=
[
"star_0"
,
"star_1"
,
"star_2"
,
"star_3"
,
"star_4"
,
"star_5"
,
"star_3.5"
]
unique_values
.
extend
(
cid_star
)
# unique_values.append("video")
db
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
...
...
@@ -195,7 +198,7 @@ def feature_engineer():
"wiki.tag as tag1,question.tag as tag2,search.tag as tag3,budan.tag as tag4,"
\
"ot.tag as tag5,sixin.tag as tag6,cart.tag as tag7,doris.search_tag2,doris.search_tag3,"
\
"k.treatment_method,k.price_min,k.price_max,k.treatment_time,k.maintain_time,k.recover_time,"
\
"e.device_id,e.cid_id "
\
"e.device_id,e.cid_id
,concat('star','_',star.content_level) as content_level
"
\
"from jerry_test.esmm_train_data_dwell e left join jerry_test.user_feature u on e.device_id = u.device_id "
\
"left join jerry_test.cid_type_top c on e.device_id = c.device_id "
\
"left join jerry_test.cid_time_cut cut on e.cid_id = cut.cid "
\
...
...
@@ -212,6 +215,7 @@ def feature_engineer():
"left join eagle.src_zhengxing_api_service service on e.diary_service_id = service.id "
\
"left join eagle.src_zhengxing_api_doctor doctor on service.doctor_id = doctor.id "
\
"left join jerry_test.search_doris doris on e.device_id = doris.device_id and e.stat_date = doris.get_date "
\
"left join eagle.src_mimas_prod_api_diary star on e.cid_id = star.id "
\
"where e.stat_date >= '{}'"
.
format
(
start
)
df
=
spark
.
sql
(
sql
)
...
...
@@ -385,7 +389,14 @@ if __name__ == '__main__':
path
=
"hdfs:///strategy/esmm/"
local_path
=
"/home/gmuser/esmm/"
validate_date
,
value_map
,
app_list_map
,
leve2_map
,
leve3_map
=
feature_engineer
()
get_predict
(
validate_date
,
value_map
,
app_list_map
,
leve2_map
,
leve3_map
)
sql
=
"select e.cid_id,concat('star','_',star.content_level) as content_level "
\
"from jerry_test.esmm_train_data_dwell e "
\
"left join eagle.src_mimas_prod_api_diary star on e.cid_id = star.id "
\
"where e.stat_date = '2019-08-13'"
spark
.
sql
(
sql
)
.
show
()
# validate_date, value_map, app_list_map, leve2_map, leve3_map = feature_engineer()
# get_predict(validate_date, value_map, app_list_map, leve2_map, leve3_map)
spark
.
stop
()
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment