ffm-baseline · Commit 759b7930
authored 5 years ago by 张彦钊
Put the most recent day's dataset into the training set
parent a6ae0551
Showing 1 changed file with 5 additions and 7 deletions

tensnsorflow/multi.py  (+5, -7)
@@ -85,8 +85,6 @@ def feature_engineer():
     temp = list(range(2 + apps_number + level2_number + level3_number,
                       2 + apps_number + level2_number + level3_number + len(unique_values)))
     value_map = dict(zip(unique_values, temp))
-    print("tets")
-    print(value_map["top"])
     rdd = df.select("app_list", "level2_ids", "level3_ids", "stat_date", "ucity_id", "ccity_name", "device_type", "manufacturer",
                     "channel", "top", "time", "hospital_id", "treatment_method", "price_min",
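
Note on the hunk above: value_map is built by giving every unique categorical value a contiguous integer id, offset past the id blocks already reserved for the app-list and level2/level3 features (the 2 + apps_number + level2_number + level3_number term). A minimal standalone sketch of that indexing pattern, using hypothetical toy counts and values in place of apps_number, level2_number, level3_number and unique_values:

    # Toy sketch (not code from this repository) of the id-assignment pattern shown above.
    # apps_number, level2_number, level3_number and unique_values are hypothetical stand-ins.
    apps_number, level2_number, level3_number = 3, 2, 2
    unique_values = ["beijing", "shanghai", "ios", "android"]

    offset = 2 + apps_number + level2_number + level3_number   # first id after the reserved blocks
    temp = list(range(offset, offset + len(unique_values)))    # [9, 10, 11, 12]
    value_map = dict(zip(unique_values, temp))

    print(value_map)   # {'beijing': 9, 'shanghai': 10, 'ios': 11, 'android': 12}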
@@ -138,15 +136,15 @@ def get_predict(date,value_map,app_list_map,level2_map,level3_map):
                      "channel", "top", "time", "app_list", "hospital_id", "level3_ids"])
     rdd = df.select("app_list", "level2_ids", "level3_ids", "ucity_id", "device_id", "cid_id", "label", "y", "z",
-                    "ccity_name", "device_type", "manufacturer", "channel", "top", "time", "hospital_id",
+                    "ccity_name", "device_type", "manufacturer", "channel", "time", "hospital_id",
                     "treatment_method", "price_min", "price_max", "treatment_time", "maintain_time",
-                    "recover_time") \
+                    "recover_time", "top") \
         .rdd.map(lambda x: (app_list_func(x[0], app_list_map), app_list_func(x[1], level2_map),
                             app_list_func(x[2], level3_map), x[3], x[4], x[5], x[6], x[7], x[8],
                             value_map[x[3]], value_map[x[9]],
                             value_map[x[10]], value_map[x[11]], value_map[x[12]], value_map[x[13]],
                             value_map[x[14]], value_map[x[15]], value_map[x[16]], value_map[x[17]],
-                            value_map[x[18]], value_map[x[19]], value_map[x[20]], value_map[x[21]],
+                            value_map[x[18]], value_map[x[19]], value_map[x[20]], value_map.get(x[21], 30000),
                             value_map[date]))
     rdd.persist()
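
Note on the hunk above: besides moving "top" to the end of the select list, the substantive change is replacing the direct lookup value_map[x[21]] with value_map.get(x[21], 30000), so a value missing from value_map falls back to the reserved id 30000 instead of raising KeyError inside the Spark map task. A small illustration of the difference, with hypothetical data:

    # Hypothetical example (not from the repository) of why dict.get with a default is safer here.
    value_map = {"wechat": 101, "appstore": 102}   # toy stand-in for the real mapping

    unseen = "toutiao"                             # a value the map was never built with

    try:
        idx = value_map[unseen]                    # direct indexing: raises KeyError
    except KeyError:
        idx = None
    print(idx)                                     # None -> the lookup failed

    idx = value_map.get(unseen, 30000)             # .get with a fallback id keeps the job running
    print(idx)                                     # 30000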
@@ -161,9 +159,9 @@ def get_predict(date,value_map,app_list_map,level2_map,level3_map):
         .map(lambda x: (x[0], x[1], x[2], x[9], x[10], x[11], x[12], x[13], x[14], x[15],
                         x[16, x[17], x[18], x[19], x[20], x[21], x[22], x[23]]))) \
         .toDF("app_list", "level2_ids", "level3_ids", "ucity_id",
-              "ccity_name", "device_type", "manufacturer", "channel", "top", "time", "hospital_id",
+              "ccity_name", "device_type", "manufacturer", "channel", "time", "hospital_id",
               "treatment_method", "price_min", "price_max", "treatment_time", "maintain_time",
-              "recover_time", "stat_date").write.csv('/recommend/native', mode='overwrite', header=True)
+              "recover_time", "top", "stat_date").write.csv('/recommend/native', mode='overwrite', header=True)
     nearby_pre = spark.createDataFrame(rdd.filter(lambda x: x[6] == 1).map(lambda x: (x[3], x[4], x[5]))) \
         .toDF("city", "uid", "cid_id")
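
Note on the hunk above: the toDF(...) name list changes only because DataFrame.toDF assigns column names by position; once "top" moves to the end of the corresponding select(...) in the previous hunk, its name has to move to the matching position (just before "stat_date"), otherwise later columns would be silently mislabeled. A hedged PySpark sketch of that positional behaviour, with toy data and names that are not from the repository:

    # Minimal sketch, assuming a local PySpark install; all names and values here are hypothetical.
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").appName("toDF-order-demo").getOrCreate()

    rows = [("beijing", 7, 101), ("shanghai", 3, 202)]              # stand-ins for the rdd tuples
    df = spark.createDataFrame(rows).toDF("city", "uid", "cid_id")  # names are applied purely by position
    df.show()
    # If the tuple order and the name list disagree, there is no error --
    # each header simply ends up over the wrong data.

    spark.stop()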