Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
4c592022
Commit
4c592022
authored
Jun 25, 2019
by
Your Name
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
test
parent
23af1a9f
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
3 deletions
+18
-3
predict.py
eda/esmm/Model_pipline/predict.py
+18
-3
No files found.
eda/esmm/Model_pipline/predict.py
View file @
4c592022
...
...
@@ -221,14 +221,22 @@ if __name__ == "__main__":
te_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"
]
#predict and sort
result
=
predict
(
te_files
)
df
=
pd
.
DataFrame
(
result
,
columns
=
[
"uid"
,
"city"
,
"cid_id"
,
"pctcvr"
])
print
(
df
.
head
(
10
))
df
.
to_csv
(
"/home/gmuser/test.csv"
)
df
[
'uid1'
]
=
df
[
'uid'
]
.
apply
(
trans
)
df
[
'city1'
]
=
df
[
'city'
]
.
apply
(
trans
)
df
[
'cid_id1'
]
=
df
[
'cid_id'
]
.
apply
(
trans
)
df2
=
df
.
groupby
(
by
=
[
"uid1"
,
"city1"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"pctcvr"
,
ascending
=
False
))
\
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid1"
,
"city1"
])
.
agg
({
'cid_id1'
:
set_join
})
.
reset_index
(
drop
=
False
)
df2
.
columns
=
[
"device_id"
,
"city_id"
,
"nearby_queue"
]
df2
[
time
]
=
"2019-06-25"
#update or insert
host
=
'172.16.40.158'
port
=
4000
user
=
'root'
...
...
@@ -236,7 +244,14 @@ if __name__ == "__main__":
db
=
'jerry_test'
charset
=
'utf8'
device_count
=
df2
.
shape
[
0
]
con
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
cur
=
con
.
cursor
()
for
i
in
range
(
0
,
device_count
):
query
=
"INSERT INTO esmm_device_diary_queue_test (device_id, city_id, time,nearby_queue) VALUES(
%
s,
%
s,
%
s,
%
s) ON DUPLICATE KEY UPDATE device_id=
%
s, city_id=
%
s, time=
%
s, nearby_queue=
%
s"
cur
.
execute
(
query
,
(
df2
.
device_id
[
i
],
df2
.
city_id
[
i
],
df2
.
time
[
i
],
df
.
nearby_queue
[
i
],
df2
.
device_id
[
i
],
df2
.
city_id
[
i
],
df2
.
time
[
i
],
df
.
nearby_queue
[
i
]))
con
.
commit
()
con
.
close
()
print
(
"耗时(min):"
)
print
((
time
.
time
()
-
b
)
/
60
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment