Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
13d68701
Commit
13d68701
authored
Sep 16, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改测试文件
parent
037a733c
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
79 additions
and
38 deletions
+79
-38
ctr-56.py
ctr-56.py
+0
-23
location.py
location.py
+54
-0
user_data.py
user_data.py
+25
-15
No files found.
ctr-56.py
deleted
100644 → 0
View file @
037a733c
# -*- coding: UTF-8 -*-
import
redis
import
datetime
import
json
if __name__ == "__main__":
    # Write a fixed answer queue for one device into the Redis instance at
    # redis.paas-test.env (database 1), then print 1 as a completion marker.
    device_id = "D17A3770-1CC7-4AFB-A9EA-6E667EE051FF"
    queue_key = "TS:search_recommend_answer_queue:device_id:" + str(device_id)

    client = redis.StrictRedis.from_url("redis://redis.paas-test.env:6379/1")

    # Ids 529405..529407, serialised as a JSON list of strings.
    payload = json.dumps([str(cid) for cid in range(529405, 529408)])

    client.hset(queue_key, 'answer_queue', payload)
    print(1)
location.py
0 → 100644
View file @
13d68701
# coding=utf-8
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.spatial.distance import cdist
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

sns.set()

# DBSCAN hyper-parameters. They are also embedded in the output file name so
# the saved plot always records the settings that actually produced it.
EPS = 0.5
MIN_SAMPLES = 15
RESULTS_DIR = "results"

# Load the location export and keep only the two coordinate columns.
data = pd.read_csv("/Users/mac/Downloads/location.csv")
data = data[["lat", "lng"]]
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is its replacement.
data = data.to_numpy(dtype="float32")

# Pre-processing: standardise each feature to zero mean and unit variance.
stscaler = StandardScaler().fit(data)
data = stscaler.transform(data)

# Scatter plot of the standardised coordinates.
plt.scatter(data[:, 0], data[:, 1])
plt.xlabel("lat")
plt.ylabel("lng")
plt.title("beijng_users")
# plt.savefig("results/wholesale.png", format="PNG")

dbsc = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES).fit(data)
# Cluster label of every point; -1 marks noise.
labels = dbsc.labels_

# Boolean mask with the same shape as labels, True only for core samples.
core_samples = np.zeros_like(labels, dtype=bool)
core_samples[dbsc.core_sample_indices_] = True

unique_labels = np.unique(labels)
# One colour per label, evenly spaced over the Spectral colour map.
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

# Start a fresh figure so the cluster plot is not drawn on top of the raw
# scatter plot above.
plt.figure()
for label, color in zip(unique_labels, colors):
    class_member_mask = (labels == label)
    print(class_member_mask & core_samples)

    # Core samples of this label: large markers.
    xy = data[class_member_mask & core_samples]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=color, markersize=10)

    # Non-core samples of this label: small markers.
    xy2 = data[class_member_mask & ~core_samples]
    plt.plot(xy2[:, 0], xy2[:, 1], 'o', markerfacecolor=color, markersize=5)

plt.title("DBSCAN on beijing_users")
plt.xlabel("lat (scaled)")
plt.ylabel("lng (scaled)")

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(RESULTS_DIR, exist_ok=True)
plt.savefig(
    os.path.join(
        RESULTS_DIR,
        "({},{})dbscan_wholesale.png".format(EPS, MIN_SAMPLES),
    ),
    format="PNG",
)
user_data.py
View file @
13d68701
...
...
@@ -76,6 +76,15 @@ def get_order():
r
+=
10000
print
(
"insert done"
)
def get_meigou_tag():
    """Copy service/tag pairs from ``api_servicetag`` into ``meigou_tag``.

    Runs a fixed SELECT through ``get_mysql_data`` using the module-level
    ``host``, ``port``, ``user``, ``passwd`` and ``db`` settings, labels the
    two result columns ``service_id`` and ``tag_id``, and appends the rows to
    the ``meigou_tag`` table in schema ``jerry_test`` through the module-level
    ``yconnect`` connection. Prints ``insert done`` when finished.
    """
    query = "select service_id,tag_id from api_servicetag"
    rows = list(get_mysql_data(host, port, user, passwd, db, query))

    # Result columns arrive positionally (0, 1); give them their real names.
    column_names = {0: "service_id", 1: "tag_id"}
    frame = pd.DataFrame(rows).rename(columns=column_names)

    frame.to_sql(
        "meigou_tag",
        con=yconnect,
        schema='jerry_test',
        if_exists='append',
        index=False,
    )
    print("insert done")
def
meigou_to_csv
():
sql
=
"select device_id,service_id,created_time from meigou_order"
df
=
pd
.
DataFrame
(
list
(
get_mysql_data
(
host
,
port
,
user
,
passwd
,
db
,
sql
)))
...
...
@@ -111,21 +120,22 @@ def location_to_csv():
if
__name__
==
"__main__"
:
# host = "172.16.30.141"
# port = 3306
# user = "work"
# passwd = "BJQaT9VzDcuPBqkd"
# db = "zhengxing"
# yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@172.16.40.170:4000/jerry_test?charset=utf8')
# print("end")
host
=
"172.16.40.170"
port
=
4000
user
=
"root"
passwd
=
"3SYz54LS9#^9sBvC"
db
=
"jerry_test"
location_to_csv
()
host
=
"172.16.30.141"
port
=
3306
user
=
"work"
passwd
=
"BJQaT9VzDcuPBqkd"
db
=
"zhengxing"
yconnect
=
create_engine
(
'mysql+pymysql://root:3SYz54LS9#^9sBvC@172.16.40.170:4000/jerry_test?charset=utf8'
)
get_meigou_tag
()
print
(
"end"
)
# host = "172.16.40.170"
# port = 4000
# user = "root"
# passwd = "3SYz54LS9#^9sBvC"
# db = "jerry_test"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment