1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import numpy as np, pandas as pd
from sklearn.cluster import DBSCAN
from shapely.geometry import MultiPoint
import geopandas
import shapefile
from matplotlib import pyplot as plt
# Load the device location records and keep only the coordinate columns.
# Selecting ["lat", "lng"] already discards every other column (device_id,
# partition_date, ...), so no separate drop() call is needed.
data = pd.read_csv("/Users/mac/Downloads/location.csv")
data = data[["lat", "lng"]]
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
# supported way to obtain the float32 (n_points, 2) array.
data = data.to_numpy(dtype="float32")

# Scatter every raw point: column 0 (lat) on the x axis, column 1 (lng) on y.
# The original plotted an undefined name `latlngs`; `data` is the lat/lng
# array built just above.
plt.title("beijing location")
plt.scatter(data[:, 0], data[:, 1], s=1, c="black", marker='.')

# Open the three border shapefiles and pull out their shape records.
# NOTE(review): shape_path, shape_path_2huan and shape_path_5huan are not
# defined in the visible part of this file -- they must be assigned before
# this point, otherwise these lines raise NameError.
border_shape = shapefile.Reader(shape_path)
border_shape_2 = shapefile.Reader(shape_path_2huan)
border_shape_5 = shapefile.Reader(shape_path_5huan)
border = border_shape.shapes()
border_2 = border_shape_2.shapes()
border_5 = border_shape_5.shapes()
# Centre point of a cluster
def get_centermost_point(cluster):
    """Return the centroid of *cluster* as an ``(x, y)`` tuple.

    ``cluster`` is handed straight to ``shapely.geometry.MultiPoint``, so it
    must be a collection of coordinate pairs that constructor accepts.  The
    centroid is also printed to stdout before it is returned.
    """
    # Build the MultiPoint and its centroid once instead of once per axis.
    center = MultiPoint(cluster).centroid
    centroid = (center.x, center.y)
    print(centroid)
    return centroid
# Render the clustering result: one scatter series per cluster, then show
# the figure.
# NOTE(review): `clusters` is not defined in the visible part of this file;
# it is consumed here as an iterable of point collections whose items are
# indexable by 0 and 1.
# NOTE(review): the pasted source lost its indentation; the scatter call is
# assumed to run once per cluster (inside the outer loop) -- confirm.
for cluster_points in clusters:
    # Single pass over the cluster, reading item 0 then item 1 of each point.
    coords = [(point[0], point[1]) for point in cluster_points]
    xs = [first for first, _ in coords]
    ys = [second for _, second in coords]
    plt.scatter(xs, ys, marker='o')
plt.show()
# coding=utf-8
import numpy as np
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to the matplotlib figures drawn below.
sns.set()
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import pandas as pd
# Load the device location records and keep only the coordinate columns;
# selecting ["lat", "lng"] already discards device_id and partition_date,
# so no separate drop() call is needed.
data = pd.read_csv("/Users/mac/Downloads/location.csv")
data = data[["lat", "lng"]]
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it
# and converts to float32 in the same step.
data = data.to_numpy(dtype="float32")

# Preprocessing: standardize the features so that every dimension has zero
# mean and unit variance.  The fitted scaler stays bound to `stscaler`.
stscaler = StandardScaler().fit(data)
data = stscaler.transform(data)
# Scatter plot of the two columns of `data` (column 0 on x, column 1 on y).
plt.scatter(data[:, 0], data[:, 1])
# `data` has already been standardized at this point, so the axes show
# scaled values rather than raw coordinates; the labels say so, matching the
# labels used on the DBSCAN plot further down.
plt.xlabel("lat (scaled)")
plt.ylabel("lng (scaled)")
# Title typo fixed ("beijng_users" -> "beijing_users").
plt.title("beijing_users")
# plt.savefig("results/wholesale.png", format="PNG")
# Cluster the points with DBSCAN.
dbsc = DBSCAN(eps=0.5, min_samples=15).fit(data)
labels = dbsc.labels_  # cluster label of every point; -1 marks noise
#print(labels)

# Boolean array shaped like `labels`, initially all False, switched to True
# at the indices DBSCAN reports as core samples.
core_samples = np.zeros_like(labels, dtype=bool)
core_samples[dbsc.core_sample_indices_] = True
#print(core_samples)

# One colour per distinct label: linspace yields len(unique_labels) evenly
# spaced values in [0, 1] and the Spectral colormap turns each into a colour.
unique_labels = np.unique(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
#print(zip(unique_labels,colors))

# NOTE(review): no new figure is opened in this block, so the markers are
# drawn on whatever matplotlib axes are current when it runs.
for cluster_id, rgba in zip(unique_labels, colors):
    in_cluster = (labels == cluster_id)
    core_mask = in_cluster & core_samples
    print(core_mask)
    # Core samples of the cluster: large markers.
    core_points = data[core_mask]
    plt.plot(
        core_points[:, 0], core_points[:, 1], 'o',
        markerfacecolor=rgba, markersize=10,
    )
    # Remaining (non-core) members of the cluster: small markers.
    other_points = data[in_cluster & ~core_samples]
    plt.plot(
        other_points[:, 0], other_points[:, 1], 'o',
        markerfacecolor=rgba, markersize=5,
    )

plt.title("DBSCAN on beijing_users")
plt.xlabel("lat (scaled)")
plt.ylabel("lng (scaled)")
# plt.savefig("results/(0.9,15)dbscan_wholesale.png", format="PNG")