# coding=utf-8
"""Plot Beijing user locations and cluster them with DBSCAN.

The script reads latitude/longitude pairs from ``LOCATION_CSV``, standardizes
them, runs DBSCAN on the standardized coordinates and draws three figures:

1. the raw locations with the detected clusters drawn on top,
2. the standardized coordinates,
3. the DBSCAN result, with core samples drawn larger than non-core samples.
"""
import geopandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shapefile
from scipy.spatial.distance import cdist
from shapely.geometry import MultiPoint
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# CSV file that must provide at least the columns "lat" and "lng".
LOCATION_CSV = "/Users/mac/Downloads/location.csv"

# DBSCAN parameters; distances are measured on the standardized coordinates.
DBSCAN_EPS = 0.5
DBSCAN_MIN_SAMPLES = 15

# NOTE(review): the original script used shape_path, shape_path_2huan and
# shape_path_5huan without ever defining them. They are kept as explicit
# settings; point them at the border shapefiles to have them loaded.
shape_path = None
shape_path_2huan = None
shape_path_5huan = None


def load_latlngs(csv_path=LOCATION_CSV):
    """Return the "lat" and "lng" columns of *csv_path* as a float32 array.

    The result has one row per CSV record; column 0 is "lat" and column 1 is
    "lng". Any other column in the file is ignored.
    """
    frame = pd.read_csv(csv_path)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # in both old and current pandas releases.
    return frame[["lat", "lng"]].values.astype("float32", copy=False)


def load_border(path):
    """Return the shapes stored in the shapefile at *path*.

    Returns None when *path* is None, i.e. when the path is not configured.
    """
    if path is None:
        return None
    return shapefile.Reader(path).shapes()


def get_centermost_point(cluster):
    """Print and return the centroid of *cluster* as an ``(x, y)`` tuple.

    *cluster* is passed unchanged to ``shapely.geometry.MultiPoint``.
    """
    center = MultiPoint(cluster).centroid  # build the geometry only once
    centroid = (center.x, center.y)
    print(centroid)
    return centroid


# --- Load and cluster -------------------------------------------------------

# Raw coordinates: column 0 is "lat", column 1 is "lng".
latlngs = load_latlngs()

# Preprocessing: standardize the features so that every dimension has zero
# mean and unit variance.
stscaler = StandardScaler().fit(latlngs)
data = stscaler.transform(latlngs)

dbsc = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES).fit(data)
labels = dbsc.labels_  # cluster label of every point; -1 marks noise

# Boolean mask with the same shape as labels, True only for core samples.
core_samples = np.zeros_like(labels, dtype=bool)
core_samples[dbsc.core_sample_indices_] = True

unique_labels = np.unique(labels)

# NOTE(review): the original rendered an undefined name `clusters`. Grouping
# the raw coordinates by DBSCAN label (noise excluded) is an assumption about
# what was intended -- confirm.
clusters = [latlngs[labels == label] for label in unique_labels if label != -1]

# Border shapes; each stays None until the matching path above is configured.
border = load_border(shape_path)
border_2 = load_border(shape_path_2huan)
border_5 = load_border(shape_path_5huan)

# --- Figure 1: raw locations with the clusters rendered on top --------------

plt.figure()
plt.title("beijing location")
plt.scatter(latlngs[:, 0], latlngs[:, 1], s=1, c="black", marker='.')
for border_detail in clusters:
    plt.scatter(border_detail[:, 0], border_detail[:, 1], marker='o')

# Called at the same point as in the original: after the raw-location plot
# and before the two plots of the standardized data.
sns.set()

# --- Figure 2: scatter plot of the standardized coordinates -----------------

# A separate figure, so that its title and axis labels are not overwritten by
# the DBSCAN plot below.
plt.figure()
plt.scatter(data[:, 0], data[:, 1])
plt.xlabel("lat")
plt.ylabel("lng")
plt.title("beijng_users")

# --- Figure 3: DBSCAN result ------------------------------------------------

plt.figure()
# np.linspace yields len(unique_labels) evenly spaced numbers in [0, 1];
# Spectral maps each of them to a color, giving one color per label.
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
for (label, color) in zip(unique_labels, colors):
    class_member_mask = (labels == label)
    print(class_member_mask & core_samples)

    # Core samples of this label are drawn with large markers ...
    xy = data[class_member_mask & core_samples]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=color, markersize=10)

    # ... and its non-core samples with small markers.
    xy2 = data[class_member_mask & ~core_samples]
    plt.plot(xy2[:, 0], xy2[:, 1], 'o', markerfacecolor=color, markersize=5)

plt.title("DBSCAN on beijing_users")
plt.xlabel("lat (scaled)")
plt.ylabel("lng (scaled)")

# Show every figure; the original never displayed or saved the DBSCAN plot.
plt.show()