# 不限次数、免费不需要注册。
from sklearn.cluster import KMeans

# Number of clusters K-Means is asked to find.
num_clusters = 4

# k-means++ centroid seeding; n_init=10 runs the algorithm ten times with
# different seeds and keeps the best run. fit() returns the estimator
# itself, so the fitted model is bound directly.
# NOTE(review): `data` is defined outside this snippet.
kmeans = KMeans(n_clusters=num_clusters, n_init=10, init='k-means++').fit(data)
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth

# Estimate the kernel bandwidth from the data.
# quantile (between 0 and 1) selects which quantile of the pairwise
# distances is used (0.5 would be the median); it is not the share of
# samples that become cluster centres. n_samples=len(X) uses every sample.
bandwidth = estimate_bandwidth(X, n_samples=len(X), quantile=0.1)

# Cluster with MeanShift. bin_seeding=True seeds the kernels from points
# binned onto a grid whose coarseness matches the bandwidth, instead of from
# every sample, which speeds the algorithm up. It is not a random seed.
meanshift_estimator = MeanShift(bin_seeding=True, bandwidth=bandwidth).fit(X)

# Coordinates of the cluster centres and the per-sample cluster labels.
centroids = meanshift_estimator.cluster_centers_
labels = meanshift_estimator.labels_

# The number of clusters found is the number of distinct labels.
num_clusters = np.unique(labels).size
print("Number of clusters in input data =", num_clusters)
from sklearn.cluster import AgglomerativeClustering

# NOTE(review): plt (presumably matplotlib.pyplot), linkage, connectivity,
# num_clusters and X are all defined outside this snippet.
plt.figure()

# Build the agglomerative (hierarchical) clustering model with the requested
# linkage criterion and connectivity constraint, then fit it to X.
# fit() returns the estimator itself.
model = AgglomerativeClustering(
    n_clusters=num_clusters,
    connectivity=connectivity,
    linkage=linkage,
).fit(X)

# Cluster index assigned to each sample.
labels = model.labels_
from sklearn.cluster import AffinityPropagation, affinity_propagation

# Build the clustering with the affinity propagation algorithm, passing the
# covariance matrix as the similarity matrix.
# Per the original note, edge_model.covariance_ is the covariance matrix
# between stocks; edge_model and names are defined outside this snippet.
# The function is imported by name because no `cluster` module is imported
# in this snippet, so `cluster.affinity_propagation` would raise NameError.
_, labels = affinity_propagation(edge_model.covariance_)
num_labels = labels.max()

# Print the members of each cluster. Labels are 0-based; the displayed
# cluster number is 1-based.
# NOTE(review): names must support boolean-mask indexing (e.g. a NumPy array).
for i in range(num_labels + 1):
    print("Cluster", i+1, "-->", ', '.join(names[labels == i]))