0%

Python数据科学_8_聚类分析基础

手动编写KMeans聚类算法

算法步骤

  1. 随机选取K个样本作为类中心;

  2. 计算各样本与各类中心的距离;

  3. 将各样本归于最近的类中心点;

  4. 求各类的样本的均值,作为新的类中心;

  5. 判定:若类中心不再发生变动,或已达到最大迭代次数,则算法结束;否则回到第2步。

获取数据

1
from sklearn.datasets import load_iris
1
2
# Load the iris data set and keep only its 'data' entry (the feature matrix).
featrue = load_iris()['data']
# Print the shape of the feature matrix as a quick sanity check.
print(featrue.shape)
(150, 4)

编写算法

1
import numpy as np
1
2
K = 3  # number of clusters
epochs = 50 # maximum number of iterations
1
2
3
# Randomly pick K distinct samples as the initial cluster centres.
# replace=False keeps the same sample from being drawn twice; duplicate
# centres would leave one cluster empty after the first assignment.
random_index = np.random.choice(len(featrue), K, replace=False)
first_class_center = featrue[random_index]
1
2
3
4
5
6
7
8
9
10
# Compute the distance from every sample to every cluster centre.
def dist(a, b):
    """Return the Euclidean distance between the vectors ``a`` and ``b``."""
    return np.sqrt(np.sum((a - b) ** 2))


# Distance matrix: row i holds the distances from sample i to each centre.
dis_matrix = np.zeros((len(featrue), K))
# Fill the matrix one (sample, centre) pair at a time.
for i, sample in enumerate(featrue):
    for j in range(K):
        dis_matrix[i, j] = dist(sample, first_class_center[j])
1
2
# Assign each sample to its nearest centre (column index of the row minimum).
labels = np.argmin(dis_matrix, axis=1)
1
# Column-wise mean of the samples currently labelled 2 (manual check of one new centre).
np.mean(featrue[labels == 2], axis=0)
array([7.10454545, 3.15454545, 6.02272727, 2.15909091])
1
2
3
4
# Compute the new cluster centres as the mean of each cluster's members.
class_center = np.zeros_like(first_class_center)
for i in range(K):
    members = featrue[labels == i]
    # An empty cluster keeps its previous centre: np.mean over an empty
    # selection would produce NaN and poison every later distance.
    class_center[i] = np.mean(members, axis=0) if len(members) else first_class_center[i]
1
2
3
4
5
6
# Convergence check: stop when no centre moved, otherwise adopt the new centres.
if np.all(class_center == first_class_center):
    print('聚类结束')
else:
    first_class_center = class_center
    print('聚类未结束,继续迭代!')
聚类未结束,继续迭代!

循环迭代聚类过程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Full iterative clustering loop (exercise).
K = 3  # number of clusters
epochs = 50  # maximum number of iterations

# 1. Randomly pick K distinct samples as the initial centres.
#    replace=False avoids duplicate centres, which would leave a cluster empty.
first_class_center = featrue[np.random.choice(len(featrue), K, replace=False)]


def dist(a, b):
    """Return the Euclidean distance between the vectors ``a`` and ``b``."""
    return np.sqrt(np.sum((a - b) ** 2))


# Distance matrix: row i holds the distances from sample i to each centre.
dis_matrix = np.zeros((len(featrue), K))

for epoch in range(epochs):
    # 2. Compute the distance from every sample to every centre.
    for i, sample in enumerate(featrue):
        for j in range(K):
            dis_matrix[i, j] = dist(sample, first_class_center[j])
    # 3. Assign each sample to its nearest centre.
    labels = np.argmin(dis_matrix, axis=1)
    # 4. Recompute the centres; an empty cluster keeps its previous centre
    #    because the mean of an empty selection would be NaN.
    class_center = np.zeros_like(first_class_center)
    for i in range(K):
        members = featrue[labels == i]
        class_center[i] = np.mean(members, axis=0) if len(members) else first_class_center[i]
    # 5. Stop once no centre moved; otherwise adopt the new centres and report.
    if np.all(class_center == first_class_center):
        print('聚类结束')
        break
    first_class_center = class_center
    print('-'*50)
    print(f'epoch: {epoch+1}\nclass_center:\n{first_class_center}')
--------------------------------------------------
epoch: 1
class_center:
[[5.006      3.428      1.462      0.246     ]
 [6.59846154 2.98615385 5.35230769 1.88923077]
 [5.63714286 2.66       4.07714286 1.28      ]]
--------------------------------------------------
epoch: 2
class_center:
[[5.006      3.428      1.462      0.246     ]
 [6.62786885 3.00163934 5.39836066 1.92131148]
 [5.68974359 2.66923077 4.13589744 1.29230769]]
--------------------------------------------------
epoch: 3
class_center:
[[5.006      3.428      1.462      0.246     ]
 [6.64827586 3.00689655 5.43793103 1.9362069 ]
 [5.72857143 2.68571429 4.17142857 1.31666667]]
--------------------------------------------------
epoch: 4
class_center:
[[5.006      3.428      1.462      0.246     ]
 [6.67735849 3.00754717 5.51320755 1.97169811]
 [5.79361702 2.71914894 4.2212766  1.34255319]]
--------------------------------------------------
epoch: 5
class_center:
[[5.006 3.428 1.462 0.246]
 [6.702 3.016 5.556 1.992]
 [5.822 2.728 4.256 1.36 ]]
--------------------------------------------------
epoch: 6
class_center:
[[5.006      3.428      1.462      0.246     ]
 [6.76956522 3.03695652 5.6        2.00869565]
 [5.82962963 2.73148148 4.31481481 1.39259259]]
--------------------------------------------------
epoch: 7
class_center:
[[5.006      3.428      1.462      0.246     ]
 [6.80232558 3.04418605 5.64883721 2.03023256]
 [5.85438596 2.74210526 4.34561404 1.40877193]]
--------------------------------------------------
epoch: 8
class_center:
[[5.006      3.428      1.462      0.246     ]
 [6.8275     3.07       5.7        2.0625    ]
 [5.885      2.74       4.37666667 1.41833333]]
--------------------------------------------------
epoch: 9
class_center:
[[5.006      3.428      1.462      0.246     ]
 [6.85384615 3.07692308 5.71538462 2.05384615]
 [5.88360656 2.74098361 4.38852459 1.43442623]]
聚类结束
1
labels
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1,
       1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1,
       1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2], dtype=int64)

对代码进行封装

封装为函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def KMeans_fun(x, K=3, epochs=50):
    """Cluster the samples in ``x`` into ``K`` groups with the k-means iteration.

    The progress of every non-final iteration is printed, and a closing
    message is printed when the centres stop moving.

    Args:
        x: Array-like of samples, one sample per entry along the first axis.
            Non-floating input is converted to float so that the centres
            (means) are not truncated.
        K: Number of clusters; must satisfy ``1 <= K <= len(x)``.
        epochs: Maximum number of iterations.

    Returns:
        A ``(labels, centers)`` tuple: the cluster index assigned to every
        sample and the final cluster centres.

    Raises:
        ValueError: If ``K`` is smaller than 1 or larger than the number of
            samples.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer data would otherwise force integer (truncated) centres.
        x = x.astype(float)
    n_samples = len(x)
    if not 1 <= K <= n_samples:
        raise ValueError(
            f'K must be between 1 and the number of samples ({n_samples}), got {K}'
        )

    # 1. Pick K distinct samples as the initial centres. Sampling without
    #    replacement avoids duplicate centres and hence empty clusters.
    first_class_center = x[np.random.choice(n_samples, K, replace=False)]
    labels = np.zeros(n_samples, dtype=int)

    for epoch in range(epochs):
        # 2. Euclidean distance from every sample to every centre, shape (n, K).
        diff = x[:, None] - first_class_center[None, :]
        dis_matrix = np.sqrt((diff ** 2).reshape(n_samples, K, -1).sum(axis=2))
        # 3. Assign each sample to its nearest centre.
        labels = np.argmin(dis_matrix, axis=1)
        # 4. Recompute the centres. Starting from a copy means a cluster that
        #    lost all its members keeps its old centre instead of becoming NaN.
        class_center = first_class_center.copy()
        for i in range(K):
            members = x[labels == i]
            if len(members):
                class_center[i] = members.mean(axis=0)
        # 5. Stop once no centre moved; otherwise adopt the new centres.
        if np.array_equal(class_center, first_class_center):
            print('聚类结束')
            break
        first_class_center = class_center
        print('-'*50)
        print(f'epoch: {epoch+1}\nclass_center:\n{first_class_center}')

    return labels, first_class_center
1
2
# K=1
KMeans_fun(featrue, 1)
--------------------------------------------------
epoch: 1
class_center:
[[5.84333333 3.05733333 3.758      1.19933333]]
聚类结束
1
2
# K=2
KMeans_fun(featrue, 2)
--------------------------------------------------
epoch: 1
class_center:
[[6.81       3.0525     5.7075     2.075     ]
 [5.49181818 3.05909091 3.04909091 0.88090909]]
--------------------------------------------------
epoch: 2
class_center:
[[6.55285714 2.96571429 5.30142857 1.86571429]
 [5.2225     3.1375     2.4075     0.61625   ]]
--------------------------------------------------
epoch: 3
class_center:
[[6.36777778 2.91666667 5.05666667 1.74      ]
 [5.05666667 3.26833333 1.81       0.38833333]]
--------------------------------------------------
epoch: 4
class_center:
[[6.31458333 2.89583333 4.97395833 1.703125  ]
 [5.00555556 3.34444444 1.5962963  0.3037037 ]]
--------------------------------------------------
epoch: 5
class_center:
[[6.30103093 2.88659794 4.95876289 1.69587629]
 [5.00566038 3.36981132 1.56037736 0.29056604]]
聚类结束
1
2
# K=3
KMeans_fun(featrue, 3)
--------------------------------------------------
epoch: 1
class_center:
[[5.52212389 3.0539823  3.1        0.90265487]
 [7.41538462 3.07692308 6.26153846 2.03076923]
 [6.50416667 3.0625     5.5        2.14583333]]
--------------------------------------------------
epoch: 2
class_center:
[[5.2025641  3.14230769 2.35641026 0.59615385]
 [7.475      3.125      6.3        2.05      ]
 [6.35       2.93333333 5.07166667 1.81333333]]
--------------------------------------------------
epoch: 3
class_center:
[[5.04482759 3.29137931 1.73793103 0.35862069]
 [7.38571429 3.13571429 6.22857143 2.08571429]
 [6.16025641 2.86923077 4.81666667 1.66538462]]
--------------------------------------------------
epoch: 4
class_center:
[[5.00566038 3.36981132 1.56037736 0.29056604]
 [7.12272727 3.11363636 6.03181818 2.13181818]
 [6.06       2.82       4.644      1.568     ]]
--------------------------------------------------
epoch: 5
class_center:
[[5.00784314 3.40980392 1.49215686 0.2627451 ]
 [6.9125     3.1        5.846875   2.13125   ]
 [5.96865672 2.76865672 4.48507463 1.46716418]]
--------------------------------------------------
epoch: 6
class_center:
[[5.006      3.428      1.462      0.246     ]
 [6.87428571 3.08857143 5.79142857 2.11714286]
 [5.93230769 2.75538462 4.42923077 1.43846154]]
--------------------------------------------------
epoch: 7
class_center:
[[5.006      3.428      1.462      0.246     ]
 [6.85       3.07368421 5.74210526 2.07105263]
 [5.9016129  2.7483871  4.39354839 1.43387097]]
聚类结束
1
2
# K=4
KMeans_fun(featrue, 4)
--------------------------------------------------
epoch: 1
class_center:
[[4.853125   3.18125    1.509375   0.26875   ]
 [6.01973684 2.80394737 4.60263158 1.54342105]
 [5.26842105 3.79473684 1.46315789 0.25263158]
 [7.11304348 3.11304348 5.99130435 2.13913043]]
--------------------------------------------------
epoch: 2
class_center:
[[4.7862069  3.16206897 1.49310345 0.26551724]
 [5.96865672 2.76865672 4.48507463 1.46716418]
 [5.3        3.73636364 1.49090909 0.25909091]
 [6.9125     3.1        5.846875   2.13125   ]]
--------------------------------------------------
epoch: 3
class_center:
[[4.744      3.152      1.436      0.22      ]
 [5.93230769 2.75538462 4.42923077 1.43846154]
 [5.268      3.704      1.488      0.272     ]
 [6.87428571 3.08857143 5.79142857 2.11714286]]
--------------------------------------------------
epoch: 4
class_center:
[[4.72916667 3.14583333 1.425      0.20833333]
 [5.9016129  2.7483871  4.39354839 1.43387097]
 [5.26153846 3.68846154 1.49615385 0.28076923]
 [6.85       3.07368421 5.74210526 2.07105263]]
--------------------------------------------------
epoch: 5
class_center:
[[4.7173913  3.13478261 1.4173913  0.2       ]
 [5.9016129  2.7483871  4.39354839 1.43387097]
 [5.25185185 3.67777778 1.5        0.28518519]
 [6.85       3.07368421 5.74210526 2.07105263]]
--------------------------------------------------
epoch: 6
class_center:
[[4.70454545 3.12272727 1.41363636 0.2       ]
 [5.9016129  2.7483871  4.39354839 1.43387097]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.85       3.07368421 5.74210526 2.07105263]]
聚类结束
1
2
# K=5
KMeans_fun(featrue, 5)
--------------------------------------------------
epoch: 1
class_center:
[[4.53333333 2.96666667 1.34444444 0.2       ]
 [5.1097561  3.52926829 1.48780488 0.25609756]
 [5.4125     2.46875    3.74375    1.16875   ]
 [6.67333333 2.99111111 5.64222222 2.04444444]
 [6.13589744 2.9        4.53333333 1.45897436]]
--------------------------------------------------
epoch: 2
class_center:
[[4.66470588 3.04705882 1.41176471 0.2       ]
 [5.18181818 3.62424242 1.48787879 0.26969697]
 [5.46666667 2.53809524 3.84761905 1.18095238]
 [6.82105263 3.06578947 5.74736842 2.09473684]
 [6.15121951 2.86341463 4.66829268 1.54146341]]
--------------------------------------------------
epoch: 3
class_center:
[[4.685      3.1        1.39       0.2       ]
 [5.22       3.64666667 1.51       0.27666667]
 [5.5125     2.58333333 3.88333333 1.19166667]
 [6.87428571 3.08857143 5.79142857 2.11714286]
 [6.17804878 2.85609756 4.74878049 1.58292683]]
--------------------------------------------------
epoch: 4
class_center:
[[4.70454545 3.12272727 1.41363636 0.2       ]
 [5.24285714 3.66785714 1.5        0.28214286]
 [5.508      2.6        3.908      1.204     ]
 [6.9125     3.1        5.846875   2.13125   ]
 [6.21627907 2.86046512 4.78604651 1.61162791]]
--------------------------------------------------
epoch: 5
class_center:
[[4.70454545 3.12272727 1.41363636 0.2       ]
 [5.24285714 3.66785714 1.5        0.28214286]
 [5.51538462 2.60769231 3.93076923 1.20769231]
 [6.9125     3.1        5.846875   2.13125   ]
 [6.22857143 2.86190476 4.79285714 1.61904762]]
--------------------------------------------------
epoch: 6
class_center:
[[4.70454545 3.12272727 1.41363636 0.2       ]
 [5.24285714 3.66785714 1.5        0.28214286]
 [5.52962963 2.62222222 3.94074074 1.21851852]
 [6.9125     3.1        5.846875   2.13125   ]
 [6.23658537 2.85853659 4.80731707 1.62195122]]
聚类结束

封装为类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class KMeans_class:
    """Minimal k-means clusterer.

    After ``fit`` the results are available as ``centers_`` (the cluster
    centres) and ``labels_`` (the cluster index of every sample).
    """

    def __init__(self, K=3, epochs=50):
        """Store the number of clusters ``K`` and the iteration cap ``epochs``."""
        self.K = K
        self.epochs = epochs

    def dist(self, a, b):
        """Return the Euclidean distance between the vectors ``a`` and ``b``."""
        return np.sqrt(np.sum((a - b) ** 2))

    def fit(self, x):
        """Run the k-means iteration on ``x`` and store the result on ``self``.

        Args:
            x: Array-like of samples, one sample per entry along the first
                axis. Non-floating input is converted to float so that the
                centres (means) are not truncated.

        Raises:
            ValueError: If ``self.K`` is smaller than 1 or larger than the
                number of samples.
        """
        x = np.asarray(x)
        if not np.issubdtype(x.dtype, np.floating):
            # Integer data would otherwise force integer (truncated) centres.
            x = x.astype(float)
        n_samples = len(x)
        if not 1 <= self.K <= n_samples:
            raise ValueError(
                f'K must be between 1 and the number of samples ({n_samples}), got {self.K}'
            )

        # 1. Pick K distinct samples as the initial centres. Sampling without
        #    replacement avoids duplicate centres and hence empty clusters.
        self.centers_ = x[np.random.choice(n_samples, self.K, replace=False)]
        self.labels_ = np.zeros(n_samples)
        # Distance matrix: row i holds the distances from sample i to each centre.
        dist_matrix = np.zeros((n_samples, self.K))

        for epoch in range(self.epochs):
            # 2. Distance from every sample to every centre. self.dist is
            #    called per pair so that a subclass can override the metric.
            for i, sample in enumerate(x):
                for j, center in enumerate(self.centers_):
                    dist_matrix[i, j] = self.dist(sample, center)
            # 3. Assign each sample to its nearest centre.
            self.labels_ = np.argmin(dist_matrix, axis=1)
            # 4. Recompute the centres. Starting from a copy means a cluster
            #    that lost all its members keeps its old centre, not NaN.
            class_center = self.centers_.copy()
            for i in range(self.K):
                members = x[self.labels_ == i]
                if len(members):
                    class_center[i] = np.mean(members, axis=0)
            # 5. Stop once no centre moved; otherwise adopt the new centres.
            if np.array_equal(class_center, self.centers_):
                print('聚类结束')
                break
            self.centers_ = class_center
            print('-'*50)
            print(f'epoch: {epoch+1}\nclass_center:\n{self.centers_}')
1
2
kmeans = KMeans_class(K=1)
kmeans.fit(featrue)
--------------------------------------------------
epoch: 1
class_center:
[[5.84333333 3.05733333 3.758      1.19933333]]
聚类结束
1
2
kmeans = KMeans_class(K=2)
kmeans.fit(featrue)
--------------------------------------------------
epoch: 1
class_center:
[[5.72214286 3.05071429 3.57       1.13571429]
 [7.54       3.15       6.39       2.09      ]]
--------------------------------------------------
epoch: 2
class_center:
[[5.50178571 3.05178571 3.08482143 0.90357143]
 [6.85       3.07368421 5.74210526 2.07105263]]
--------------------------------------------------
epoch: 3
class_center:
[[5.23580247 3.12716049 2.43209877 0.62469136]
 [6.55652174 2.97536232 5.31449275 1.87391304]]
--------------------------------------------------
epoch: 4
class_center:
[[5.07580645 3.24677419 1.87903226 0.41612903]
 [6.38409091 2.92386364 5.08181818 1.75113636]]
--------------------------------------------------
epoch: 5
class_center:
[[5.00555556 3.34444444 1.5962963  0.3037037 ]
 [6.31458333 2.89583333 4.97395833 1.703125  ]]
--------------------------------------------------
epoch: 6
class_center:
[[5.00566038 3.36981132 1.56037736 0.29056604]
 [6.30103093 2.88659794 4.95876289 1.69587629]]
聚类结束
1
2
kmeans = KMeans_class(K=3)
kmeans.fit(featrue)
--------------------------------------------------
epoch: 1
class_center:
[[6.6826087  2.98478261 5.62391304 2.03695652]
 [5.0828125  3.2125     1.9546875  0.446875  ]
 [6.095      2.8925     4.4975     1.44      ]]
--------------------------------------------------
epoch: 2
class_center:
[[6.82105263 3.06578947 5.74736842 2.09473684]
 [5.00566038 3.36981132 1.56037736 0.29056604]
 [5.96610169 2.77118644 4.45084746 1.43898305]]
--------------------------------------------------
epoch: 3
class_center:
[[6.85       3.07368421 5.74210526 2.07105263]
 [5.006      3.428      1.462      0.246     ]
 [5.9016129  2.7483871  4.39354839 1.43387097]]
聚类结束
1
2
kmeans = KMeans_class(K=4)
kmeans.fit(featrue)
--------------------------------------------------
epoch: 1
class_center:
[[5.95       2.88518519 4.77222222 1.66111111]
 [6.78333333 2.90952381 5.23333333 1.75714286]
 [4.55833333 3.075      1.31666667 0.20833333]
 [5.13333333 3.42142857 1.67619048 0.33095238]]
--------------------------------------------------
epoch: 2
class_center:
[[5.86724138 2.73793103 4.4        1.43965517]
 [6.84878049 3.07073171 5.66829268 2.02439024]
 [4.66111111 3.08888889 1.36666667 0.2       ]
 [5.1969697  3.58484848 1.56060606 0.2969697 ]]
--------------------------------------------------
epoch: 3
class_center:
[[5.88360656 2.74098361 4.38852459 1.43442623]
 [6.85384615 3.07692308 5.71538462 2.05384615]
 [4.685      3.1        1.39       0.2       ]
 [5.22       3.64666667 1.51       0.27666667]]
--------------------------------------------------
epoch: 4
class_center:
[[5.88360656 2.74098361 4.38852459 1.43442623]
 [6.85384615 3.07692308 5.71538462 2.05384615]
 [4.70454545 3.12272727 1.41363636 0.2       ]
 [5.24285714 3.66785714 1.5        0.28214286]]
聚类结束
1
2
kmeans = KMeans_class(K=5)
kmeans.fit(featrue)
--------------------------------------------------
epoch: 1
class_center:
[[7.17368421 3.15263158 6.10526316 2.15263158]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.175      3.2        5.35       2.35      ]
 [6.09863014 2.81232877 4.65890411 1.55068493]
 [4.75       2.99615385 1.7        0.32692308]]
--------------------------------------------------
epoch: 2
class_center:
[[7.27647059 3.10588235 6.12352941 2.07058824]
 [5.21333333 3.65333333 1.48       0.27666667]
 [6.45789474 3.05789474 5.45789474 2.17368421]
 [5.98032787 2.77213115 4.47868852 1.44262295]
 [4.73478261 3.         1.66521739 0.30869565]]
--------------------------------------------------
epoch: 3
class_center:
[[7.42307692 3.13076923 6.26923077 2.06923077]
 [5.20645161 3.63870968 1.47096774 0.27419355]
 [6.51481481 3.01481481 5.43703704 2.07777778]
 [5.91016949 2.7559322  4.39491525 1.41525424]
 [4.7        3.055      1.525      0.245     ]]
--------------------------------------------------
epoch: 4
class_center:
[[7.475      3.125      6.3        2.05      ]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.50625    3.009375   5.390625   2.028125  ]
 [5.8625     2.73928571 4.33035714 1.39464286]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 5
class_center:
[[7.475      3.125      6.3        2.05      ]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.45789474 2.98157895 5.32105263 1.97368421]
 [5.822      2.728      4.256      1.36      ]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 6
class_center:
[[7.475      3.125      6.3        2.05      ]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.40454545 2.96818182 5.25454545 1.94318182]
 [5.78863636 2.70681818 4.17727273 1.30681818]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 7
class_center:
[[7.42307692 3.13076923 6.26923077 2.06923077]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.37173913 2.93913043 5.21304348 1.9173913 ]
 [5.77073171 2.71463415 4.12926829 1.2804878 ]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 8
class_center:
[[7.38571429 3.13571429 6.22857143 2.08571429]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.37291667 2.94166667 5.15833333 1.87708333]
 [5.70789474 2.68684211 4.1        1.27105263]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 9
class_center:
[[7.38571429 3.13571429 6.22857143 2.08571429]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.36470588 2.95098039 5.12156863 1.85294118]
 [5.66285714 2.65142857 4.06285714 1.25428571]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 10
class_center:
[[7.34       3.14666667 6.19333333 2.11333333]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.35294118 2.94509804 5.1        1.83137255]
 [5.65       2.64117647 4.04705882 1.25      ]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 11
class_center:
[[7.24117647 3.16470588 6.15294118 2.13529412]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.342      2.928      5.062      1.8       ]
 [5.63636364 2.63636364 4.02727273 1.25151515]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 12
class_center:
[[7.17368421 3.15263158 6.10526316 2.15263158]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.33125    2.92291667 5.03541667 1.77916667]
 [5.63636364 2.63636364 4.02727273 1.25151515]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 13
class_center:
[[7.12272727 3.11363636 6.03181818 2.13181818]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.29347826 2.92608696 4.98913043 1.75869565]
 [5.625      2.628125   4.0125     1.24375   ]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 14
class_center:
[[7.12272727 3.11363636 6.03181818 2.13181818]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.29574468 2.92553191 4.97446809 1.74893617]
 [5.6        2.61935484 4.00322581 1.24193548]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
--------------------------------------------------
epoch: 15
class_center:
[[7.08695652 3.12608696 6.01304348 2.14347826]
 [5.24285714 3.66785714 1.5        0.28214286]
 [6.29361702 2.9        4.95106383 1.72978723]
 [5.58       2.63333333 3.98666667 1.23333333]
 [4.70454545 3.12272727 1.41363636 0.2       ]]
聚类结束
1
kmeans.labels_
array([1, 4, 4, 4, 1, 1, 4, 1, 4, 4, 1, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1,
       4, 1, 4, 4, 1, 1, 1, 4, 4, 1, 1, 1, 4, 4, 1, 1, 4, 1, 1, 4, 4, 1,
       1, 4, 1, 4, 1, 4, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 2, 3, 2,
       3, 3, 2, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 2, 3, 2, 2, 3,
       3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 0, 2, 0, 0, 3, 0, 0, 0,
       2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 2, 0, 0, 2, 2, 2, 0, 0, 0,
       2, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2], dtype=int64)

调用sklearn内置的KMeans类

导入KMeans类

1
from sklearn.cluster import KMeans

模型的搭建和训练

1
2
3
4
5
6
# n_clusters: number of clusters
# max_iter: maximum number of iterations
# verbose: whether to print the training progress
# n_init: how many times the initialisation is repeated; the model is retrained for each one
# and the best of those trained models is returned.
kmeans = KMeans(n_clusters=3, max_iter=50, verbose=1, n_init=2)
1
kmeans.fit(featrue)
Initialization complete
Iteration 0, inertia 151.73
Iteration 1, inertia 99.88221298209365
Iteration 2, inertia 87.74180017807669
Iteration 3, inertia 84.95217943238866
Iteration 4, inertia 84.01277888865147
Iteration 5, inertia 83.04698186876973
Iteration 6, inertia 81.74960206772619
Iteration 7, inertia 80.80637600000001
Iteration 8, inertia 79.87357983461303
Iteration 9, inertia 79.34436414532674
Iteration 10, inertia 78.92130972222222
Iteration 11, inertia 78.8556658259773
Converged at iteration 11: strict convergence.
Initialization complete
Iteration 0, inertia 145.36999999999995
Iteration 1, inertia 80.74942872639375
Iteration 2, inertia 79.16851704724697
Iteration 3, inertia 78.92130972222222
Iteration 4, inertia 78.8556658259773
Converged at iteration 4: strict convergence.

KMeans(max_iter=50, n_clusters=3, n_init=2, verbose=1)

查看聚类参数

1
2
# 查看聚类中心
kmeans.cluster_centers_
array([[6.85384615, 3.07692308, 5.71538462, 2.05384615],
       [5.88360656, 2.74098361, 4.38852459, 1.43442623],
       [5.006     , 3.428     , 1.462     , 0.246     ]])
1
2
# 查看聚类的类别
kmeans.labels_
array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1])
-------------本文结束感谢您的阅读-------------