KNN 原理: 在离 x 点最近的 k 个点中,出现次数最多的类别作为 x 点的分类标识
# k-nearest-neighbours (KNN) demo: fit a classifier on the iris dataset,
# hold out 10 shuffled samples, and compare predictions with the true labels.
import numpy as np
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier

iris = datasets.load_iris()
iris_X = iris.data  # (n_samples, n_features)
iris_y = iris.target

# Distinct class labels. This bare expression only displays a value when the
# snippet is run in an interactive session; in a script it has no effect.
np.unique(iris_y)

# Split the iris dataset into a training set and a test set.
# A random permutation of the sample indices shuffles the data before the
# split; the fixed seed makes the shuffle reproducible.
np.random.seed(0)
indices = np.random.permutation(len(iris_X))
iris_X_train = iris_X[indices[:-10]]  # everything except the last 10 shuffled samples
iris_y_train = iris_y[indices[:-10]]
iris_X_test = iris_X[indices[-10:]]   # the last 10 shuffled samples
iris_y_test = iris_y[indices[-10:]]

# Create and fit a nearest-neighbour classifier with the default settings.
knn = KNeighborsClassifier()
knn.fit(iris_X_train, iris_y_train)
# In an interactive session fit() echoes the estimator. The original note
# recorded this repr (it is output, not a statement to execute; the exact
# text depends on the installed scikit-learn version):
#   KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
#                        metric_params=None, n_jobs=1, n_neighbors=5, p=2,
#                        weights='uniform')

print(knn.predict(iris_X_test))  # predicted labels for the held-out samples
print(iris_y_test)               # true labels for the held-out samples
# Output recorded in the original note (predictions first, then true labels):
# [1 2 1 0 0 0 2 1 2 0]
# [1 1 1 0 0 0 2 1 2 0]