import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
import random
import operator

def knn(x_test, x_data, y_data, k=5):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``x_test`` and every row of
    ``x_data``.

    Args:
        x_test: Feature vector of the sample to classify (array-like, one
            value per column of ``x_data``).
        x_data: Training samples, array-like with one row per sample.
        y_data: Training labels, indexable by integer position, one per row
            of ``x_data``. Labels are used as dict keys, so they must be
            hashable.
        k: Number of neighbours that vote. Values larger than the number of
            training rows are clamped to that number.

    Returns:
        The label with the most votes among the ``k`` nearest rows. On a
        tie, the tied label that is reached first when walking the
        neighbours from nearest to farthest wins.

    Raises:
        ValueError: If ``x_data`` has no rows or ``k`` is smaller than 1.
    """
    # Accept plain lists as well as ndarrays.
    x_data = np.asarray(x_data, dtype=float)
    x_test = np.asarray(x_test, dtype=float)

    sample_count = x_data.shape[0]
    if sample_count == 0:
        raise ValueError("x_data must contain at least one sample")
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Never ask for more neighbours than there are training rows.
    k = min(k, sample_count)

    # Broadcasting subtracts x_test from every row; no tiled copy is needed.
    distances = ((x_data - x_test) ** 2).sum(axis=1) ** 0.5
    nearest = distances.argsort()[:k]

    # Count votes in nearest-first order so that dict insertion order
    # encodes the tie-break described in the docstring.
    votes = {}
    for idx in nearest:
        label = y_data[idx]
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first maximal key in iteration (insertion) order.
    return max(votes, key=votes.get)

def main():
    """Evaluate the hand-written ``knn`` classifier on the Iris dataset.

    Loads the dataset, shuffles samples and labels with one shared random
    permutation, uses the first 80% of rows for training and the rest for
    testing, then prints the split sizes, a classification report and a
    confusion matrix.
    """
    iris = datasets.load_iris()

    # Shuffle the data: apply the same random order to features and labels.
    order = list(range(iris.data.shape[0]))
    random.shuffle(order)
    features = iris.data[order]
    labels = iris.target[order]

    total = features.shape[0]
    train_count = int(total * 0.8)
    test_count = total - train_count

    x_train, x_test = features[:train_count], features[train_count:]
    y_train, y_test = labels[:train_count], labels[train_count:]

    print("train_data count = ", train_count)
    print("test_data count = ", test_count)

    # Classify each held-out sample against the training split.
    predicted = [knn(sample, x_train, y_train) for sample in x_test]

    print(classification_report(y_test, predicted))
    print(confusion_matrix(y_test, predicted))


# Iris flower KNN classification data.
# (The line above was a bare, undefined identifier in the original file and
# raised NameError once main() had finished.)

# Run the experiment only when executed as a script, not on import.
if __name__ == "__main__":
    main()