1.基于numpy实现

from numpy import tile, array


def classify0(inx, data_set, labels, k):
    """Classify ``inx`` by majority vote among its k nearest training samples.

    The Euclidean distance ``((xa0 - xb0) ** 2 + (xa1 - xb1) ** 2) ** 0.5``
    from the input point to every training row is computed, the rows are
    ordered from nearest to farthest, and the label that occurs most often
    among the first ``k`` rows is returned.

    :param inx: input vector to classify
    :param data_set: training data, a 2-D numpy array with one sample per row
    :param labels: label vector, indexed by training row number
    :param k: number of nearest neighbours that vote; a value larger than the
        number of training rows is reduced to that number
    :return: the label with the most votes; when several labels share the
        highest count, the one that received its first vote earliest wins
    :raises ValueError: if ``data_set`` has no rows or ``k`` is smaller than 1
    """
    # shape is (rows, columns); the row count is the number of samples.
    sample_count = data_set.shape[0]
    if sample_count == 0:
        raise ValueError("data_set must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    # Asking for more neighbours than there are samples previously failed
    # with an IndexError; every sample simply votes in that case.
    neighbour_count = min(k, sample_count)

    # tile repeats the input vector once per training row so the coordinate
    # differences can be taken for all rows at once.
    diff_mat = tile(inx, (sample_count, 1)) - data_set
    # Square the differences, sum them per row and take the root.
    distance = (diff_mat ** 2).sum(axis=1) ** 0.5
    # Row indices of the nearest samples, closest first.
    nearest_rows = distance.argsort()[:neighbour_count]

    # Tally the labels of the selected neighbours.
    class_count_map = {}
    for row in nearest_rows:
        vote_label = labels[row]
        class_count_map[vote_label] = class_count_map.get(vote_label, 0) + 1

    # max() keeps the first entry among equal counts, and dicts preserve
    # insertion order, so ties resolve exactly like a stable descending sort.
    return max(class_count_map.items(), key=lambda item: item[1])[0]


def create_data_set_label():
    """Return the four-sample demo training set and its labels.

    :return: a tuple ``(data_set, labels)`` where ``data_set`` is a numpy
        array with one two-feature sample per row and ``labels`` is the list
        of class names for those rows
    """
    rows = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    row_labels = ["A", "A", "B", "B"]
    return array(rows), row_labels


if __name__ == "__main__":
    # Demo run: classify the origin against the toy data with 3 neighbours.
    samples, sample_labels = create_data_set_label()
    query_point = [0, 0]
    print(classify0(query_point, samples, sample_labels, 3))

2.基于tensorflow实现

import numpy
import tensorflow as tf

from numpy import array, tile


def get_data_distance(data, input_data):
    """Pair ``data`` with its Euclidean distance to ``input_data``.

    ``input_data`` is repeated ``data.shape[0]`` times with ``tile`` before
    being subtracted from ``data``; the squared differences are summed along
    axis 1 and the square root of each sum is taken.

    :param data: tensor the distances are measured from
    :param input_data: query vector
    :return: a tuple ``(data, distance)`` with ``data`` passed through as-is
    """
    repeat_count = data.shape[0]
    repeated_query = tile(input_data, (repeat_count, 1))
    squared = tf.square(tf.subtract(data, repeated_query))
    distance = tf.sqrt(tf.reduce_sum(squared, axis=1))
    return data, distance


def classify0(data_set: array, labels: array, inx, k: int):
    """Classify ``inx`` by majority vote among its k nearest training samples.

    The training rows are wrapped in a ``tf.data.Dataset`` and each row is
    mapped through ``get_data_distance`` to obtain its distance to ``inx``.
    The rows are then ordered from nearest to farthest and the most frequent
    label among the first ``k`` rows is returned.

    :param data_set: training data, one sample per row
    :param labels: label of each training row, indexed by row number
    :param inx: input vector to classify
    :param k: number of nearest neighbours that vote; a value larger than the
        number of training rows is reduced to that number
    :return: the label with the most votes; when several labels share the
        highest count, the one that received its first vote earliest wins
    :raises ValueError: if there are no training rows or ``k`` is below 1
    """
    train_dataset = tf.data.Dataset.from_tensor_slices(data_set)
    train_dataset = train_dataset.map(map_func=lambda d: get_data_distance(d, inx))
    # Each element is the (data, distance) pair returned by get_data_distance;
    # the first entry of the distance vector is used as the row's distance.
    distance = numpy.array(
        [element[1][0] for element in train_dataset.as_numpy_iterator()]
    )

    sample_count = len(distance)
    if sample_count == 0:
        raise ValueError("data_set must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    # Asking for more neighbours than there are samples previously failed
    # with an IndexError; every sample simply votes in that case.
    nearest_rows = distance.argsort()[:min(k, sample_count)]

    # Tally the labels of the selected neighbours.
    class_count_map = {}
    for row in nearest_rows:
        vote_label = labels[row]
        class_count_map[vote_label] = class_count_map.get(vote_label, 0) + 1

    # max() keeps the first entry among equal counts, and dicts preserve
    # insertion order, so ties resolve exactly like a stable descending sort.
    return max(class_count_map.items(), key=lambda m: m[1])[0]


def create_data_set_label():
    """Return the four-sample demo training set and its labels.

    :return: a tuple ``(data_array, label_array)``: a numpy array with one
        two-feature sample per row, and the list of class names for the rows
    """
    labelled_points = (
        ([1.0, 1.1], "A"),
        ([1.0, 1.0], "A"),
        ([0, 0], "B"),
        ([0, 0.1], "B"),
    )
    points = array([point for point, _ in labelled_points])
    names = [name for _, name in labelled_points]
    return points, names


if __name__ == "__main__":
    # Demo run: classify the origin against the toy data with 3 neighbours.
    samples, sample_labels = create_data_set_label()
    result = classify0(samples, sample_labels, [0, 0], 3)
    print(result)

3.基于pytorch实现

import numpy as np
from numpy import array, tile
import torch
from tqdm import tqdm


def classify0(inx, data_set, labels, k):
    """Classify ``inx`` by majority vote among its k nearest training samples.

    The Euclidean distance from ``inx`` to every training row is computed in
    a single vectorised pass. The earlier version repeated the identical
    computation once per training row on a tiled copy of ``inx``, which cost
    O(n^2) work for the same answer.

    :param inx: input vector to classify
    :param data_set: training data, a 2-D numpy array with one sample per row
    :param labels: label of each training row, indexed by row number
    :param k: number of nearest neighbours that vote; the sorted row indices
        are sliced with ``[:k]``, so a value above the row count uses all rows
    :return: the most frequent label among the selected neighbours, as
        produced by ``np.unique``; ``None`` when there is nothing to vote on
        (no training rows, or ``k`` equal to 0). Labels with equal counts are
        resolved in the sorted order ``np.unique`` returns them in.
    """
    if data_set.shape[0] == 0:
        # No training rows means no votes can be cast.
        return None

    # float32 matches the dtype the previous torch.Tensor(...) calls produced
    # under torch's default settings.
    train_x = torch.as_tensor(data_set, dtype=torch.float32)
    query = torch.as_tensor(inx, dtype=torch.float32)

    # Broadcasting subtracts the query from every training row at once.
    dists = torch.sum((query - train_x) ** 2, dim=1) ** 0.5
    idxes = dists.argsort()[:k].tolist()

    unique, counts = np.unique(
        np.array([labels[idx] for idx in idxes]), return_counts=True
    )
    class_count_map = dict(zip(unique, counts))
    sort_class_count = sorted(
        class_count_map.items(), key=lambda d: d[1], reverse=True
    )
    # Return the prediction, or None when no neighbour was selected.
    if sort_class_count:
        return sort_class_count[0][0]
    return None


def create_data_set_label():
    """Return the four-sample demo training set and its labels.

    :return: a tuple ``(data_set, labels)``: a numpy array with one
        two-feature sample per row, and the list of class names for the rows
    """
    class_a_points = [[1.0, 1.1], [1.0, 1.0]]
    class_b_points = [[0, 0], [0, 0.1]]
    features = array(class_a_points + class_b_points)
    names = ["A"] * len(class_a_points) + ["B"] * len(class_b_points)
    return features, names


if __name__ == "__main__":
    # Demo run: classify the origin against the toy data with 3 neighbours.
    samples, sample_labels = create_data_set_label()
    origin = [0, 0]
    print(classify0(origin, samples, sample_labels, 3))

4.基于sklearn实现

from numpy import array, tile
from sklearn.neighbors import KNeighborsClassifier


def classify0(inx, data_set, labels, k):
    """Classify ``inx`` with a scikit-learn k-nearest-neighbours model.

    A ``KNeighborsClassifier`` with ``n_neighbors=k`` is fitted on the
    training data and asked to predict the single input sample.

    :param inx: input vector to classify
    :param data_set: training data passed to ``fit``
    :param labels: training labels passed to ``fit``
    :param k: number of neighbours the classifier is configured with
    :return: the first (and only) entry of the classifier's prediction
    """
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(data_set, labels)
    # predict expects a 2-D input, so the query becomes a single-row matrix.
    query_row = array(inx).reshape(1, -1)
    return model.predict(query_row)[0]


def create_data_set_label():
    """Return the four-sample demo training set and its labels.

    :return: a tuple ``(data_set, labels)``: a numpy array with one
        two-feature sample per row, and the list of class names for the rows
    """
    labelled_points = [
        ([1.0, 1.1], "A"),
        ([1.0, 1.0], "A"),
        ([0, 0], "B"),
        ([0, 0.1], "B"),
    ]
    points, names = zip(*labelled_points)
    return array(points), list(names)


if __name__ == "__main__":
    # Demo run: classify the origin against the toy data with 3 neighbours.
    samples, sample_labels = create_data_set_label()
    answer = classify0([0, 0], samples, sample_labels, 3)
    print(answer)