import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

加载样本数据

  • matplotlib读取图片
  • 将每一张图片转化为一组数组

bmc中的kvm是什么_bmc中的kvm是什么

# Read one sample bitmap of the digit 0 into an array
bmp1 = plt.imread('digits/0/0_1.bmp')
# Inspect the array shape
display(bmp1.shape)
# Render the image with the default colormap
plt.imshow(bmp1)
(28, 28)





<matplotlib.image.AxesImage at 0x5d80c50>

[图:plt.imshow(bmp1) 的输出,默认配色下的样本图片 digits/0/0_1.bmp]

# Same sample image, rendered with a grayscale colormap instead of the default
bmp1 = plt.imread('digits/0/0_1.bmp')
# Inspect the array shape
display(bmp1.shape)
# Render the image
plt.imshow(bmp1,cmap='gray')
(28, 28)





<matplotlib.image.AxesImage at 0x5dfdeb0>

[图:plt.imshow(bmp1, cmap='gray') 的输出,灰度配色下的样本图片 digits/0/0_1.bmp]

尝试将两张图片转化为2行 28*28列

bmp2 = plt.imread('digits/0/0_2.bmp')

# Flatten each 2-D image into a 1-D vector (reshape(-1) would do the same)
# and stack the vectors as the rows of one array.
digits = np.array([image.ravel() for image in (bmp1, bmp2)])
digits.shape
(2, 784)

尝试将数组中某一张图片进行显示

plt.imshow(digits[0].reshape(28,28),cmap='gray')
<matplotlib.image.AxesImage at 0x4ca1e90>

[图:digits[0] 还原为 28×28 后的灰度显示]

批量化读取所有的图片

  • 样本数据:每一张图片展平后的一维数组
  • 标签:图片所在的目录名(即数字 0-9)
# Goal: `data` with shape (5000, 784) and `target` with shape (5000,).
# Enumerate every (file path, label) pair first: 10 digit directories with
# files numbered 1..500 in each, in label order.
labelled_paths = [
    (f'digits/{digit}/{digit}_{number}.bmp', digit)
    for digit in range(10)
    for number in range(1, 501)
]

# One flattened image per row; the label of a row is its directory digit.
data = np.array([plt.imread(path).ravel() for path, _ in labelled_paths])
target = np.array([digit for _, digit in labelled_paths])

display(data.shape, target.shape)
(5000, 784)



(5000,)

创建分类模型

  • KNN 分类
  • 邻近数量:5,7,9,11
  • weights: uniform,distance # 权重
# Build the classifier with 7 neighbours; no other option is passed explicitly.
knn = KNeighborsClassifier(n_neighbors=7)
# The bare string below has no effect at run time; it records the constructor
# signature as a reference for the parameters tried later.
"""
KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',
    algorithm='auto',
    leaf_size=30,
    p=2,
    metric='minkowski',
    metric_params=None,
    n_jobs=None,
    **kwargs,
)
"""
# Hold out 20% of the samples as a test set. No random_state is passed, so the
# split (and therefore the scores below) may differ from run to run.
from sklearn.model_selection import train_test_split as split
X_train,X_test,y_train,y_test = split(data,target,test_size=0.2)

用拆分出的训练集训练模型,并在测试集上评估

knn.fit(X_train,y_train)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=7, p=2,
                     weights='uniform')
knn.score(X_test,y_test)
0.93
# Trial with n_neighbors=5: update the parameter, refit on the training split
# and report the accuracy on the held-out split.
(
    knn.set_params(n_neighbors=5)
    .fit(X_train, y_train)
    .score(X_test, y_test)
)
0.929
# Trial with n_neighbors=9: update the parameter, refit on the training split
# and report the accuracy on the held-out split.
(
    knn.set_params(n_neighbors=9)
    .fit(X_train, y_train)
    .score(X_test, y_test)
)
0.922
# Trial with n_neighbors=9 and distance-weighted voting: refit on the training
# split and report the accuracy on the held-out split.
(
    knn.set_params(n_neighbors=9, weights='distance')
    .fit(X_train, y_train)
    .score(X_test, y_test)
)
0.926
# Trial with n_neighbors=11. Only n_neighbors is changed here, so `weights`
# keeps the 'distance' value set by the previous set_params call.
(
    knn.set_params(n_neighbors=11)
    .fit(X_train, y_train)
    .score(X_test, y_test)
)
0.917

用全部样本重新训练(此时模型沿用最后一次设置的参数:n_neighbors=11、weights='distance')

knn.fit(data,target)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=11, p=2,
                     weights='distance')
import os

加载测试样本

test1 = plt.imread('digits/test/4.bmp')
test1.shape
(28, 28, 3)
plt.imshow(test1.mean(axis=-1))
<matplotlib.image.AxesImage at 0xada3b30>

[图:test1 按颜色通道取平均后的显示结果,默认配色]

# Average the colour channels and render the result with a grayscale colormap
plt.imshow(test1.mean(axis=-1),cmap='gray')
<matplotlib.image.AxesImage at 0xaddf710>

[图:test1 按颜色通道取平均后的显示结果,灰度配色]

# Load every bitmap in digits/test as one flattened grayscale vector per image.
# The label of each image is its file name without the extension (kept as a
# string, e.g. '4' for 4.bmp).
test_dir = 'digits/test'
test_data = []
test_target = []
# os.listdir() returns names in arbitrary order; sort them so rows and labels
# come out in the same order on every run.
for filename in sorted(os.listdir(test_dir)):
    # os.path.splitext() separates the base name from the extension
    label, ext_name = os.path.splitext(filename)
    if ext_name.lower() != '.bmp':
        # Skip anything in the directory that is not a bitmap
        continue
    print(filename)
    bmp = plt.imread(os.path.join(test_dir, filename))
    if bmp.ndim == 3:
        # Colour image: the last axis holds (r, g, b) and possibly a fourth
        # alpha channel. Average only the three colour channels to get a
        # single intensity per pixel; alpha is opacity, not colour.
        bmp = bmp[..., :3].mean(axis=-1)
    # A 2-D image is already one value per pixel and is flattened as it is.
    test_data.append(bmp.ravel())
    test_target.append(label)

test_data = np.array(test_data)
test_target = np.array(test_target)

display(test_data.shape,test_target.shape)
4.bmp
5.bmp
6.bmp
7.bmp
8.bmp
9.bmp



(6, 784)



(6,)
test_target
array(['4', '5', '6', '7', '8', '9'], dtype='<U1')
test_data[0].shape
(784,)
test_data
array([[255., 255., 255., ..., 255., 255., 255.],
       [255., 255., 255., ..., 255., 255., 255.],
       [255., 255., 255., ..., 255., 255., 255.],
       [255., 255., 255., ..., 255., 255., 255.],
       [255., 255., 255., ..., 255., 255., 255.],
       [255., 255., 255., ..., 255., 255., 255.]])

预测测试数据集的结果

y_ = knn.predict(test_data)
y_
array([4, 4, 5, 1, 0, 5])

可视化的方式显示预测结果

# Draw every test image with its true label and predicted label in the title
plt.figure(figsize=(10,12))
for i,test_bmp in enumerate(test_data):
    # Grid of two rows by three columns
    plt.subplot(2,3,i+1)
    plt.imshow(test_bmp.reshape(28,28),cmap='gray')
    plt.title(f'True:{test_target[i]} Pred:{y_[i]}',size=20)
    plt.axis('off')   # hide the axes
plt.show()

[图:2 行 3 列子图,显示 6 张测试图片及其真实标签(True)与预测标签(Pred)]

读取手写的数字

  • filename: digits/test/4_2.bmp
test_4_2 = plt.imread('digits/test/4_2.bmp')
test_4_2.shape
(28, 28, 4)
# test_4_2 has four values per pixel; matplotlib's imread returns that layout
# for RGBA images. Average only the three colour channels: including the alpha
# channel would mix opacity into the gray level fed to the classifier.
gray_4_2 = test_4_2[..., :3].mean(axis=-1)
# predict() expects one row per sample, hence the reshape to (1, 784)
y_2 = knn.predict(gray_4_2.reshape(1, -1))
display(y_2)
array([1])
from pandas import Series
D:\yingyong\Anaconda3\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 112 from C header, got 124 from PyObject
  return f(*args, **kwds)
Series(test_4_2.ravel()).unique()
array([255,   0], dtype=uint64)
Series(bmp1.ravel()).unique()
array([255, 204,  96,   2, 205, 207,  17,   3,  18, 201,  28,  16,  22,
       198, 249, 245, 195,  31,  53, 171, 133,  92, 159,  66,  88,  65,
       141,  27, 208, 176,   0,  87,  76, 243, 180, 134, 234,  12, 217,
        90,  47, 248,  77,  15, 184, 236, 227,  60, 192,  57,  59, 179,
         9, 143, 107, 170,  25, 230, 120,  69,  32, 124,  30, 110,  82,
       169,  93,   6, 109, 226, 199,  26,  40, 125,  56, 127, 114, 218],
      dtype=uint64)

从 5000 张训练图片中随机抽取 50 张进行预测(注意:模型已用全部 5000 张图片训练,且 weights='distance',因此下面得到的准确率 1.0 只说明模型记住了训练样本,不能代表泛化能力)

index = np.arange(5000)
index
array([   0,    1,    2, ..., 4997, 4998, 4999])
np.random.permutation(index)# 随机打乱
array([4660, 2679, 3913, ...,  525, 3990,  146])
# 测试50个
test_index = np.random.randint(5000,size=50)
data[test_index].shape
(50, 784)
y_ = knn.predict(data[test_index])
target[test_index]
array([3, 3, 7, 8, 4, 7, 4, 3, 7, 3, 6, 5, 9, 7, 1, 8, 8, 2, 1, 5, 6, 2,
       5, 7, 9, 3, 0, 3, 9, 2, 6, 0, 0, 6, 0, 8, 6, 7, 6, 2, 1, 0, 4, 9,
       9, 8, 5, 0, 4, 5])
y_
array([3, 3, 7, 8, 4, 7, 4, 3, 7, 3, 6, 5, 9, 7, 1, 8, 8, 2, 1, 5, 6, 2,
       5, 7, 9, 3, 0, 3, 9, 2, 6, 0, 0, 6, 0, 8, 6, 7, 6, 2, 1, 0, 4, 9,
       9, 8, 5, 0, 4, 5])
test_label = target[test_index]
test_label[test_label == y_].size / test_label.size
1.0
# Draw another 50 random row indices (repeats are possible) and compute the
# share of those rows whose prediction equals the stored label.
test_index = np.random.randint(5000, size=50)
test_label = target[test_index]
y_ = knn.predict(data[test_index])
np.count_nonzero(test_label == y_) / test_label.size
1.0
# Draw 50 random row indices below 1000 (repeats are possible) and compute the
# share of those rows whose prediction equals the stored label.
# NOTE: `data` was filled label by label with 500 images each, so indices
# below 1000 only ever select images of the digits 0 and 1.
test_index = np.random.randint(1000, size=50)
test_label = target[test_index]
y_ = knn.predict(data[test_index])
np.count_nonzero(test_label == y_) / test_label.size
1.0
# Show the 50 sampled digits in a 5 x 10 grid, titled with the true label (T)
# and the predicted label (P).
# y_ was predicted from data[test_index], so the picture and the true label
# must be taken from data/target as well. X_test/y_test are a separate,
# shuffled subset whose rows do not correspond to the indices in test_index.
plt.figure(figsize=(30,20))
for position, (index, predicted) in enumerate(zip(test_index, y_), start=1):
    plt.subplot(5,10,position)
    plt.imshow(data[index].reshape(28,28),cmap='gray')
    plt.axis('off')
    plt.title(f'T:{target[index]} P:{predicted}',size=30)

plt.show()

[图:5 行 10 列子图,显示抽取的 50 张图片及其真实标签(T)与预测标签(P)]

通过mean()方式实现降维效果
# Small 3-D example array used to show how mean() collapses one axis
a = np.array([
    [[3, 4, 5], [5, 6, 7], [3, 6, 9]],
    [[1, 2, 5], [3, 5, 7], [6, 8, 9]],
])
a.shape  # two blocks, each with three rows and three columns
(2, 3, 3)
# axis=0 averages the two 3x3 blocks position by position, giving a (3, 3) result:
# 2 --->(3+1)/ 2       3 --->(4+2)/ 2       5 --->(5+5)/ 2    
# 4 --->(5+3)/ 2       5.5 --->(6+5)/ 2     7 --->(7+7)/ 2
# 4.5 --->(3+6)/ 2     7 --->(6+8)/ 2       9--->(9+9)/ 2
a.mean(axis=0)
array([[2. , 3. , 5. ],
       [4. , 5.5, 7. ],
       [4.5, 7. , 9. ]])
# axis=1 averages down the three rows of each block (column means), giving a (2, 3) result:
# 3.66666667 --->(3+5+3)/ 3    5.33333333  --->(4+6+6)/ 3    7.  --->(5+7+9)/ 3
# 3.33333333 --->(1+3+6)/ 3    5.  --->(2+5+8)/ 3    7.  --->(5+7+9)/ 3
a.mean(axis=1)
array([[3.66666667, 5.33333333, 7.        ],
       [3.33333333, 5.        , 7.        ]])
# axis=-1 averages the three values inside each row, giving a (2, 3) result:
# 4. --->(3+4+5)/ 3             6.  --->(5+6+7)/ 3      6.  --->(3+6+9)/ 3
# 2.66666667 --->(1+2+5)/ 3     5.--->(3+5+7)/ 3        7.66666667 --->(6+8+9)/ 3

a.mean(axis=-1)
array([[4.        , 6.        , 6.        ],
       [2.66666667, 5.        , 7.66666667]])