# 1、feature.txt 是关于性别预测的数据：第一列为身高，第二列为体重，第三列为鞋码（鞋子尺码），
# 第四列为性别（标签列）。请根据 feature.txt 的数据预测 [155,48,36]（即身高为155，体重为48，鞋码为36）的人的性别。
import numpy as np
#from sklearn import neighbors
#from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Print floats in plain decimal notation instead of scientific notation.
np.set_printoptions(suppress=True)

# Every field is read as text because the last column holds the (string)
# gender label; the leading columns are the numeric features.
dataset = np.genfromtxt("feature.txt", dtype='str', encoding='utf-8')
x_data = dataset[:, 0:-1]   # all columns except the last: features (as text)
labels = dataset[:, -1]     # last column: class label
datasetnum = len(x_data)

# Convert the textual feature columns to floats in one vectorised step.
# This replaces a row-by-row copy into a pre-allocated (datasetnum, 3) array,
# so it no longer assumes exactly three feature columns.
datingmat = x_data.astype(float)
def autonorm(dataset):
    """Rescale every column of ``dataset`` linearly into the range [-1, 1].

    Each column is min-max normalised: its minimum maps to -1 and its
    maximum maps to 1.

    Args:
        dataset: 2-D numeric array (or array-like) with one sample per row
            and one feature per column.

    Returns:
        A float ndarray of the same shape holding the rescaled values.
        A column whose values are all equal (zero range) maps to -1
        instead of producing NaN from a division by zero.
    """
    data = np.asarray(dataset, dtype=float)
    minvals = data.min(0)
    ranges = data.max(0) - minvals
    # A constant column has range 0; divide by 1 there to avoid 0/0 -> NaN.
    safe_ranges = np.where(ranges == 0, 1.0, ranges)
    # Broadcasting applies the per-column offsets/scales to every row.
    normdataset = data - minvals
    normdataset = (normdataset / safe_ranges - 0.5) * 2
    return normdataset
# Train a 1-nearest-neighbour classifier on the features rescaled to [-1, 1].
normalized_features = autonorm(datingmat)
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(normalized_features, labels)
# Accuracy measured on the training data itself (not a held-out estimate).
print(knn.score(normalized_features, labels))

# The query must be mapped into the same [-1, 1] space as the training data,
# using the training set's per-column minimum and range; passing the raw
# values would compare unscaled numbers against scaled ones.
feature_min = datingmat.min(0)
feature_range = datingmat.max(0) - feature_min
# Guard against a constant column (zero range) to avoid dividing by zero.
feature_range = np.where(feature_range == 0, 1.0, feature_range)
query = np.array([[155, 48, 36]])  # height, weight, shoe size
normalized_query = ((query - feature_min) / feature_range - 0.5) * 2
print(knn.predict(normalized_query))
# 2、创建 1×1024 的 NumPy 数组，然后打开给定的文件 0_0.txt，循环读出文件的前 32 行，并将每行的头 32 个字符值存储在 NumPy 数组中，最后返回该数组。
import numpy as np
def img2vector(filename):
    """Flatten a 32x32 text image of digits into a 1x1024 NumPy row vector.

    The first 32 lines of the file are read, and the first 32 characters of
    each line are converted with ``int`` and stored row after row.

    Args:
        filename: Path of the text file to read.

    Returns:
        A float ndarray of shape (1, 1024); element ``32 * i + j`` holds the
        value of character ``j`` on line ``i``.

    Raises:
        IndexError: If one of the first 32 lines has fewer than 32 characters
            (including the case where the file has fewer than 32 lines).
        ValueError: If one of the characters read is not a decimal digit.
    """
    return_vect = np.zeros((1, 1024))
    # The context manager closes the file even if parsing raises.
    with open(filename) as fr:
        for i in range(32):
            line_str = fr.readline()
            for j in range(32):
                return_vect[0, 32 * i + j] = int(line_str[j])
    return return_vect
# Convert the sample digit file and display the resulting 1x1024 vector.
digit_vector = img2vector('0_0.txt')
print(digit_vector)