# 1. Normalize the data in distance.txt, rescaling the values into the range [-1, 1].
import numpy as np
# Print floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)

# Load the comma-separated values of distance.txt into a NumPy array.
a = np.genfromtxt("distance.txt", delimiter=",")
def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` linearly onto [-1, 1].

    Each column is shifted by its minimum, divided by its range
    (max - min) to land in [0, 1], and then mapped to [-1, 1].

    Args:
        dataSet: Array-like of numbers with one sample per row and one
            feature per column.

    Returns:
        A NumPy array of the same shape in which each column's minimum
        becomes -1 and its maximum becomes 1. A column whose values are
        all equal has a zero range, so its entries come out as NaN.
    """
    dataSet = np.asarray(dataSet)
    minVals = dataSet.min(0)
    ranges = dataSet.max(0) - minVals
    # Broadcasting stretches the per-column vectors across all rows, so no
    # explicit np.tile copies are needed.
    return ((dataSet - minVals) / ranges - 0.5) * 2
# Show the normalized data; np.asarray is used because only the `np`
# namespace is imported, so a bare `array` name would be undefined.
print(autoNorm(np.asarray(a)))
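# 2. Read the data in datingTestSet.txt. The first 3 columns are the 3 features and
#    the last column is the label (how much the person is liked).
#    Store the feature set of datingTestSet.txt in returnMat and the labels in labels,
#    then print returnMat and labels.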
# Integer codes for the textual labels found in the last column of each line.
labelCodes = {"didntLike": 1, "smallDoses": 2, "largeDoses": 3}

# Read all lines first so the feature matrix can be pre-sized; the
# with-block closes the file even if reading fails.
with open("datingTestSet.txt") as fr:
    arrayOLines = fr.readlines()

numberOfLines = len(arrayOLines)
returnMat = np.zeros((numberOfLines, 3))
labels = []
for index, line in enumerate(arrayOLines):
    listFromLine = line.strip().split('\t')
    # The first three tab-separated fields are the features of this row.
    returnMat[index, :] = listFromLine[0:3]
    labelCode = labelCodes.get(listFromLine[-1])
    # NOTE: a line whose last field is not one of the known labels adds no
    # entry to `labels`, so `labels` can end up shorter than `returnMat`.
    if labelCode is not None:
        labels.append(labelCode)

print(returnMat)
print(labels)