读取数据
import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.rcParams['axes.unicode_minus'] = False
data = sio.loadmat("D:\吴恩达机器学习与深度学习\CourseraML\ex5\data\ex5data1.mat") #加载数据
X, y = data['X'], data['y'] #训练集
Xtest, ytest = data['Xtest'], data['ytest'] #测试集
Xval, yval = data['Xval'], data['yval'] #验证集
X = np.insert(X, 0, 1, axis =1)
Xtest = np.insert(Xtest, 0, 1, axis =1)
Xval = np.insert(Xval, 0, 1, axis =1)
数据可视化
def dataPlot(): #数据可视化函数
plt.figure(figsize=(6, 4)) #新建画布
plt.plot(X[:,1], y, 'x' )
plt.xlabel("Change in water level")
plt.ylabel("Water flowing out of the dam")
plt.grid(True)
dataPlot()
假设函数和损失函数
def h(mytheta, myx): #定义假设函数
return np.dot(myx, mytheta)
def costFunction(mytheta, myx, myy, mylambda = 0): #定义损失函数
m = myx.shape[0] #样本数
myh = h(mytheta, myx).reshape(m, 1)
mycost = float(1./(2*m)*np.dot((myh - myy).T, (myh-myy)))
regterm = mylambda/(2*m)*np.dot(mytheta, mytheta.T)
return mycost + regterm
梯度函数
def computeGradient(mytheta, myx, myy, mylambda = 0): #定义梯度函数
mytheta = mytheta.reshape((mytheta.shape[0],1))
m = myx.shape[0]
myh = h(mytheta,myx).reshape((m,1))
grad = 1/(m)*np.dot(myx.T, (h(mytheta, myx)-myy))
regterm = mylambda/m*mytheta
regterm[0] = 0
regterm.reshape((grad.shape[0],1))
return (grad + regterm).flatten()
mytheta = np.array([1, 1]).T
print(computeGradient(mytheta, X, y, 1.))
最优化参数θ
from scipy import optimize
def optimizeTheta(mytheta, myx, myy, mylambda=0., print_output=True): #定义最优化theta的函数
fit_theta = optimize.fmin_cg(costFunction, x0= mytheta, fprime = computeGradient, args = (myx, myy, 0), maxiter = 500)
fit_theta = fit_theta.reshape((mytheta.shape[0],1))
return fit_theta
print(optimizeTheta(mytheta, X, y, 0))
theta1 = optimizeTheta(mytheta, X, y, 0)
可视化
plt.plot(X[:,1], y, 'x')
plt.plot(X[:,1], h(theta1, X))
可视化
def learningCurve(): #定义学习曲线
initial_theta = np.array([1, 1]).T
train_error, val_error = [], []
for i in range(1, X.shape[0]+1, 1):
train_subset = X[:i, :]
y_subset = y[:i]
#mym.append(y_subset.shape[0])
theta1 = optimize.fmin_cg(costFunction, x0= initial_theta, fprime = computeGradient, args = (train_subset, y_subset, 0), maxiter = 100)
train_error.append(costFunction(theta1, train_subset, y_subset, mylambda = 0))
val_error.append(costFunction(theta1, Xval, yval, mylambda = 0))
plt.figure(figsize = (6, 4))
#print(mym, train_error, val_error)
plt.plot(range(1,len(X)+1), train_error, label = "train")
plt.plot(range(1,len(X)+1), val_error, label = "val")
plt.legend()
plt.grid(True)
learningCurve()
特征映射
def polyFeature(myx, power): #定义特征映射函数
newx = myx.copy()
for i in range(2, power+1):
newx = np.insert(newx, newx.shape[1], np.power(newx[:,1], i), axis =1)
return newx
def featureNormalize(myx): #特征标准化
xnorm = myx.copy()
feature_means = np.mean(xnorm, axis=0) #按列求均值
feature_stds = np.std(xnorm, axis=0) #按列求方差
xnorm[:,1:] = (xnorm[:,1:] - feature_means[1:]) / feature_stds[1:]
return xnorm, feature_means, feature_stds
标准化
newx = polyFeature(X, power = 6)
xnorm, feature_means, feature_stds = featureNormalize(newx)
mytheta = np.ones((newx.shape[1], 1))
fit_theta = optimizeTheta(mytheta, xnorm, y )
拟合
def plotFit(): #多项式拟合函数曲线
xval = np.linspace(-55, 55, 50)
X = np.ones((50, 1))
X = np.insert(X, 1, xval.T, axis = 1)
newx = polyFeature(X, power = 6)
xnorm, feature_means, feature_stds = featureNormalize(newx)
yval = h(fit_theta, xnorm)
dataPlot()
plt.plot(xval, yval, 'r--')
plotFit()
多项式学习曲线可视化
def polyLearningCurve(): #定义多项式学习曲线
initial_theta = np.ones((7, 1)) #初始化theta
train_error, val_error = [], []
myXval = featureNormalize( polyFeature(Xval, 6))[0]
for i in range(1, X.shape[0]+1, 1):
train_subset = featureNormalize(polyFeature( X[:i, :], 6))[0]
y_subset = y[:i,:]
theta1 = optimize.fmin_cg(costFunction, x0 = initial_theta, fprime = computeGradient, args = (train_subset, y_subset, 0), maxiter = 2000)
train_error.append(costFunction(theta1, train_subset, y_subset, mylambda = 0))
val_error.append(costFunction(theta1, myXval,yval,0))
plt.figure(figsize = (6,4))
plt.plot(range(1,len(X)+1), train_error, label = "train_error")
plt.plot(range(1, len(X)+1), val_error, label = "val_error")
plt.legend()
return theta1
polyLearningCurve()
- 选择合适的正则系数λ
def lambdaError(): #确定正则系数
lambdas = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]
initial_theta = np.ones((7, 1)) #初始化theta
trainX = featureNormalize(polyFeature(X, 6))[0]
myXval = featureNormalize( polyFeature(Xval, 6))[0]
train_error, val_error = [], []
for mylambda in lambdas:
print(mylambda)
theta1 = optimize.fmin_cg(costFunction, x0 = initial_theta, fprime = computeGradient, args = (trainX, y, mylambda), maxiter = 500)
#theta1 = optimizeTheta(initial_theta, trainX, y, mylambda)
train_error.append(costFunction(theta1, trainX, y, 0))
val_error.append(costFunction(theta1, myXval, yval, 0))
plt.figure(figsize = (6, 4))
plt.plot(range(1,len(lambdas)+1), train_error, label = "train_error")
plt.plot(range(1, len(lambdas)+1), val_error, label = "val_error")
plt.legend()
lambdaError()
从图中看到验证误差最小的时候是lambda为0.03