梯度方向是函数值上升最快的方向, 负梯度方向则是函数值下降最快的方向; 沿负梯度方向迭代(梯度下降)可以得到局部最优解
定义回归函数:
h(x) = theta0 + theta1 * x
定义代价函数:(误差函数)
J(theta0, theta1) = 1/(2*m) * sum_{i=1}^{m} (h(x[i]) - y[i])^2 (平方代价函数, m 为样本个数)
目标是使得代价函数取得最小值:
Min(J(theta0, theta1))
分别对theta0 和 theta1 求偏导数:
d(J(theta0, theta1)) / d(theta0) = (1/m) * sum_{i=1}^{m} (h(x[i]) - y[i])
d(J(theta0, theta1)) / d(theta1) = (1/m) * sum_{i=1}^{m} (h(x[i]) - y[i]) * x[i]
偏导数的反方向(负梯度方向)即为代价函数下降的方向
设置学习速率alpha:
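选取合适的alpha: 太大可能越过最优解, 导致代价函数无法收敛, 甚至不断增大; 太小则收敛缓慢, 迭代次数过多, 耗时太长。
每次迭代同时更新两个参数: theta0 := theta0 - alpha * d(J)/d(theta0), theta1 := theta1 - alpha * d(J)/d(theta1)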
C++代码:
#include <iostream>
#include <cstdio>
#include <cmath>
// Hypothesis of the linear model: h(x) = theta0 + theta1*x, using whichever
// theta0/theta1 are visible at the point of use.
// Both the argument and the whole expansion are parenthesized so the macro keeps
// its meaning inside larger expressions such as 2*h(a) or h(a + b).
#define h(x) (theta0 + theta1*(x))
using namespace std;
// Capacity of the sample arrays x and y.
const int MAXN = 100;
// Parameters of the hypothesis h(x) = theta0 + theta1*x. As file-scope variables
// they start at zero; GradientDescent updates them in place.
double theta0, theta1;
// Sample data: main stores the i-th input pair in x[i] and y[i].
double x[MAXN], y[MAXN];
// Number of samples, read from standard input in main.
int n;
// Squared-error cost of the current theta0/theta1 over the first n samples:
// the sum of (h(x[i]) - y[i])^2 divided by 2n.
double Error(int n){
    double total = 0.0;
    int i = 0;
    while(i < n){
        // residual of the hypothesis on sample i
        const double residual = h(x[i]) - y[i];
        total += residual*residual;
        ++i;
    }
    return total/(2*n);
}
// Batch gradient descent for the model h(x) = theta0 + theta1*x.
//   n     - number of samples to use from x[] and y[]
//   alpha - learning rate (step size applied to each partial derivative)
//   eps   - stop early once the cost changes by less than eps between updates
//   iter  - maximum number of parameter updates to perform
// Updates the file-scope theta0/theta1 in place and prints them when done.
void GradientDescent(int n, double alpha, double eps, int iter = 1000){
    // With no samples the averaged gradient (sum / n) is undefined; leave theta
    // untouched instead of dividing by zero and filling it with NaN.
    if(n > 0){
        double e = Error(n);
        // Counted loop: performs at most `iter` updates and does nothing for
        // iter <= 0 (a `while(--iter)` loop would run iter-1 times and count
        // down without bound for non-positive iter).
        for(int step = 0; step < iter; ++step){
            double t0 = 0.0, t1 = 0.0;
            for(int i = 0; i < n; ++i){
                const double residual = h(x[i]) - y[i];
                t0 += residual;
                t1 += residual*x[i];
            }
            // Update theta: both sums were accumulated with the old parameters,
            // so theta0 and theta1 are updated simultaneously.
            theta0 -= alpha*t0/n;
            theta1 -= alpha*t1/n;
            // Evaluate the new cost once and reuse it for the convergence test
            // and as the reference value of the next iteration.
            const double updated = Error(n);
            if(fabs(e - updated) < eps) break;
            e = updated;
        }
    }
    printf("theta0 = %f\n theta1 = %f\n", theta0, theta1);
}
int main(){
cin >> n;
for(int i = 0; i < n; ++i) cin >> x[i] >> y[i];
GradientDescent(n, 0.03, 0.5*1e-16);
return 0;
}