Numerical Optimization Methods
In optimization, the classical unconstrained methods include steepest descent, Newton's method, and the conjugate gradient method; for constrained extrema of multivariate functions, there is the method of Lagrange multipliers, among others.
Many blog posts and papers already explain these methods in detail, so the theory is not repeated here. Drawing mainly on several such bloggers' articles, this post implements the classical unconstrained methods in Python, as a way to practice both coding and the underlying algorithms. If you find mistakes, please point them out; criticism and corrections are welcome. Thanks!
1. Steepest Descent Method
Steepest descent is a concrete realization of gradient descent in which the step size is recomputed at every iteration by an exact line search, so that each step reduces the objective as much as possible along the current direction. Its per-iteration cost is low and it is convergent, but convergence is slow: as the iterates approach the optimum, the distance covered per iteration shrinks and the method takes ever smaller steps.
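Because the example objective used below is a positive-definite quadratic, the exact line-search step has a closed form: writing f(x) = 0.5*x^T Q x - b^T x with gradient g = Qx - b, minimizing f(x - t*g) over t gives t = (g^T g) / (g^T Q g). A minimal NumPy sketch of the resulting iteration (Q and b are read off from the objective in the script below; the loop count of 5 is arbitrary):

import numpy as np

# Exact-line-search gradient descent on f(x) = 0.5 x^T Q x - b^T x;
# Q and b match fun = x_1**2 + 2*x_2**2 - 2*x_1*x_2 - 2*x_2 used below
Q = np.array([[2.0, -2.0], [-2.0, 4.0]])
b = np.array([0.0, 2.0])
x = np.array([-2.0, 2.0])  # same starting point as the script below
for _ in range(5):
    g = Q @ x - b                # gradient at the current point
    t = (g @ g) / (g @ Q @ g)    # closed-form exact step along -g
    x = x - t * g
    print(x, 0.5 * x @ Q @ x - b @ x)

The full script below computes the same step symbolically with SymPy and plots the iterates on the objective surface.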
import matplotlib.pyplot as plt
from sympy import *
import numpy as np
'''=================== Steepest descent method ==================='''
# Define the symbolic variables
x_1 = symbols('x_1')
x_2 = symbols('x_2')
# Define the objective function
# fun = 2 * x_1 * x_2 + 2 * x_2 - x_1 ** 2 - 2 * x_2 ** 2
fun = x_1 ** 2 + 2 * x_2 ** 2 - 2 * x_1 * x_2 - 2 * x_2
# fun = x_1 ** 2 + x_2 ** 2
# Plot the objective surface in 3D
figure = plt.figure()
axes = plt.axes(projection='3d')
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)  # value range for the two variables
X, Y = np.meshgrid(X, Y)
# Z = (2 * X * Y + 2 * Y - X ** 2 - 2 * Y ** 2)  # Z as a function of X and Y; must match fun above
Z = X ** 2 + 2 * Y ** 2 - 2 * X * Y - 2 * Y
# Z = X ** 2 + Y ** 2
axes.plot_surface(X, Y, Z, cmap='rainbow')
axes.set_xlabel('X')
axes.set_ylabel('Y')
axes.set_zlabel('Z')
axes.set_title('Gradient Descent Method')
# First-order partial derivatives with respect to x_1 and x_2
grad_1 = diff(fun, x_1)
grad_2 = diff(fun, x_2)
# Algorithm parameters
MaxIter = 200
epsilon = 0.0001
# Initial iterate
x_1_value = -2
x_2_value = 2
iter_cnt = 0
current_step_size = 100  # placeholder shown in the first print, before any step is taken
grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
print(
'itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2 : %5.4f step_size : %5.4f' % (
iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value, current_step_size))
while iter_cnt <= MaxIter and abs(grad_1_value) + abs(grad_2_value) >= epsilon:
    # while abs(grad_1_value) + abs(grad_2_value) >= epsilon:  # alternative: gradient test only
    iter_cnt += 1
    # Exact line search: minimize fun along the negative gradient direction
    t = symbols('t')
    x_1_updated = x_1_value - grad_1_value * t
    x_2_updated = x_2_value - grad_2_value * t
    Fun_updated = fun.subs({x_1: x_1_updated, x_2: x_2_updated})
    grad_t = diff(Fun_updated, t)
    t_value = solve(grad_t, t)[0]  # solve grad_t == 0 for the optimal step (unique for a quadratic)
    # Update the iterate first, then recompute the gradient at the new point
    # (recomputing before the update would leave the stopping test one iteration stale)
    x_1_value = float(x_1_value - t_value * grad_1_value)
    x_2_value = float(x_2_value - t_value * grad_2_value)
    grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    current_step_size = t_value
    # Plot the new iterate on the surface
    axes.scatter(x_1_value, x_2_value, current_obj, c='r', marker='*', linewidths=3)
    plt.pause(0.5)
    print(
        'itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2 : %5.4f step_size : %5.4f' % (
            iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value, current_step_size))
# Show the final figure
plt.show()
2. Newton's Method
Newton's method uses curvature information as well as the gradient: at each iterate it forms the Hessian H and the gradient g and moves to x_new = x - H^{-1} g, the minimizer of the local quadratic model. Near a minimizer with a positive-definite Hessian it converges quadratically, far faster than steepest descent, at the price of computing second derivatives. The script below demonstrates it on the Rosenbrock function f(x_1, x_2) = (1 - x_1)^2 + 100*(x_2 - x_1^2)^2.
import matplotlib.pyplot as plt
from sympy import *
import numpy as np
'''=================== Newton's method ==================='''
# Define the symbolic variables
x_1 = symbols('x_1')
x_2 = symbols('x_2')
# Define the objective function (the Rosenbrock function)
# fun = 2 * x_1 * x_2 + 2 * x_2 - x_1 ** 2 - 2 * x_2 ** 2
# fun = x_1 ** 2 + 2 * x_2 ** 2 - 2 * x_1 * x_2 - 2 * x_2
# fun = x_1 ** 2 + x_2 ** 2
fun = (1 - x_1) ** 2 + 100 * (x_2 - x_1 ** 2) ** 2
# Plot the objective surface in 3D
figure = plt.figure()
axes = plt.axes(projection='3d')
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)  # value range for the two variables
X, Y = np.meshgrid(X, Y)
# Z = (2 * X * Y + 2 * Y - X ** 2 - 2 * Y ** 2)  # Z as a function of X and Y; must match fun above
# Z = X ** 2 + 2 * Y ** 2 - 2 * X * Y - 2 * Y
# Z = X ** 2 + Y ** 2
Z = (1 - X) ** 2 + 100 * (Y - X ** 2) ** 2
axes.plot_surface(X, Y, Z, cmap='rainbow')
axes.set_xlabel('X')
axes.set_ylabel('Y')
axes.set_zlabel('Z')
axes.set_title('Newton Method')
# First-order partial derivatives with respect to x_1 and x_2
grad_1 = diff(fun, x_1)
grad_2 = diff(fun, x_2)
# Second-order partial derivatives (entries of the Hessian)
grad_12 = diff(grad_1, x_2)
grad_11 = diff(fun, x_1, 2)
grad_22 = diff(fun, x_2, 2)
# Algorithm parameters
MaxIter = 200
epsilon = 0.0001
# Initial iterate
x_1_value = -10
x_2_value = 10
iter_cnt = 0
grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
prev_obj = current_obj + 1000  # sentinel so the first loop test passes
print(
    'itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2 : %5.4f ' % (
        iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value))
# Stop when successive objective values differ by less than epsilon
while iter_cnt <= MaxIter and abs(current_obj - prev_obj) >= epsilon:
    # while abs(grad_1_value) + abs(grad_2_value) >= epsilon:  # alternative: gradient test
    prev_obj = current_obj
    iter_cnt += 1
    # Gradient vector
    G = np.vstack((grad_1_value, grad_2_value))
    # Hessian matrix
    H = np.vstack(([grad_11_value, grad_12_value],
                   [grad_12_value, grad_22_value]))
    # Newton update: x_new = x - H^{-1} g
    z_value = np.vstack((x_1_value, x_2_value)) - np.linalg.inv(H) @ G
    x_1_value = float(z_value[0, 0])
    x_2_value = float(z_value[1, 0])
    current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    # Recompute the first- and second-order derivatives at the new point
    grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    # Plot the new iterate on the surface
    axes.scatter(x_1_value, x_2_value, current_obj, c='r', marker='*', linewidths=3)
    plt.pause(0.5)
    print(
        'itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2 : %5.4f ' % (
            iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value))
# Show the final figure
plt.show()
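One implementation note: inverting H is harmless for a 2-by-2 system, but solving the linear system H d = -G is the numerically preferred way to compute a Newton step, and a damping factor can tame steps in strongly non-quadratic regions. A minimal self-contained sketch (the numbers are simply the Rosenbrock gradient and Hessian evaluated at the starting point (-10, 10); the damping factor alpha is an illustrative addition, not part of the script above):

import numpy as np

# Rosenbrock gradient and Hessian at the starting point (-10, 10),
# standing in for the symbolic values computed in the script above
G = np.array([[-360022.0], [-18000.0]])
H = np.array([[116002.0, 4000.0],
              [4000.0, 200.0]])
x = np.array([[-10.0], [10.0]])
# Solve H d = -G instead of forming the explicit inverse (cheaper and more stable)
d = np.linalg.solve(H, -G)
alpha = 1.0  # damping factor in (0, 1]; alpha = 1 reproduces the pure Newton step
x_new = x + alpha * d
print(x_new.ravel())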
3. Conjugate Gradient Method
The conjugate gradient method keeps the low per-iteration cost of gradient-based steps while avoiding the zigzagging of steepest descent: after the first (negative-gradient) step, each new search direction d_new = -g + beta*d is made H-conjugate to the previous one, and the step length lamda = -(d^T g) / (d^T H d) is the exact minimizer along d of the local quadratic model. For a strictly convex quadratic in n variables the method terminates in at most n iterations; on a general function such as the Rosenbrock function below, the Hessian is simply re-evaluated at every iterate.
import matplotlib.pyplot as plt
from sympy import *
import numpy as np
'''=================== Conjugate gradient method (CG) ==================='''
# Define the symbolic variables
x_1 = symbols('x_1')
x_2 = symbols('x_2')
# Define the objective function (the Rosenbrock function)
# fun = -(2 * x_1 * x_2 + 2 * x_2 - x_1 ** 2 - 2 * x_2 ** 2)
# fun = x_1 ** 2 + 2 * x_2 ** 2 - 2 * x_1 * x_2 - 2 * x_2
# fun = x_1 ** 2 + x_2 ** 2
fun = (1 - x_1) ** 2 + 100 * (x_2 - x_1 ** 2) ** 2
# Plot the objective surface in 3D
figure = plt.figure()
axes = plt.axes(projection='3d')
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)  # value range for the two variables
X, Y = np.meshgrid(X, Y)
# Z = -(2 * X * Y + 2 * Y - X ** 2 - 2 * Y ** 2)  # Z as a function of X and Y; must match fun above
# Z = X ** 2 + 2 * Y ** 2 - 2 * X * Y - 2 * Y
# Z = X ** 2 + Y ** 2
Z = (1 - X) ** 2 + 100 * (Y - X ** 2) ** 2
axes.plot_surface(X, Y, Z, cmap='rainbow')
axes.set_xlabel('X')
axes.set_ylabel('Y')
axes.set_zlabel('Z')
axes.set_title('Conjugate gradient method')
# First-order partial derivatives with respect to x_1 and x_2
grad_1 = diff(fun, x_1)
grad_2 = diff(fun, x_2)
# Second-order partial derivatives (entries of the Hessian)
grad_12 = diff(grad_1, x_2)
grad_11 = diff(fun, x_1, 2)
grad_22 = diff(fun, x_2, 2)
# Algorithm parameters
MaxIter = 200
epsilon = 0.0001
# Initial iterate
x_1_value = -2
x_2_value = 2
iter_cnt = 0
grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
prev_obj = current_obj + 1000  # sentinel so the first loop test passes
print(
    'itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2 : %5.4f ' % (
        iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value))
# Stop when successive objective values differ by less than epsilon
while iter_cnt <= MaxIter and abs(current_obj - prev_obj) >= epsilon:
    # while abs(grad_1_value) + abs(grad_2_value) >= epsilon:  # alternative: gradient test
    prev_obj = current_obj
    # Plot the current iterate on the surface
    axes.scatter(x_1_value, x_2_value, current_obj, c='r', marker='*', linewidths=3)
    plt.pause(0.5)
    iter_cnt += 1
    # Gradient vector
    G = np.vstack((grad_1_value, grad_2_value))
    # Hessian matrix
    H = np.vstack(([grad_11_value, grad_12_value],
                   [grad_12_value, grad_22_value]))
    # Search direction: negative gradient on the first iteration, afterwards
    # the new negative gradient bent to be H-conjugate to the previous direction
    if iter_cnt == 1:
        d = -G
    else:
        d = -G + ((d.T @ H @ G) / (d.T @ H @ d)) * d
    # Exact step length along d for the local quadratic model
    lamda = -(d.T @ G) / (d.T @ H @ d)
    # Move to the next iterate
    z_value = np.vstack((x_1_value, x_2_value)) + lamda * d
    x_1_value = float(z_value[0, 0])
    x_2_value = float(z_value[1, 0])
    current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    # Recompute the first- and second-order derivatives at the new point
    grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    print(
        'itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2 : %5.4f ' % (
            iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value))
# Show the final figure
plt.show()
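For a strictly convex quadratic, the recurrence above reduces to the classic linear conjugate gradient algorithm, which minimizes f(x) = 0.5*x^T Q x - b^T x (equivalently, solves Qx = b) in at most n steps and never needs the Hessian-based beta used above. A minimal self-contained sketch on the quadratic from Section 1 (the variable names are illustrative):

import numpy as np

# Linear CG on f(x) = 0.5 x^T Q x - b^T x, i.e. solving Q x = b;
# Q and b match fun = x_1**2 + 2*x_2**2 - 2*x_1*x_2 - 2*x_2 from Section 1
Q = np.array([[2.0, -2.0], [-2.0, 4.0]])
b = np.array([0.0, 2.0])
x = np.array([-2.0, 2.0])
r = b - Q @ x  # residual = negative gradient
d = r.copy()   # first direction: steepest descent
for _ in range(2):  # at most n = 2 steps for a 2-D quadratic
    alpha = (r @ r) / (d @ Q @ d)     # exact step along d
    x = x + alpha * d
    r_new = r - alpha * (Q @ d)
    beta = (r_new @ r_new) / (r @ r)  # makes the next direction Q-conjugate
    d = r_new + beta * d
    r = r_new
print(x)  # expected minimizer: x_1 = 1, x_2 = 1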
4. Method of Lagrange Multipliers
To find an extremum of f(x1, x2) subject to an equality constraint g(x1, x2) = 0, form the Lagrangian L = f - k*g and set all of its partial derivatives (with respect to x1, x2, and the multiplier k) to zero; solving the resulting system yields the candidate extremum. The example below minimizes f = 60 - 10*x1 - 4*x2 + x1^2 + x2^2 - x1*x2 subject to x1 + x2 = 8.
from sympy import *
x1, x2, k = symbols('x1,x2,k')
f = 60 - 10 * x1 - 4 * x2 + (x1) ** 2 + (x2) ** 2 - x1 * x2
g = x1 + x2 - 8
# Build the Lagrangian L = f - k*g
L = f - k * g
# Differentiate to obtain the first-order (stationarity) conditions
dx1 = diff(L, x1)  # partial derivative with respect to x1
dx2 = diff(L, x2)  # partial derivative with respect to x2
dk = diff(L, k)  # partial derivative with respect to k (recovers the constraint g = 0)
# Solve the system for all variables
m = solve([dx1, dx2, dk], [x1, x2, k])
print('At the extremum the variables take the values: {}'.format(m))
# {x1: 5, x2: 3, k: -3}
# Rebind the symbols to their solved values
x1 = m[x1]
x2 = m[x2]
k = m[k]
# Evaluate the objective at the solution
f = 60 - 10 * x1 - 4 * x2 + (x1) ** 2 + (x2) ** 2 - x1 * x2
print("Minimum value of the objective:", f)