python gym使用 python的gym

转载

技术领航博主 2023-08-08 19:57:01

文章标签 python gym使用机器学习人工智能 python 图形化 文章分类 Python 后端开发

Gym基本使用方法

python扩展库Gym是OpenAI推出的免费强化学习实验环境。Gym库的使用方法是：
1、使用env = gym.make(环境名)取出环境
2、使用env.reset()初始化环境
3、使用env.step(动作)执行一步环境
4、使用env.render()显示环境
5、使用env.close()关闭环境

源代码

下面将以小车上山为例，说明Gym的基本使用方法。

import gym #导入gym库
import numpy as np #numpy是一个由多维数组对象和用于处理数组的例程集合组成的库

# Create the MountainCar-v0 environment through gym.make().
env = gym.make('MountainCar-v0')
# Report the observation space and the action space; the trailing comments
# are the sample outputs noted by the original author.
print(f'观测空间 = {env.observation_space}')  # Box([-1.2  -0.07], [0.6  0.07], (2,), float32)
print(f'动作空间 = {env.action_space}')  # Discrete(3)
print(f'观测范围 = {env.observation_space.low} ~ {env.observation_space.high}')  # [-1.2  -0.07] ~ [0.6  0.07]
print(f'动作数 = {env.action_space.n}')  # 3

#根据指定确定性策略决定动作的智能体
#给出BespokeAgent类，decide()方法实现了决策功能，learn()实现了学习功能
class BespokeAgent:
    """Agent that follows a fixed, hand-written deterministic policy.

    ``decide()`` maps an observation to an action using hard-coded
    polynomial bounds on the velocity; ``learn()`` is a no-op because the
    policy never changes.
    """

    def __init__(self, env):
        """Accept the environment for interface compatibility; it is unused."""

    def decide(self, observation):
        """Return the action for ``observation``.

        Args:
            observation: A two-element sequence ``(position, velocity)``.

        Returns:
            ``2`` when the velocity lies strictly between the lower and
            upper bounds computed from the position, otherwise ``0``.
            (Presumably 2 = push right and 0 = push left in MountainCar-v0
            -- confirm against the environment documentation.)
        """
        position, velocity = observation
        # The lower bound is the smaller of two position-dependent curves.
        lower_quadratic = -0.09 * (position + 0.25) ** 2 + 0.03
        lower_quartic = 0.3 * (position + 0.9) ** 4 - 0.008
        lower = min(lower_quadratic, lower_quartic)
        upper = -0.07 * (position + 0.38) ** 3 + 0.06
        return 2 if lower < velocity < upper else 0

    def learn(self, *args):
        """Ignore all training data; this agent does not learn."""

# Instantiate the rule-based agent; its constructor accepts env but does not use it.
agent = BespokeAgent(env)

#play_montecarlo()函数实现的功能：智能体和环境交互一个回合，共有4个参数
#参数env是环境类
#参数agent是智能体类
#参数render(bool型)指示在运行过程中是否要图形化显示，若render=True，则图形化显示；若要关闭，可使用env.close()函数
#参数train（bool型）指示在运行过程中是否训练智能体，若train=True，则调用agent.learn()函数；在测试过程中应当设置为False，使得智能体不变
def _reset_observation(env):
    """Start a new episode and return only the initial observation."""
    result = env.reset()
    # Gym >= 0.26 / Gymnasium return ``(observation, info)`` with ``info`` a
    # dict; older releases return the bare observation. The dict check keeps a
    # plain 2-tuple observation (e.g. ``(position, velocity)``) intact.
    if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
        return result[0]
    return result


def _step_transition(env, action):
    """Apply ``action`` and return ``(next_observation, reward, done)``."""
    result = env.step(action)
    if len(result) == 5:
        # Newer API: the episode is over when it terminated OR was truncated.
        next_observation, reward, terminated, truncated, _ = result
        return next_observation, reward, terminated or truncated
    # Older API: a single ``done`` flag, passed through unchanged.
    next_observation, reward, done, _ = result
    return next_observation, reward, done


def play_montecarlo(env, agent, render=False, train=False):
    """Run one full episode of ``agent`` interacting with ``env``.

    Works with both step/reset conventions: the older one (``reset()``
    returns an observation, ``step()`` returns a 4-tuple) and the newer one
    (``reset()`` returns ``(observation, info)``, ``step()`` returns a
    5-tuple with separate ``terminated`` / ``truncated`` flags).

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and, when
            ``render`` is true, ``render()``.
        agent: Object exposing ``decide(observation)`` and, when ``train``
            is true, ``learn(observation, action, reward, done)``.
        render: If true, call ``env.render()`` before every step.
        train: If true, call ``agent.learn(...)`` after every step; leave
            false when evaluating so the agent is not modified.

    Returns:
        The sum of the rewards collected during the episode.
    """
    episode_reward = 0.
    observation = _reset_observation(env)
    while True:
        if render:
            env.render()
        action = agent.decide(observation)
        next_observation, reward, done = _step_transition(env, action)
        episode_reward += reward
        if train:
            agent.learn(observation, action, reward, done)
        if done:
            break
        observation = next_observation
    return episode_reward

#上面已经设置了环境和智能体，接下来让智能体与环境进行一个交互，并图形化显示，显示完毕后，使用env.close()关闭图形化界面。
# Seed the environment before the demo episode. Env.seed() was removed in
# Gym 0.26, so fall back to the reset(seed=...) form when it is unavailable.
if hasattr(env, 'seed'):
    env.seed(0)
else:
    env.reset(seed=0)
# Play one rendered episode, report its total reward, then close the window.
episode_reward = play_montecarlo(env, agent, render=True)
print('回合奖励 = {}'.format(episode_reward))
env.close()

#为了评估智能体性能，通常求出连续交互100回合的平均回合奖励（100回合完全是习惯使然，没有什么特别原因）
# Evaluate the agent: play 100 episodes without rendering or training and
# print the mean of the per-episode total rewards.
episode_reward = [play_montecarlo(env, agent) for _ in range(100)]
print(f'平均回合奖励 = {np.mean(episode_reward)}')