import gym
import numpy as np
from gym import spaces
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env.dummy_vec_env import DummyVecEnv
def __init__(self, arg1, arg2, *args, **kwargs):
    """
    Template constructor for a custom Gym environment.

    Calls the parent constructor, then declares the action and observation
    spaces. Both must be ``gym.spaces`` objects.

    :param arg1: placeholder constructor argument (unused in this template)
    :param arg2: placeholder constructor argument (unused in this template)
    :param args: any further positional arguments (unused in this template)
    :param kwargs: any further keyword arguments (unused in this template)
    """
    super(CustomEnv, self).__init__()
    # Define action and observation space
    # They must be gym.spaces objects
    # Example when using discrete actions:
    self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
    # Example for using image as input (channel-first; channel-last also works):
    self.observation_space = spaces.Box(
        low=0, high=255, shape=(N_CHANNELS, HEIGHT, WIDTH), dtype=np.uint8
    )
def step(self, action):
    """
    Template: apply ``action`` and return the resulting transition.

    :param action: the action to apply
    :return: observation, reward, done, info
    """
    ...
    return observation, reward, done, info

def reset(self):
    """
    Template: reset the environment.

    :return: the observation only
    """
    ...
    return observation  # reward, done, info can't be included

def render(self, mode='human'):
    """
    Template: render the environment.

    :param mode: rendering mode, ``'human'`` by default
    """
    ...

def close(self):
    """Template: nothing to release, so this is a no-op."""
    pass
def reset(self):
    """
    Reset the agent to position ``grid_size - 1`` and return the observation.

    Important: the observation must be a numpy array.

    :return: (np.array) one-element float32 array holding the agent position
    """
    # Initialize the agent at the right of the grid
    self.agent_pos = self.grid_size - 1
    # Convert to float32 to make it more general
    # (in case we want to use continuous actions)
    return np.array([self.agent_pos]).astype(np.float32)
def step(self, action):
    """
    Move the agent one cell left or right and report the transition.

    :param action: either ``self.LEFT`` or ``self.RIGHT``
    :return: (np.array, int, bool, dict) one-element float32 observation
        holding the agent position, the reward, the done flag and an
        empty info dict
    :raises ValueError: if ``action`` is neither ``self.LEFT`` nor
        ``self.RIGHT``
    """
    if action == self.LEFT:
        self.agent_pos -= 1
    elif action == self.RIGHT:
        self.agent_pos += 1
    else:
        raise ValueError(
            "Received invalid action={} which is not part of the action space".format(action)
        )

    # The agent cannot move past the edges of the grid.
    # NOTE(review): the upper bound is grid_size while reset() starts the
    # agent at grid_size - 1 -- confirm against the observation space.
    self.agent_pos = np.clip(self.agent_pos, 0, self.grid_size)

    # Reaching the leftmost cell ends the episode
    done = bool(self.agent_pos == 0)

    # Reaching the leftmost cell yields a positive reward
    reward = 1 if self.agent_pos == 0 else 0

    # There is no extra information to report for now
    info = {}

    return np.array([self.agent_pos]).astype(np.float32), reward, done, info
def render(self, mode='console'):
    """
    Print a one-line text view of the grid to standard output.

    The agent is drawn as ``x`` and every other cell as ``.``.

    :param mode: rendering mode; only ``'console'`` is supported
    :raises NotImplementedError: if ``mode`` is not ``'console'``
    """
    # Only command-line rendering is available
    if mode != 'console':
        raise NotImplementedError()
    # Agent is represented as a cross, rest as a dot
    print("." * self.agent_pos, end="")
    print("x", end="")
    print("." * (self.grid_size - self.agent_pos))