In this section, we will put together the whole code into a single Python script to initialize the environment, launch the agent's training process, get the trained policy, test the performance of the agent, and also record how it acts in the environment!
#!/usr/bin/env/ python
import gym
import numpy as np
MAX_NUM_EPISODES = 50000
STEPS_PER_EPISODE = 200 # This is specific to MountainCar. May change with env
EPSILON_MIN = 0.005
max_num_steps = MAX_NUM_EPISODES * STEPS_PER_EPISODE
EPSILON_DECAY = 500 * EPSILON_MIN / max_num_steps
ALPHA = 0.05 # Learning rate
GAMMA = 0.98 # Discount factor
NUM_DISCRETE_BINS = 30 # Number of bins to Discretize each observation dim
class Q_Learner(object):
def __init__(self, env):
self.obs_shape = env.observation_space.shape
self.obs_high = env.observation_space...