The following is the full code for the example shown previously:
import gym
import numpy as np
import random
import tensorflow as tf
import matplotlib.pyplot as plt
# Create the FrozenLake environment.
env = gym.make("FrozenLake-v0")

# Build the TensorFlow graph: a single weight matrix maps a 1x16 input row
# to a 1x4 output row.
# NOTE(review): presumably the input is a one-hot encoding of the 16 grid
# states and the 4 outputs are per-action Q-values -- confirm against the
# training loop that feeds these placeholders.
tf.reset_default_graph()
inputs1 = tf.placeholder(tf.float32, shape=[1, 16])
W = tf.Variable(tf.random_uniform([16, 4], minval=0, maxval=0.01))
Qout = tf.matmul(inputs1, W)
# Index of the largest entry of Qout along axis 1.
predict = tf.argmax(Qout, 1)
# Placeholder for the 1x4 target values that Qout is regressed towards.
nextQ = tf.placeholder(tf.float32, shape=[1, 4])

# Loss is the sum of squared differences between target and prediction,
# minimised with plain gradient descent.
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(0.1)
updateModel = trainer.minimize(loss)

# Op that initialises all variables (here: W) when run in a session.
init = tf.global_variables_initializer()

# Q-learning parameters.
# NOTE(review): gamma looks like the discount factor and e like the
# exploration rate -- their use is not visible in this part of the file.
gamma = 0.99
e = 0.1
num_episodes = 6000
jList = list()
rList...