Advanced Deep Learning with Keras

Chapter 9

# discount factor

self.gamma = 0.9

# initially 90% exploration, 10% exploitation

self.epsilon = 0.9

# iteratively apply decay until 10% exploration / 90% exploitation


self.epsilon_decay = decay

self.epsilon_min = 0.1

# learning rate of Q-Learning

self.learning_rate = 0.1

# file where the Q Table is saved to / restored from

if slippery:

self.filename = 'q-frozenlake-slippery.npy'


self.filename = 'q-frozenlake.npy'

# demo or train mode

self.demo = demo

# if demo mode, no exploration

if demo:

self.epsilon = 0

# determine the next action

# if random, choose from random action space

# else use the Q Table

def act(self, state, is_explore=False):
    """Pick the next action for `state` with an epsilon-greedy rule.

    Args:
        state: index into `self.q_table` selecting the row of Q-values
            for the current state.
        is_explore: when True, skip the Q Table and always take a
            random action.

    Returns:
        A sample from `self.action_space` when exploring; otherwise the
        index of the largest Q-value in `self.q_table[state]`.
    """
    # 0 - Left, 1 - Down, 2 - Right, 3 - Up
    if is_explore or np.random.rand() < self.epsilon:
        # explore - do random action
        return self.action_space.sample()

    # exploit - choose action with max Q-value
    return np.argmax(self.q_table[state])

# TD(0) learning (generalized Q-Learning) with learning rate

def update_q_table(self, state, action, reward, next_state):

# Q(s, a) += alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))

q_value = self.gamma * np.amax(self.q_table[next_state])

q_value += reward

q_value -= self.q_table[state, action]

