Reputation: 51
I'm trying to build a deep Q-learning agent that plays CartPole, where through reinforcement learning it learns to balance the pole by moving the cart.
My model works, but I can't figure out how to plot the training progress, i.e. the episodes and their scores, similar to this picture:
https://github.com/JulesVerny/PongReinforcementLearning/blob/master/ScoreGrowth.png
I've been playing around with matplotlib but can't seem to figure it out.
I've been able to get a plot window to show up, but it comes up blank. Not too sure what to do now.
Here's my code:
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from matplotlib import pyplot as plt
EPISODES = 10
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0   # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state)[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)
if __name__ == "__main__":
    env = gym.make('CartPole-v1')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    # agent.load("/home/jack/Desktop/cartpole-dqn.h5")
    done = False
    batch_size = 32

    for e in range(EPISODES):
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        for time in range(500):
            # env.render()
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            reward = reward if not done else -10
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                print("episode: {}/{}, score: {}, e: {:.2}"
                      .format(e, EPISODES, time, agent.epsilon))
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)
        if e % 10 == 0:
            agent.save("/home/jack/Desktop/cartpole-dqn.h5")
Any ideas?
Upvotes: 0
Views: 2868
Reputation: 2975
An easy way is to initialize a reward list after you define your batch size, such as:
rewardList = []
Then, initialize a reward accumulator for each episode. Define it after your environment reset:
accu_reward = 0
Then, inside your time loop, right after the reward from env.step has been set (and before your if done: check), put:
accu_reward += reward
if done or time == 499:
    rewardList.append(accu_reward)
This way the episode total is recorded whether the pole falls early (done becomes True) or the episode runs to the 500-step limit; if the append only happened at time == 499, episodes that end early would never be recorded.
Then at the very bottom of your code:
plt.plot(rewardList)
plt.show()
That should give you your reward evolution across your training episodes.
Here, you append to the reward list once per episode. You could also append to rewardList after every step, but that uses considerably more memory and produces a much noisier curve.
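Putting it together, here is a minimal sketch of how those pieces slot into the training loop from your question (same variable names as your code; accu_reward and rewardList are the additions, and the axis labels are just for readability):
rewardList = []

for e in range(EPISODES):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    accu_reward = 0                      # total reward collected this episode
    for time in range(500):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        accu_reward += reward            # accumulate the step reward
        if done or time == 499:          # episode ended or hit the step limit
            rewardList.append(accu_reward)
        if done:
            print("episode: {}/{}, score: {}, e: {:.2}"
                  .format(e, EPISODES, time, agent.epsilon))
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

plt.plot(rewardList)                     # one point per episode
plt.xlabel("episode")
plt.ylabel("total reward")
plt.show()
Note that plt.plot and plt.show need to run after the episode loop has finished; calling plt.show() before anything has been plotted is one common way to end up with a blank window.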
Upvotes: 1