Changes from all commits (25 commits)
09044c5  updates (addudley, Mar 26, 2018)
978c1c8  Tweaked hyperparams for improved performance (addudley, Mar 26, 2018)
27b8f50  Uploading models (addudley, Mar 26, 2018)
655795c  Added models (addudley, Mar 27, 2018)
af0bcb3  Create kframes_CNN_basic.py (marirsg2, Apr 2, 2018)
874307d  Merge pull request #1 from marirsg2/patch-1 (marirsg2, Apr 2, 2018)
4d04348  working kframes version. (marirsg2, Apr 4, 2018)
d87f616  Added neural net models and re-organized code base (safreita1, Apr 5, 2018)
a4df8ba  Added transfer learning and hyperparameter functionality (safreita1, Apr 10, 2018)
ba02eb1  Added automatic data collection for hyperparameter testing (safreita1, Apr 14, 2018)
051f813  Fixed bug in data collection and Model class (safreita1, Apr 14, 2018)
5ac5fc9  added preliminary lstm code (marirsg2, Apr 15, 2018)
6dfbcfd  working lstm (marirsg2, Apr 15, 2018)
7cce4ee  Working set for all options with lstm code (marirsg2, Apr 15, 2018)
37022e0  updated CNN model to match original model (marirsg2, Apr 16, 2018)
b5a4481  changed hyper parameter sweep to sweep over kframes 1,2,3 and lstm (marirsg2, Apr 16, 2018)
b0f06e3  added hyper parameter sweep over frame repeat (marirsg2, Apr 16, 2018)
b8360fb  Fixed bug in hyperparameter optimization (safreita1, Apr 17, 2018)
d544ac6  added support to save json data file for andy plot (marirsg2, Apr 24, 2018)
7f3bbff  added Andy's plot file to use the json dump to plot (marirsg2, Apr 24, 2018)
74b3617  fixed bug to get the data file saved every epoch (marirsg2, Apr 24, 2018)
b7a8ba4  WORKING andy's plot with option 1 (train) (marirsg2, Apr 24, 2018)
5d77a9b  added comments in all code to indicate what was written by us (marirsg2, Apr 24, 2018)
e7a37e2  Added README and minor comment changes (safreita1, Apr 25, 2018)
fc684e8  Changed initial parameter settings so DQN policy converges on basic env. (safreita1, Apr 25, 2018)
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,3 +1,5 @@
__pycache__/
*.py[cod]
.mypy_cache/
tags
*.vim
72 changes: 72 additions & 0 deletions .idea/markdown-navigator.xml

Some generated files are not rendered by default.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default.

230 changes: 230 additions & 0 deletions DQN.py
@@ -0,0 +1,230 @@
import random
from random import sample, randint
import skimage.color, skimage.transform
import numpy as np

"""
NOTE : Large parts of this file contains code borrowed from
https://github.com/flyyufelix/VizDoom-Keras-RL/blob/master/drqn.py

The parts are clearly marked with comments
"""

class ReplayMemory:
"""
NOTE: The init function and the core replay memory code to init, add transition, and get sample
was started from the code found at the aforementioned github repository. We made a lot of modifications
to support sampling multiple frames, as well as reshaping, and adding a separate test memory on top of the
replay memory
"""
def __init__(self, capacity, kframes, resolution, test_memory_size):
self.resolution = resolution
state_shape = (capacity, resolution[0], resolution[1] )
test_state_shape = (test_memory_size, resolution[0], resolution[1])
self.s1 = np.zeros(state_shape, dtype=np.float32)
self.s2 = np.zeros(state_shape, dtype=np.float32)
self.a = np.zeros(capacity, dtype=np.int32)
self.r = np.zeros(capacity, dtype=np.float32)
self.isterminal = np.zeros(capacity, dtype=np.float32)

        self.test_buffer = np.zeros(test_state_shape, dtype=np.float32)
        self.test_capacity = test_memory_size
        self.test_buff_pos = 0
        self.test_size = 0

self.capacity = capacity
self.size = 0
self.pos = 0

self.kframes = kframes

def add_transition(self, s1, action, s2, isterminal, reward):
self.s1[self.pos, :, :] = s1
self.a[self.pos] = action
if not isterminal:
self.s2[self.pos, :, :] = s2
self.isterminal[self.pos] = isterminal
self.r[self.pos] = reward

self.pos = (self.pos + 1) % self.capacity
self.size = min(self.size + 1, self.capacity)

def get_sample(self, sample_size):
samples_s1_container = []
samples_action_container = []
samples_s2_container = []
samples_isTerminal_container = []
        samples_reward_container = []
        for i in sample(range(0, self.size), sample_size):  # sample_size is the batch size (e.g. 32 or 64), not kframes
            frame_indices = range(i - self.kframes + 1, i + 1)  # +1 so that the last data point is included too
            # Negative indices wrap around to the end of the buffer (e.g. -2, -1, 0, 1, 2).
            # TODO: this wrap-around can pull in unrelated frames near the start of the buffer;
            # those frames should be zero-padded the way the test buffer handles them.

s1_data = self.s1[frame_indices]
action_data = self.a[i]
s2_data = self.s2[frame_indices]
isTerminal_data = self.isterminal[i]
reward_data = self.r[i]

            # If a terminal frame appears among the PRECEDING k-1 frames, we must not mix frames from
            # an earlier episode into this sample. The frames at and before the most recent terminal
            # are replaced with zero frames (zero padding).
            terminal_offset = 0  # 0 means no terminal among the preceding frames
            if True in self.isterminal[range(i - self.kframes + 1, i)]:  # don't care if the last one is terminal
                for k in range(1, self.kframes):  # excludes the last index
                    if self.isterminal[i - k]:
                        terminal_offset = k  # offset of the most recent terminal
                        break
            # --end outer if
            if terminal_offset != 0:  # then we need to pad some frames
                num_pad = self.kframes - terminal_offset
                pad_frames = np.zeros([num_pad] + list(s1_data.shape[1:]), dtype=s1_data.dtype)
                s1_data[0:-terminal_offset] = pad_frames
                s2_data[0:-terminal_offset] = pad_frames
                # however, the s2 slot at the boundary can be filled with the first non-padded s1 frame
                s2_data[-terminal_offset - 1] = s1_data[-terminal_offset]

samples_s1_container.append(s1_data)
samples_action_container.append(action_data)
samples_s2_container.append(s2_data)
samples_isTerminal_container.append(isTerminal_data)
samples_reward_container.append(reward_data)

return np.array(samples_s1_container),np.array(samples_action_container), \
np.array(samples_s2_container),np.array(samples_isTerminal_container),np.array(samples_reward_container)

    def add_to_test_buffer(self, curr_state):
        self.test_buffer[self.test_buff_pos, :, :] = curr_state
        # Wrap and cap at the test buffer's own capacity, not the replay memory capacity.
        self.test_buff_pos = (self.test_buff_pos + 1) % self.test_capacity
        self.test_size = min(self.test_size + 1, self.test_capacity)

def reset_test_buffer(self):
self.test_buffer = np.zeros_like(self.test_buffer, dtype=np.float32)
self.test_buff_pos = 0
self.test_size = 0

def get_test_sample(self):
#get sample of size kframes, or prepend zeros.
return_state_shape = (self.kframes, self.resolution[0], self.resolution[1])
ret_buffer = None
if self.test_size >= self.kframes:
ret_buffer = self.test_buffer[self.test_size-self.kframes:self.test_size,:,:]
        else:  # only fill what we have; the preceding frames stay as zeros (the buffer is zero-initialized)
            ret_buffer = np.zeros(return_state_shape, dtype=np.float32)
            ret_buffer[self.kframes - self.test_size:, :, :] = self.test_buffer[:self.test_size, :, :]
return ret_buffer


class DQN:
def __init__(self, memory, model, game, actions, resolution, frame_repeat, batch_size, kframes, epochs, discount_factor,
model_type):
self.memory = memory
self.model = model
self.model_type = model_type
self.game = game
self.actions = actions
self.resolution = resolution
self.frame_repeat = frame_repeat
self.batch_size = batch_size
self.kframes = kframes
self.epochs = epochs
self.discount_factor = discount_factor

def preprocess(self, img):
img = skimage.transform.resize(img, [self.resolution[0], self.resolution[1]])
img = img.astype(np.float32)
return img

def learn_from_memory(self, model):
""" Use replay memory to learn. Ignore s2 if s1 is terminal """
"""
NOTE: This code is largely unchanged from the github repository code and is very standard for Deep RL.
only the rehsaping code is ours in this method.
"""
if self.memory.size > self.batch_size:
s1, a, s2, isterminal, r = self.memory.get_sample(self.batch_size)

if self.model_type == 4:
s1 = s1.reshape(
list(s1.shape[0:2]) + [1] + list(self.resolution)) # converting to [ batch*kframes*1channel*width*height ]
s2 = s2.reshape(
list(s2.shape[0:2]) + [1] + list(self.resolution)) # converting to [ batch*kframes*1channel*width*height ]


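            # Standard DQN target: for each sampled transition, the target for the taken action a is
            # r + discount_factor * max_a' Q(s2, a'), with the future term dropped for terminal transitions.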
q = model.predict(s2, batch_size=self.batch_size)
q2 = np.max(q, axis=1)
target_q = model.predict(s1, batch_size=self.batch_size)
target_q[np.arange(target_q.shape[0]), a] = r + self.discount_factor * (1 - isterminal) * q2
model.fit(s1, target_q, verbose=0)

def get_best_action(self, state):
"""
NOTE: This code is largely unchanged from the github repository code and is very standard for Deep RL.
only the rehsaping code is ours in this method.
"""
if self.model_type == 4:
state = state.reshape(
list(state.shape[0:2]) + [1] + list(self.resolution)) # converting to [ batch*kframes*1channel*width*height ]

q = self.model.predict(state, batch_size=1)
        action = np.argmax(q, axis=1)[0]  # index of the highest-valued action
        return action


def perform_learning_step(self, epoch):
""" Makes an action according to eps-greedy policy, observes the result
(next state, reward) and learns from the transition"""
"""
NOTE: This code is largely unchanged from the github repository code and contains very standard steps for Deep RL.
We do NOT take any credit for this code, and was not modified or analyzed for our research. We treat this like a library
function call
"""
def exploration_rate(epoch):
"""# Define exploration rate change over time"""

# return 0.1

start_eps = 1.0
end_eps = 0.1
const_eps_epochs = 0.1 * self.epochs # 10% of learning time
eps_decay_epochs = 0.6 * self.epochs # 60% of learning time
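            # Example: if epochs were 20, eps would stay at 1.0 until epoch 2, decay linearly
            # to 0.1 by epoch 12, and stay at 0.1 afterwards (20 is an illustrative value only).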

if epoch < const_eps_epochs:
return start_eps
elif epoch < eps_decay_epochs:
# Linear decay
return start_eps - (epoch - const_eps_epochs) / \
(eps_decay_epochs - const_eps_epochs) * (start_eps - end_eps)
else:
return end_eps

        s1 = self.preprocess(self.game.get_state().screen_buffer)
        # Keep the rolling k-frame test buffer up to date regardless of how the action is chosen,
        # so the stacked state stays temporally consistent.
        self.memory.add_to_test_buffer(s1)

        # With probability eps make a random action.
        eps = exploration_rate(epoch)
        if random.random() <= eps:
            a = randint(0, len(self.actions) - 1)
        else:
            # Choose the best action according to the network.
            state_kframes = self.memory.get_test_sample()
            state_kframes = state_kframes.reshape([1, self.kframes, self.resolution[0], self.resolution[1]])
            a = self.get_best_action(state_kframes)
reward = self.game.make_action(self.actions[a], self.frame_repeat)

isterminal = self.game.is_episode_finished()

s2 = self.preprocess(self.game.get_state().screen_buffer) if not isterminal else None

# Remember the transition that was just experienced.
self.memory.add_transition(s1, a, s2, isterminal, reward)

self.learn_from_memory(self.model)
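
To illustrate how the replay memory above is intended to be used, here is a minimal, self-contained sketch that exercises its k-frame sampling with random data. The frame resolution, buffer capacities, batch size, and the fake episode layout are placeholders chosen for the example; they are not values taken from this pull request, and the snippet assumes the ReplayMemory class above is in scope.

# Illustrative only: exercises ReplayMemory (defined above) with random frames.
import numpy as np

resolution = (30, 45)   # (height, width) of each preprocessed frame -- example value
kframes = 3             # number of stacked frames per state -- example value
memory = ReplayMemory(capacity=1000, kframes=kframes,
                      resolution=resolution, test_memory_size=100)

# Fill the buffer with two fake "episodes" of random frames (terminal every 100 steps).
for step in range(200):
    s1 = np.random.rand(*resolution).astype(np.float32)
    s2 = np.random.rand(*resolution).astype(np.float32)
    terminal = (step % 100 == 99)
    memory.add_transition(s1, action=0, s2=None if terminal else s2,
                          isterminal=terminal, reward=1.0)

# Each sampled state is a stack of kframes consecutive frames; frames that would
# cross an episode boundary are zero-padded inside get_sample.
s1_batch, a, s2_batch, isterminal, r = memory.get_sample(32)
print(s1_batch.shape)   # (32, 3, 30, 45)

# The test buffer builds the same k-frame stack for online action selection.
memory.add_to_test_buffer(np.random.rand(*resolution).astype(np.float32))
print(memory.get_test_sample().shape)   # (3, 30, 45)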



