|
import os |
|
import argparse |
|
import pickle |
|
import keras |
|
import numpy as np |
|
|
|
from keras.layers import Dense, Multiply, Input, Conv2D, Flatten |
|
from keras.models import Sequential, Model |
|
from keras.optimizers import Adam, RMSprop, SGD |
|
|
|
from skimage.transform import resize |
|
from skimage.color import rgb2gray |
|
|
|
# Root directory under which save_results() writes reward histories.
STORING_PATH = './results/'

# Root directory under which save_trained_model() writes model weights.
MODELS_PATH = './trained_models/'
|
|
|
def save_results(environment, approximator, seed, rewards):
    """Save a reward history as a ``.npy`` file.

    The file is written to
    ``STORING_PATH/<environment>/<approximator>/<seed>/upside_down_rewards.npy``;
    missing directories are created first.

    Args:
        environment: Name of the environment, used as a directory name.
        approximator: Name of the approximator, used as a directory name.
        seed: Run seed; converted with ``str()`` and used as a directory name.
        rewards: Data handed to ``np.save``.
    """
    storing_path = os.path.join(STORING_PATH, environment, approximator, str(seed))
    # exist_ok avoids the check-then-create race when several runs start
    # at the same time and target the same directory.
    os.makedirs(storing_path, exist_ok=True)

    np.save(os.path.join(storing_path, 'upside_down_rewards.npy'), rewards)
|
|
|
def get_functional_behaviour_function(state_size, command_size, action_size):
    """Build and compile the fully connected behaviour function.

    The observation and the command are each passed through their own
    64-unit sigmoid ``Dense`` layer. The two results are multiplied
    element-wise, and the product goes through four 64-unit ReLU layers
    followed by a softmax output layer.

    Args:
        state_size: Length of the flat observation input.
        command_size: Length of the command input.
        action_size: Number of units in the softmax output layer.

    Returns:
        A compiled ``Model`` taking ``[observation, command]`` as inputs,
        with categorical cross-entropy loss and ``Adam(lr=0.001)``.
    """
    observation_input = keras.Input(shape=(state_size,))
    # Note: both branches use a sigmoid activation before the product.
    observation_embedding = Dense(64, activation='sigmoid')(observation_input)

    command_input = keras.Input(shape=(command_size,))
    command_embedding = Dense(64, activation='sigmoid')(command_input)

    hidden = Multiply()([observation_embedding, command_embedding])

    # Four identical fully connected ReLU layers, stacked in order.
    for _ in range(4):
        hidden = Dense(64, activation='relu')(hidden)

    action_output = Dense(action_size, activation='softmax')(hidden)

    model = Model(inputs=[observation_input, command_input], outputs=action_output)
    # NOTE(review): newer Keras releases name this argument `learning_rate`;
    # confirm the targeted Keras version before renaming `lr`.
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001))

    return model
|
|
|
def get_atari_behaviour_function(action_size):
    """Build and compile the convolutional behaviour function.

    An ``(84, 84, 4)`` input goes through three ReLU convolutions
    (32 filters of 8x8 with stride 4, 64 filters of 4x4 with stride 2,
    64 filters of 3x3 with stride 1), is flattened and fed to a 512-unit
    ReLU layer. A 2-element command input goes through a 512-unit sigmoid
    layer. Both 512-unit outputs are multiplied element-wise, then passed
    through a 256-unit ReLU layer and a softmax output layer.

    The network built here is the same as the one built by
    ``get_catch_behaviour_function``; only the progress message differs.

    Prints a progress message and the model summary to stdout.

    Args:
        action_size: Number of units in the softmax output layer.

    Returns:
        A compiled ``Model`` taking ``[state, command]`` as inputs, with
        categorical cross-entropy loss and
        ``RMSprop(lr=0.001, rho=0.95, epsilon=0.01)``.
    """
    print('Getting the model')

    input_state = Input(shape=(84, 84, 4))

    features = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')(input_state)
    features = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')(features)
    features = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')(features)

    flattened = Flatten()(features)
    state_embedding = Dense(512, activation='relu')(flattened)

    command_input = Input(shape=(2,))
    command_embedding = Dense(512, activation='sigmoid')(command_input)

    gated = Multiply()([state_embedding, command_embedding])
    hidden = Dense(256, activation='relu')(gated)

    action_layer = Dense(action_size, activation='softmax')(hidden)

    model = Model(inputs=[input_state, command_input], outputs=action_layer)
    # NOTE(review): newer Keras releases name this argument `learning_rate`;
    # confirm the targeted Keras version before renaming `lr`.
    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(lr=0.001, rho=0.95, epsilon=0.01),
    )

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit an extra "None" line).
    model.summary()

    return model
|
|
|
def get_catch_behaviour_function(action_size):
    """Build and compile the convolutional behaviour function for Catch.

    An ``(84, 84, 4)`` input goes through three ReLU convolutions
    (32 filters of 8x8 with stride 4, 64 filters of 4x4 with stride 2,
    64 filters of 3x3 with stride 1), is flattened and fed to a 512-unit
    ReLU layer. A 2-element command input goes through a 512-unit sigmoid
    layer. Both 512-unit outputs are multiplied element-wise, then passed
    through a 256-unit ReLU layer and a softmax output layer.

    The network built here is the same as the one built by
    ``get_atari_behaviour_function``; only the progress message differs.

    Prints a progress message and the model summary to stdout.

    Args:
        action_size: Number of units in the softmax output layer.

    Returns:
        A compiled ``Model`` taking ``[state, command]`` as inputs, with
        categorical cross-entropy loss and
        ``RMSprop(lr=0.001, rho=0.95, epsilon=0.01)``.
    """
    print('Getting the Catch-model')

    input_state = Input(shape=(84, 84, 4))

    features = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')(input_state)
    features = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')(features)
    features = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')(features)

    flattened = Flatten()(features)
    state_embedding = Dense(512, activation='relu')(flattened)

    command_input = Input(shape=(2,))
    command_embedding = Dense(512, activation='sigmoid')(command_input)

    gated = Multiply()([state_embedding, command_embedding])
    hidden = Dense(256, activation='relu')(gated)

    action_layer = Dense(action_size, activation='softmax')(hidden)

    model = Model(inputs=[input_state, command_input], outputs=action_layer)
    # NOTE(review): newer Keras releases name this argument `learning_rate`;
    # confirm the targeted Keras version before renaming `lr`.
    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(lr=0.001, rho=0.95, epsilon=0.01),
    )

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit an extra "None" line).
    model.summary()

    return model
|
|
|
|
|
def pre_processing(state):
    """Convert a frame to an 84 x 84 ``uint8`` grayscale image.

    Args:
        state: Image accepted by ``rgb2gray`` (presumably an RGB frame from
            the environment; verify against the caller).

    Returns:
        The grayscale frame resized to ``(84, 84)``, multiplied by 255 and
        cast to ``np.uint8``.
    """
    grayscale = rgb2gray(state)
    resized = resize(grayscale, (84, 84), mode='constant')

    # Scale before the cast; assumes `resized` holds floats in [0, 1]
    # (TODO confirm for the scikit-image version in use).
    processed_state = np.uint8(resized * 255)

    return processed_state
|
|
|
def save_trained_model(environment, seed, model):
    """Save a model's weights to ``trained_model.h5``.

    The weights are written to
    ``MODELS_PATH/<environment>/<seed>/trained_model.h5``; missing
    directories are created first.

    Args:
        environment: Name of the environment, used as a directory name.
        seed: Run seed; converted with ``str()`` and used as a directory name.
        model: Object exposing ``save_weights(path)`` (presumably one of the
            Keras models built in this module; verify against the caller).
    """
    storing_path = os.path.join(MODELS_PATH, environment, str(seed))
    # exist_ok avoids the check-then-create race when several runs start
    # at the same time and target the same directory.
    os.makedirs(storing_path, exist_ok=True)

    model.save_weights(os.path.join(storing_path, 'trained_model.h5'))
|
|
|
|