"""Keras behaviour-function builders and helpers for saving results/weights."""

import os
import argparse
import pickle

import keras
import numpy as np

from keras.layers import Dense, Multiply, Input, Conv2D, Flatten
from keras.models import Sequential, Model
from keras.optimizers import Adam, RMSprop, SGD

from skimage.transform import resize
from skimage.color import rgb2gray

# Root directories under which rewards and trained weights are written.
STORING_PATH = './results/'
MODELS_PATH = './trained_models/'


def save_results(environment, approximator, seed, rewards):
    """Save ``rewards`` as ``upside_down_rewards.npy``.

    The file is written to
    ``STORING_PATH/<environment>/<approximator>/<seed>/``; the directory is
    created if it does not exist yet.

    Args:
        environment: Name of the environment (used as a path component).
        approximator: Name of the approximator (used as a path component).
        seed: Run seed; converted with ``str`` to form the last directory.
        rewards: Object handed to ``np.save``.
    """
    storing_path = os.path.join(
        STORING_PATH, environment, approximator, str(seed))
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(storing_path, exist_ok=True)

    np.save(os.path.join(storing_path, 'upside_down_rewards.npy'), rewards)


def get_functional_behaviour_function(state_size, command_size, action_size):
    """Build and compile the dense, command-gated behaviour function.

    The observation and the command are each embedded by a 64-unit sigmoid
    layer, multiplied element-wise, passed through four 64-unit ReLU layers
    and finally mapped to a softmax over ``action_size`` outputs.

    Args:
        state_size: Length of the flat observation vector.
        command_size: Length of the command vector.
        action_size: Number of softmax outputs.

    Returns:
        A compiled ``keras.models.Model`` taking ``[observation, command]``
        and trained with categorical cross-entropy and Adam.
    """
    observation_input = keras.Input(shape=(state_size,))
    # NOTE(review): this embedding was originally named "linear_layer" but
    # uses a sigmoid activation; behaviour is kept as-is -- confirm intent.
    observation_embedding = Dense(64, activation='sigmoid')(observation_input)

    command_input = keras.Input(shape=(command_size,))
    command_gate = Dense(64, activation='sigmoid')(command_input)

    hidden = Multiply()([observation_embedding, command_gate])

    # Four identical hidden layers, created in order.
    for _ in range(4):
        hidden = Dense(64, activation='relu')(hidden)

    action_probabilities = Dense(action_size, activation='softmax')(hidden)

    model = Model(inputs=[observation_input, command_input],
                  outputs=action_probabilities)
    # NOTE(review): newer Keras releases spell this argument
    # ``learning_rate``; ``lr`` is kept to match the version this file was
    # written against -- confirm against the installed Keras.
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001))

    return model


def _build_conv_behaviour_function(action_size):
    """Build, compile and summarise the convolutional behaviour function.

    Shared by the Atari and Catch builders, which use the same architecture:
    three ReLU convolutions over an ``(84, 84, 4)`` input, a 512-unit ReLU
    layer gated element-wise by a 512-unit sigmoid embedding of a 2-element
    command, a 256-unit ReLU layer and a softmax over ``action_size``.
    """
    input_state = Input(shape=(84, 84, 4))

    first_conv = Conv2D(
        32, (8, 8), strides=(4, 4), activation='relu')(input_state)
    second_conv = Conv2D(
        64, (4, 4), strides=(2, 2), activation='relu')(first_conv)
    third_conv = Conv2D(
        64, (3, 3), strides=(1, 1), activation='relu')(second_conv)

    flattened = Flatten()(third_conv)
    dense_layer = Dense(512, activation='relu')(flattened)

    command_input = keras.Input(shape=(2,))
    sigmoidal_layer = Dense(512, activation='sigmoid')(command_input)

    multiplied_layer = Multiply()([dense_layer, sigmoidal_layer])
    final_layer = Dense(256, activation='relu')(multiplied_layer)

    action_layer = Dense(action_size, activation='softmax')(final_layer)

    model = Model(inputs=[input_state, command_input], outputs=action_layer)
    # NOTE(review): see the ``lr`` / ``learning_rate`` remark on Adam above;
    # the same applies to RMSprop.
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(lr=0.001, rho=0.95, epsilon=0.01))

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that emitted a stray "None" line).
    model.summary()

    return model


def get_atari_behaviour_function(action_size):
    """Return the compiled convolutional behaviour function for Atari.

    Args:
        action_size: Number of softmax outputs.

    Returns:
        A compiled ``keras.models.Model`` taking ``[frames, command]`` where
        frames has shape ``(84, 84, 4)`` and command has shape ``(2,)``.
    """
    print('Getting the model')
    return _build_conv_behaviour_function(action_size)


def get_catch_behaviour_function(action_size):
    """Return the compiled convolutional behaviour function for Catch.

    Uses the same architecture as ``get_atari_behaviour_function``.

    Args:
        action_size: Number of softmax outputs.

    Returns:
        A compiled ``keras.models.Model`` taking ``[frames, command]`` where
        frames has shape ``(84, 84, 4)`` and command has shape ``(2,)``.
    """
    print('Getting the Catch-model')
    return _build_conv_behaviour_function(action_size)


def pre_processing(state):
    """Convert a frame to an 84x84 ``uint8`` grayscale image.

    The frame is passed through ``rgb2gray``, resized to ``(84, 84)`` with
    ``mode='constant'``, multiplied by 255 and cast to ``np.uint8``.

    Args:
        state: Image accepted by ``skimage.color.rgb2gray``
            (presumably an RGB frame -- verify against the caller).

    Returns:
        The processed frame as a ``np.uint8`` array.
    """
    grayscale = rgb2gray(state)
    resized = resize(grayscale, (84, 84), mode='constant')
    # assumes the resized values lie in [0, 1] so that *255 fits in uint8
    # -- TODO confirm
    processed_state = np.uint8(resized * 255)

    return processed_state


def save_trained_model(environment, seed, model):
    """Save the weights of ``model`` as ``trained_model.h5``.

    The file is written to ``MODELS_PATH/<environment>/<seed>/``; the
    directory is created if it does not exist yet.

    Args:
        environment: Name of the environment (used as a path component).
        seed: Run seed; converted with ``str`` to form the last directory.
        model: Object exposing ``save_weights(path)``.
    """
    storing_path = os.path.join(MODELS_PATH, environment, str(seed))
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(storing_path, exist_ok=True)

    model.save_weights(os.path.join(storing_path, 'trained_model.h5'))