| import gymnasium as gym | |
| import numpy as np | |
| import imageio | |
# Hyperparameters for the tabular Q-learning run.
NUMBER_OF_EPISODES = 30_000  # how many training episodes to play
LEARNING_RATE = 0.2  # step size of each Q-value update
DISCOUNT_FACTOR = 0.97  # weight given to the best next-state Q-value
EPSILON = 0.2  # threshold below which a random action is taken
def initialize_environment():
    """Create the CliffWalking environment and report its dimensions.

    Returns:
        A ``(env, state_size, action_size)`` tuple: the environment built by
        ``gym.make('CliffWalking-v0')``, the ``n`` of its observation space
        and the ``n`` of its action space.
    """
    env = gym.make('CliffWalking-v0')
    n_states, n_actions = env.observation_space.n, env.action_space.n
    print(f"State size: {n_states}, Action size: {n_actions}")
    return env, n_states, n_actions
def initialize_q_table(state_size, action_size):
    """Return a zero-filled Q-table with one row per state and one column per action."""
    table_shape = (state_size, action_size)
    return np.zeros(table_shape)
def epsilon_greedy_action_selection(state, qtable, env, epsilon):
    """Pick an action for ``state`` using an epsilon-greedy rule.

    One uniform random draw is compared with ``epsilon``. If the draw is
    below ``epsilon`` a random action is sampled from ``env.action_space``;
    otherwise the index of the largest Q-value in the state's row is returned.

    Args:
        state: Row index into ``qtable``.
        qtable: 2-D array of Q-values indexed as ``[state, action]``.
        env: Environment whose ``action_space.sample()`` supplies random actions.
        epsilon: Exploration threshold compared against the random draw.

    Returns:
        The selected action.
    """
    explore = np.random.uniform(0, 1) < epsilon
    if not explore:
        # Exploit: take the best-known action for this state.
        return qtable[state, :].argmax()
    return env.action_space.sample()
def update_q_value(current_state, action, reward, next_state, qtable, learning_rate, discount_factor):
    """Apply one Q-learning update to ``qtable[current_state, action]`` in place.

    The entry is moved towards ``reward + discount_factor * max(qtable[next_state])``
    by a fraction ``learning_rate`` of the difference.

    Args:
        current_state: Row index of the state the action was taken in.
        action: Column index of the action taken.
        reward: Reward received for the transition.
        next_state: Row index of the state reached.
        qtable: 2-D array of Q-values, modified in place.
        learning_rate: Step size of the update.
        discount_factor: Weight of the best next-state Q-value.
    """
    best_next = qtable[next_state, :].max()
    old_value = qtable[current_state, action]
    td_target = reward + discount_factor * best_next
    td_error = td_target - old_value
    qtable[current_state, action] = old_value + learning_rate * td_error
def train_agent(env, qtable, num_episodes, learning_rate, discount_factor, epsilon):
    """Train ``qtable`` in place with tabular Q-learning and return it.

    Each episode resets ``env`` and then repeatedly selects an epsilon-greedy
    action, steps the environment and applies a Q-learning update until the
    episode ends. The Q-table is printed every 10000 episodes, starting with
    the first one.

    Args:
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns a 5-tuple
            ``(next_state, reward, terminated, truncated, info)``.
        qtable: 2-D array of Q-values indexed as ``[state, action]``.
        num_episodes: Number of episodes to run.
        learning_rate: Step size passed to ``update_q_value``.
        discount_factor: Discount passed to ``update_q_value``.
        epsilon: Exploration threshold passed to
            ``epsilon_greedy_action_selection``.

    Returns:
        The same ``qtable`` object, updated in place.
    """
    for episode_nr in range(num_episodes):
        current_state, _ = env.reset()
        done = False
        while not done:
            action = epsilon_greedy_action_selection(current_state, qtable, env, epsilon)
            next_state, reward, terminated, truncated, _ = env.step(action)
            update_q_value(current_state, action, reward, next_state, qtable, learning_rate, discount_factor)
            current_state = next_state
            # The episode is over when the environment terminates OR when it is
            # truncated (e.g. by a time-limit wrapper). Ignoring truncation
            # would keep stepping an environment that must be reset first.
            done = terminated or truncated
        if episode_nr % 10000 == 0:
            print(f"\nQ-table after episode {episode_nr + 1}:")
            np.set_printoptions(precision=2, suppress=True)
            print(qtable)
    return qtable
def save_qtable(filename, qtable):
    """Write ``qtable`` to ``filename`` with ``np.save`` and print a confirmation."""
    np.save(file=filename, arr=qtable)
    confirmation = f"Q-table saved as {filename}"
    print(confirmation)
def create_replay_video(env, qtable, filename="replay.mp4", max_steps=1000):
    """Roll out the greedy policy of ``qtable`` once and save it as a video.

    The environment is reset, then the action with the highest Q-value is
    taken at every step. A frame is rendered for the initial state and after
    every step, so the final state is part of the video. The environment is
    closed when the rollout finishes, even if it raises.

    Args:
        env: Environment created with a render mode whose ``render()`` returns
            image frames (``main`` uses ``render_mode="rgb_array"``).
        qtable: 2-D array of Q-values indexed as ``[state, action]``.
        filename: Path of the video file to write.
        max_steps: Upper bound on the number of steps in the rollout. A greedy
            policy that never reaches a terminal state (for example from an
            untrained table) would otherwise loop forever and accumulate
            frames without limit.
    """
    frames = []
    try:
        current_state, _ = env.reset()
        frames.append(env.render())
        for _step in range(max_steps):
            action = np.argmax(qtable[current_state, :])
            current_state, _, terminated, truncated, _ = env.step(action)
            frames.append(env.render())
            # Stop on a terminal state or when the environment cuts the
            # episode short; either way it must be reset before stepping again.
            if terminated or truncated:
                break
        else:
            print(f"Replay stopped after {max_steps} steps without finishing the episode")
    finally:
        env.close()

    with imageio.get_writer(filename, fps=10) as video:
        for frame in frames:
            video.append_data(frame)
    print(f"Video saved as {filename}")
def main():
    """Train a Q-learning agent on CliffWalking, save its Q-table and record a replay.

    Training uses an environment without rendering. A second environment with
    ``render_mode="rgb_array"`` is then created for the replay video.
    """
    env, state_size, action_size = initialize_environment()
    try:
        qtable = initialize_q_table(state_size, action_size)
        qtable = train_agent(env, qtable, NUMBER_OF_EPISODES, LEARNING_RATE, DISCOUNT_FACTOR, EPSILON)
    finally:
        # Release the training environment before a second one is created;
        # previously it was dropped without being closed.
        env.close()
    save_qtable("cliffWalking_qtable.npy", qtable)

    # create_replay_video closes the environment it is given.
    replay_env = gym.make('CliffWalking-v0', render_mode="rgb_array")
    create_replay_video(replay_env, qtable)
# Run the train / save / replay pipeline only when executed as a script.
if __name__ == "__main__":
    main()