pkalkman committed
Commit 0f110ff · 1 Parent(s): 0035719

first version

Files changed (4)
  1. README.md +43 -0
  2. cliffWalking_qtable.npy +3 -0
  3. replay.mp4 +0 -0
  4. train.py +93 -0
README.md ADDED
@@ -0,0 +1,43 @@
+ ---
+ tags:
+ - reinforcement-learning
+ - q-learning
+ - gymnasium
+ - cliffwalking
+ library_name: gymnasium
+ license: apache-2.0
+ ---
+
+ # CliffWalking Q-Learning Agent
+
+ This repository contains a Q-learning agent trained on the **CliffWalking-v0** environment from **Gymnasium**. The agent learns to navigate the grid, reaching the goal with minimal penalty while avoiding the cliff zone. The Q-learning algorithm uses epsilon-greedy exploration and updates the Q-table from state-action-reward transitions.
+
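+ For reference, the per-step update implemented in `train.py` is the standard tabular Q-learning rule:
+
+ `Q(s, a) ← Q(s, a) + α * (r + γ * max_a' Q(s', a') - Q(s, a))`
+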
+ ## Files:
+ - `train.py`: The main script that trains the Q-learning agent.
+ - `cliffWalking_qtable.npy`: The saved Q-table after training.
+ - `replay.mp4`: A video of the agent's performance after training.
+
+ ## Training Details:
+ - **Environment**: `CliffWalking-v0` (Gymnasium)
+ - **Episodes**: 30,000
+ - **Learning Rate (α)**: 0.2
+ - **Discount Factor (γ)**: 0.97
+ - **Epsilon (ε)**: 0.2 (exploration vs. exploitation trade-off)
+
+ The agent starts by exploring the environment randomly and gradually learns the optimal path to the goal without falling off the cliff.
+
+ ## How to Run:
+
+ ### 1. Install Dependencies:
+
+ Make sure you have the required packages installed:
+ ```bash
+ pip install gymnasium numpy imageio[ffmpeg]
+ ```
+
+ ### 2. Train the Agent:
+
+ To train the agent, run the script `train.py`:
+
+ ```bash
+ python train.py
+ ```
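+
+ ### 3. Evaluate the Trained Agent:
+
+ As a usage sketch (not part of this commit), you can load the saved Q-table and roll out the greedy policy; the snippet below assumes only the files and dependencies listed above:
+
+ ```python
+ import gymnasium as gym
+ import numpy as np
+
+ qtable = np.load("cliffWalking_qtable.npy")
+ env = gym.make("CliffWalking-v0")
+ state, _ = env.reset()
+ done, total_reward = False, 0
+ while not done:
+     # Always take the highest-valued action; no exploration at evaluation time.
+     action = int(np.argmax(qtable[state]))
+     state, reward, terminated, truncated, _ = env.step(action)
+     total_reward += reward
+     done = terminated or truncated
+ print(f"Greedy rollout finished with total reward {total_reward}")
+ ```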
cliffWalking_qtable.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e5660a72690ef4ad817dc09eca6744adcf58d4b3f51efcdc85bbdb0d94af9b3
+ size 1664
replay.mp4 ADDED
Binary file (47.5 kB)
 
train.py ADDED
@@ -0,0 +1,93 @@
+ import gymnasium as gym
+ import numpy as np
+ import imageio
+
+ NUMBER_OF_EPISODES = 30000
+ LEARNING_RATE = 0.2
+ DISCOUNT_FACTOR = 0.97
+ EPSILON = 0.2
+
+
+ def initialize_environment():
+     env = gym.make('CliffWalking-v0')
+     state_size = env.observation_space.n
+     action_size = env.action_space.n
+     print(f"State size: {state_size}, Action size: {action_size}")
+     return env, state_size, action_size
+
+
+ def initialize_q_table(state_size, action_size):
+     # One row per state, one column per action, all Q-values starting at zero.
+     return np.zeros((state_size, action_size))
+
+
+ def epsilon_greedy_action_selection(state, qtable, env, epsilon):
+     # Explore with probability epsilon, otherwise exploit the best known action.
+     if np.random.uniform(0, 1) < epsilon:
+         return env.action_space.sample()
+     return np.argmax(qtable[state, :])
+
+
+ def update_q_value(current_state, action, reward, next_state, qtable, learning_rate, discount_factor):
+     # Tabular Q-learning update: move Q(s, a) toward the bootstrapped target
+     # r + gamma * max_a' Q(s', a').
+     future_q_value = np.max(qtable[next_state, :])
+     current_q_value = qtable[current_state, action]
+     new_q_value = current_q_value + learning_rate * (reward + discount_factor * future_q_value - current_q_value)
+     qtable[current_state, action] = new_q_value
+
+
+ def train_agent(env, qtable, num_episodes, learning_rate, discount_factor, epsilon):
+     for episode_nr in range(num_episodes):
+         current_state, _ = env.reset()
+         done = False
+
+         while not done:
+             action = epsilon_greedy_action_selection(current_state, qtable, env, epsilon)
+             # Gymnasium returns separate terminated/truncated flags; either ends the episode.
+             next_state, reward, terminated, truncated, _ = env.step(action)
+             done = terminated or truncated
+             update_q_value(current_state, action, reward, next_state, qtable, learning_rate, discount_factor)
+             current_state = next_state
+
+         if episode_nr % 10000 == 0:
+             print(f"\nQ-table after episode {episode_nr + 1}:")
+             np.set_printoptions(precision=2, suppress=True)
+             print(qtable)
+
+     return qtable
+
+
+ def save_qtable(filename, qtable):
+     np.save(filename, qtable)
+     print(f"Q-table saved as {filename}")
+
+
+ def create_replay_video(env, qtable, filename="replay.mp4"):
+     frames = []
+     current_state, _ = env.reset()
+     done = False
+
+     while not done:
+         frames.append(env.render())
+         # Follow the learned policy greedily; no exploration during the replay.
+         action = np.argmax(qtable[current_state, :])
+         next_state, _, terminated, truncated, _ = env.step(action)
+         done = terminated or truncated
+         current_state = next_state
+
+     # Capture the final frame so the video ends with the agent at the goal.
+     frames.append(env.render())
+     env.close()
+
+     with imageio.get_writer(filename, fps=10) as video:
+         for frame in frames:
+             video.append_data(frame)
+
+     print(f"Video saved as {filename}")
+
+
+ def main():
+     env, state_size, action_size = initialize_environment()
+     qtable = initialize_q_table(state_size, action_size)
+
+     qtable = train_agent(env, qtable, NUMBER_OF_EPISODES, LEARNING_RATE, DISCOUNT_FACTOR, EPSILON)
+     save_qtable("cliffWalking_qtable.npy", qtable)
+
+     # Recreate the environment with rgb_array rendering so frames can be recorded.
+     env = gym.make('CliffWalking-v0', render_mode="rgb_array")
+     create_replay_video(env, qtable)
+
+
+ if __name__ == "__main__":
+     main()