Muqeeth commited on
Commit
219bf79
·
verified ·
1 Parent(s): a531b17

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. seed_1000/agent_trainer/critic_optimizer_state.pt +3 -0
  2. seed_1000/iteration_010/agent:Alice_rewards.csv +64 -0
  3. seed_1000/iteration_010/agent:Bob_rewards.csv +64 -0
  4. seed_1000/iteration_010/agent_trainer_log/basic_training_metrics_2025-08-20___08-22-53.json +44 -0
  5. seed_1000/iteration_010/mgid:11254192_rollout_tree.json +0 -0
  6. seed_1000/iteration_010/mgid:11380392_rollout_tree.json +0 -0
  7. seed_1000/iteration_010/mgid:11404926_rollout_tree.json +0 -0
  8. seed_1000/iteration_010/mgid:12095407_rollout_tree.json +0 -0
  9. seed_1000/iteration_010/mgid:12810834_rollout_tree.json +0 -0
  10. seed_1000/iteration_010/mgid:13401802_rollout_tree.json +0 -0
  11. seed_1000/iteration_010/mgid:14231226_rollout_tree.json +0 -0
  12. seed_1000/iteration_010/mgid:14531618_rollout_tree.json +0 -0
  13. seed_1000/iteration_010/mgid:14845387_rollout_tree.json +0 -0
  14. seed_1000/iteration_010/mgid:18564316_rollout_tree.json +0 -0
  15. seed_1000/iteration_010/mgid:18788754_rollout_tree.json +0 -0
  16. seed_1000/iteration_010/mgid:18940688_rollout_tree.json +0 -0
  17. seed_1000/iteration_010/mgid:19148172_rollout_tree.json +0 -0
  18. seed_1000/iteration_010/mgid:20100269_rollout_tree.json +0 -0
  19. seed_1000/iteration_010/mgid:20135678_rollout_tree.json +0 -0
  20. seed_1000/iteration_010/mgid:20448792_rollout_tree.json +0 -0
  21. seed_1000/iteration_010/mgid:20726379_rollout_tree.json +0 -0
  22. seed_1000/iteration_010/mgid:21722326_rollout_tree.json +0 -0
  23. seed_1000/iteration_010/mgid:23064020_rollout_tree.json +0 -0
  24. seed_1000/iteration_010/mgid:23644929_rollout_tree.json +0 -0
  25. seed_1000/iteration_010/mgid:24470558_rollout_tree.json +0 -0
  26. seed_1000/iteration_010/mgid:24640213_rollout_tree.json +0 -0
  27. seed_1000/iteration_010/mgid:24794623_rollout_tree.json +0 -0
  28. seed_1000/iteration_010/mgid:24849914_rollout_tree.json +0 -0
  29. seed_1000/iteration_010/mgid:25615508_rollout_tree.json +0 -0
  30. seed_1000/iteration_010/mgid:26346888_rollout_tree.json +0 -0
  31. seed_1000/iteration_010/mgid:27097149_rollout_tree.json +0 -0
  32. seed_1000/iteration_010/mgid:27368155_rollout_tree.json +0 -0
  33. seed_1000/iteration_010/mgid:27388971_rollout_tree.json +0 -0
  34. seed_1000/iteration_010/mgid:27840658_rollout_tree.json +0 -0
  35. seed_1000/iteration_010/mgid:28087463_rollout_tree.json +0 -0
  36. seed_1000/iteration_010/mgid:29046134_rollout_tree.json +0 -0
  37. seed_1000/iteration_010/mgid:30728573_rollout_tree.json +0 -0
  38. seed_1000/iteration_010/mgid:31601106_rollout_tree.json +0 -0
  39. seed_1000/iteration_010/mgid:31818626_rollout_tree.json +0 -0
  40. seed_1000/iteration_010/mgid:32197308_rollout_tree.json +0 -0
  41. seed_1000/iteration_010/mgid:32846198_rollout_tree.json +0 -0
  42. seed_1000/iteration_010/mgid:33898430_rollout_tree.json +0 -0
  43. seed_1000/iteration_010/mgid:42647546_rollout_tree.json +0 -0
  44. seed_1000/iteration_010/mgid:50824074_rollout_tree.json +0 -0
  45. seed_1000/iteration_010/mgid:57340378_rollout_tree.json +0 -0
  46. seed_1000/iteration_010/mgid:72506343_rollout_tree.json +0 -0
  47. seed_1000/iteration_010/mgid:99461611_rollout_tree.json +0 -0
  48. seed_1000/iteration_011/agent:Alice_rewards.csv +64 -0
  49. seed_1000/iteration_011/agent:Bob_rewards.csv +64 -0
  50. seed_1000/iteration_011/agent_trainer_log/basic_training_metrics_2025-08-20___08-23-48.json +44 -0
seed_1000/agent_trainer/critic_optimizer_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9366f20093589fdd3acde3012a0e69238debe898e1315bf9ed1e32c81a69eea8
3
+ size 2631
seed_1000/iteration_010/agent:Alice_rewards.csv ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1,3,1,3,1,3,1,3,1,3
2
+ 1,5,1,5,1,5,1,5,1,5
3
+ 1,3,1,3,1,3,1,3,1,3
4
+ 1,3,1,3,1,3,1,3,1,3
5
+ 1,0,1,0,1,0,1,0,1,0
6
+ 1,3,1,3,1,3,1,3,1,3
7
+ 1,3,1,3,1,3,1,3,1,3
8
+ 1,3,1,3,1,3,1,3,1,3
9
+ 1,3,1,3,1,3,1,3,1,3
10
+ 1,5,1,5,1,5,1,5,1,5
11
+ 1,5,0,1,5,0,1,5,0,1
12
+ 1,0,1,0,1,0,1,0,1,0
13
+ 1,3,1,3,1,3,1,3,1,3
14
+ 1,0,5,1,0,5,1,0,5,1
15
+ 1,3,1,3,1,3,1,3,1,3
16
+ 1,3,1,3,1,3,1,3,1,3
17
+ 1,0,5,1,0,5,1,0,5,1
18
+ 1,0,1,1,1,1,1,1,1,1
19
+ 1,3,1,3,1,3,1,3,1,3
20
+ 1,1,1,1,1,1,1,1,1,1
21
+ 1,3,1,3,1,3,1,3,1,3
22
+ 1,3,1,3,1,3,1,3,1,3
23
+ 1,0,1,1,1,1,1,1,1,1
24
+ 1,0,5,1,0,5,1,0,5,1
25
+ 1,0,1,0,1,0,1,0,1,0
26
+ 1,3,1,3,1,3,1,3,1,3
27
+ 1,1,1,1,1,1,1,1,1,1
28
+ 1,3,1,3,1,3,1,3,1,3
29
+ 1,3,1,3,1,3,1,3,1,3
30
+ 1,3,1,3,1,3,1,3,1,3
31
+ 1,3,1,3,1,3,1,3,1,3
32
+ 1,3,1,3,1,3,1,3,1,3
33
+ 1,3,1,3,1,3,1,3,1,3
34
+ 1,3,1,3,1,3,1,3,1,3
35
+ 1,3,1,3,1,3,1,3,1,3
36
+ 1,3,1,3,1,3,1,3,1,3
37
+ 1,3,1,3,1,3,1,3,1,3
38
+ 1,5,0,1,5,0,1,5,0,1
39
+ 1,0,1,0,1,0,1,0,1,0
40
+ 1,5,1,5,1,5,1,5,1,5
41
+ 1,3,1,3,1,3,1,3,1,3
42
+ 1,5,0,1,5,0,1,5,0,1
43
+ 1,3,1,3,1,3,1,3,1,3
44
+ 1,3,1,3,1,3,1,3,1,3
45
+ 1,3,1,3,1,3,1,3,1,3
46
+ 1,3,1,3,1,3,1,3,1,3
47
+ 1,3,1,3,1,3,1,3,1,3
48
+ 1,3,1,3,1,3,1,3,1,3
49
+ 1,3,1,3,1,3,1,3,1,3
50
+ 1,3,1,3,1,3,1,3,1,3
51
+ 1,3,1,3,1,3,1,3,1,3
52
+ 1,3,1,3,1,3,1,3,1,3
53
+ 1,3,1,3,1,3,1,3,1,3
54
+ 1,3,1,3,1,3,1,3,1,3
55
+ 1,0,1,0,1,0,1,0,1,0
56
+ 1,1,1,1,1,1,1,1,1,1
57
+ 1,0,1,1,1,1,1,1,1,1
58
+ 1,3,1,3,1,3,1,3,1,3
59
+ 1,3,1,3,1,3,1,3,1,3
60
+ 1,0,1,0,1,0,1,0,1,0
61
+ 1,3,1,3,1,3,1,3,1,3
62
+ 1,5,1,5,1,5,1,5,1,5
63
+ 1,5,0,1,5,0,1,5,0,1
64
+ 1,3,1,3,1,3,1,3,1,3
seed_1000/iteration_010/agent:Bob_rewards.csv ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1,3,1,3,1,3,1,3,1,3
2
+ 1,0,1,0,1,0,1,0,1,0
3
+ 1,3,1,3,1,3,1,3,1,3
4
+ 1,3,1,3,1,3,1,3,1,3
5
+ 1,5,1,5,1,5,1,5,1,5
6
+ 1,3,1,3,1,3,1,3,1,3
7
+ 1,3,1,3,1,3,1,3,1,3
8
+ 1,3,1,3,1,3,1,3,1,3
9
+ 1,3,1,3,1,3,1,3,1,3
10
+ 1,0,1,0,1,0,1,0,1,0
11
+ 1,0,5,1,0,5,1,0,5,1
12
+ 1,5,1,5,1,5,1,5,1,5
13
+ 1,3,1,3,1,3,1,3,1,3
14
+ 1,5,0,1,5,0,1,5,0,1
15
+ 1,3,1,3,1,3,1,3,1,3
16
+ 1,3,1,3,1,3,1,3,1,3
17
+ 1,5,0,1,5,0,1,5,0,1
18
+ 1,5,1,1,1,1,1,1,1,1
19
+ 1,3,1,3,1,3,1,3,1,3
20
+ 1,1,1,1,1,1,1,1,1,1
21
+ 1,3,1,3,1,3,1,3,1,3
22
+ 1,3,1,3,1,3,1,3,1,3
23
+ 1,5,1,1,1,1,1,1,1,1
24
+ 1,5,0,1,5,0,1,5,0,1
25
+ 1,5,1,5,1,5,1,5,1,5
26
+ 1,3,1,3,1,3,1,3,1,3
27
+ 1,1,1,1,1,1,1,1,1,1
28
+ 1,3,1,3,1,3,1,3,1,3
29
+ 1,3,1,3,1,3,1,3,1,3
30
+ 1,3,1,3,1,3,1,3,1,3
31
+ 1,3,1,3,1,3,1,3,1,3
32
+ 1,3,1,3,1,3,1,3,1,3
33
+ 1,3,1,3,1,3,1,3,1,3
34
+ 1,3,1,3,1,3,1,3,1,3
35
+ 1,3,1,3,1,3,1,3,1,3
36
+ 1,3,1,3,1,3,1,3,1,3
37
+ 1,3,1,3,1,3,1,3,1,3
38
+ 1,0,5,1,0,5,1,0,5,1
39
+ 1,5,1,5,1,5,1,5,1,5
40
+ 1,0,1,0,1,0,1,0,1,0
41
+ 1,3,1,3,1,3,1,3,1,3
42
+ 1,0,5,1,0,5,1,0,5,1
43
+ 1,3,1,3,1,3,1,3,1,3
44
+ 1,3,1,3,1,3,1,3,1,3
45
+ 1,3,1,3,1,3,1,3,1,3
46
+ 1,3,1,3,1,3,1,3,1,3
47
+ 1,3,1,3,1,3,1,3,1,3
48
+ 1,3,1,3,1,3,1,3,1,3
49
+ 1,3,1,3,1,3,1,3,1,3
50
+ 1,3,1,3,1,3,1,3,1,3
51
+ 1,3,1,3,1,3,1,3,1,3
52
+ 1,3,1,3,1,3,1,3,1,3
53
+ 1,3,1,3,1,3,1,3,1,3
54
+ 1,3,1,3,1,3,1,3,1,3
55
+ 1,5,1,5,1,5,1,5,1,5
56
+ 1,1,1,1,1,1,1,1,1,1
57
+ 1,5,1,1,1,1,1,1,1,1
58
+ 1,3,1,3,1,3,1,3,1,3
59
+ 1,3,1,3,1,3,1,3,1,3
60
+ 1,5,1,5,1,5,1,5,1,5
61
+ 1,3,1,3,1,3,1,3,1,3
62
+ 1,0,1,0,1,0,1,0,1,0
63
+ 1,0,5,1,0,5,1,0,5,1
64
+ 1,3,1,3,1,3,1,3,1,3
seed_1000/iteration_010/agent_trainer_log/basic_training_metrics_2025-08-20___08-22-53.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nb_rollouts": [
3
+ 128
4
+ ],
5
+ "loss_mb_total": {
6
+ "value_mb_total": [
7
+ -2.746885299682617,
8
+ 1.085717797279358,
9
+ 0.683306872844696,
10
+ 1.2547194957733154,
11
+ -2.915886878967285,
12
+ 0.877143144607544,
13
+ 0.8918816447257996,
14
+ -4.0617899894714355,
15
+ -4.987060546875,
16
+ 1.2547194957733154,
17
+ 0.1208738312125206,
18
+ -2.746885299682617,
19
+ -0.033389389514923096,
20
+ 1.085717797279358,
21
+ -2.7468855381011963,
22
+ -1.375089168548584,
23
+ -0.27467450499534607,
24
+ 0.42827966809272766,
25
+ 0.08684346079826355,
26
+ 0.6158124804496765,
27
+ -0.4622073471546173,
28
+ 0.23840229213237762,
29
+ 0.2767208516597748,
30
+ -4.618410587310791,
31
+ -4.849747180938721,
32
+ 0.6158124804496765,
33
+ 0.19459812343120575,
34
+ -0.27467453479766846,
35
+ 0.04538390412926674,
36
+ 0.42827966809272766,
37
+ -0.27467456459999084,
38
+ -8.071660041809082
39
+ ]
40
+ },
41
+ "gradient_norm": [
42
+ 22.139535903930664
43
+ ]
44
+ }
seed_1000/iteration_010/mgid:11254192_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:11380392_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:11404926_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:12095407_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:12810834_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:13401802_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:14231226_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:14531618_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:14845387_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:18564316_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:18788754_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:18940688_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:19148172_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:20100269_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:20135678_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:20448792_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:20726379_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:21722326_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:23064020_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:23644929_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:24470558_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:24640213_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:24794623_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:24849914_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:25615508_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:26346888_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:27097149_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:27368155_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:27388971_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:27840658_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:28087463_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:29046134_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:30728573_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:31601106_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:31818626_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:32197308_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:32846198_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:33898430_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:42647546_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:50824074_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:57340378_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:72506343_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_010/mgid:99461611_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_011/agent:Alice_rewards.csv ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1,3,1,3,1,3,1,3,1,3
2
+ 1,3,1,3,1,3,1,3,1,3
3
+ 1,3,1,3,1,3,1,3,1,3
4
+ 1,3,1,3,1,3,1,3,1,3
5
+ 1,3,1,3,1,3,1,3,1,3
6
+ 1,0,1,1,1,1,1,1,1,1
7
+ 1,3,1,3,1,3,1,3,1,3
8
+ 1,5,1,5,1,5,1,5,1,5
9
+ 1,3,1,3,1,3,1,3,1,3
10
+ 1,3,1,3,1,3,1,3,1,3
11
+ 1,3,1,3,1,3,1,3,1,3
12
+ 1,3,1,3,1,3,1,3,1,3
13
+ 1,3,1,3,1,3,1,3,1,3
14
+ 1,3,1,3,1,3,1,3,1,3
15
+ 1,3,1,3,1,3,1,3,1,3
16
+ 1,3,1,3,1,3,1,3,1,3
17
+ 1,3,1,3,1,3,1,3,1,3
18
+ 1,3,1,3,1,3,1,3,1,3
19
+ 1,3,1,3,1,3,1,3,1,3
20
+ 1,3,1,3,1,3,1,3,1,3
21
+ 1,3,1,3,1,3,1,3,1,3
22
+ 1,5,1,5,1,5,1,5,1,5
23
+ 1,5,1,5,1,5,1,5,1,5
24
+ 1,3,1,3,1,3,1,3,1,3
25
+ 1,3,1,3,1,3,1,3,1,3
26
+ 1,0,5,1,0,5,1,0,5,1
27
+ 1,3,1,3,1,3,1,3,1,3
28
+ 1,3,1,3,1,3,1,3,1,3
29
+ 1,3,1,3,1,3,1,3,1,3
30
+ 1,1,1,1,1,1,1,1,1,1
31
+ 1,5,1,5,1,5,1,5,1,5
32
+ 1,3,1,3,1,3,1,3,1,3
33
+ 1,3,1,3,1,3,1,3,1,3
34
+ 1,3,1,3,1,3,1,3,1,3
35
+ 1,5,1,5,1,5,1,5,1,5
36
+ 1,3,1,3,1,3,1,3,1,3
37
+ 1,3,1,3,1,3,1,3,1,3
38
+ 1,3,1,3,1,3,1,3,1,3
39
+ 1,3,1,3,1,3,1,3,1,3
40
+ 1,5,3,1,1,1,1,1,1,1
41
+ 1,5,5,1,0,0,0,1,5,0
42
+ 1,5,1,5,1,5,1,5,1,5
43
+ 1,5,0,1,5,0,1,5,0,1
44
+ 1,3,1,3,1,3,1,3,1,3
45
+ 1,3,1,3,1,3,1,3,1,3
46
+ 1,3,1,3,1,3,1,3,1,3
47
+ 1,3,1,3,1,3,1,3,1,3
48
+ 1,3,1,3,1,3,1,3,1,3
49
+ 1,1,1,1,1,1,1,1,1,1
50
+ 1,1,1,1,1,1,1,1,1,1
51
+ 1,5,1,5,1,5,1,5,1,5
52
+ 1,3,1,3,1,3,1,3,1,3
53
+ 1,3,1,3,1,3,1,3,1,3
54
+ 1,1,1,1,1,1,1,1,1,1
55
+ 1,3,1,3,1,3,1,3,1,3
56
+ 1,3,1,3,1,3,1,3,1,3
57
+ 1,3,1,3,1,3,1,3,1,3
58
+ 1,3,1,3,1,3,1,3,1,3
59
+ 1,3,1,3,1,3,1,3,1,3
60
+ 1,3,1,3,1,3,1,3,1,3
61
+ 1,5,1,5,1,5,1,5,1,5
62
+ 1,3,1,3,1,3,1,3,1,3
63
+ 1,3,1,3,1,3,1,3,1,3
64
+ 1,3,1,3,1,3,1,3,1,3
seed_1000/iteration_011/agent:Bob_rewards.csv ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1,3,1,3,1,3,1,3,1,3
2
+ 1,3,1,3,1,3,1,3,1,3
3
+ 1,3,1,3,1,3,1,3,1,3
4
+ 1,3,1,3,1,3,1,3,1,3
5
+ 1,3,1,3,1,3,1,3,1,3
6
+ 1,5,1,1,1,1,1,1,1,1
7
+ 1,3,1,3,1,3,1,3,1,3
8
+ 1,0,1,0,1,0,1,0,1,0
9
+ 1,3,1,3,1,3,1,3,1,3
10
+ 1,3,1,3,1,3,1,3,1,3
11
+ 1,3,1,3,1,3,1,3,1,3
12
+ 1,3,1,3,1,3,1,3,1,3
13
+ 1,3,1,3,1,3,1,3,1,3
14
+ 1,3,1,3,1,3,1,3,1,3
15
+ 1,3,1,3,1,3,1,3,1,3
16
+ 1,3,1,3,1,3,1,3,1,3
17
+ 1,3,1,3,1,3,1,3,1,3
18
+ 1,3,1,3,1,3,1,3,1,3
19
+ 1,3,1,3,1,3,1,3,1,3
20
+ 1,3,1,3,1,3,1,3,1,3
21
+ 1,3,1,3,1,3,1,3,1,3
22
+ 1,0,1,0,1,0,1,0,1,0
23
+ 1,0,1,0,1,0,1,0,1,0
24
+ 1,3,1,3,1,3,1,3,1,3
25
+ 1,3,1,3,1,3,1,3,1,3
26
+ 1,5,0,1,5,0,1,5,0,1
27
+ 1,3,1,3,1,3,1,3,1,3
28
+ 1,3,1,3,1,3,1,3,1,3
29
+ 1,3,1,3,1,3,1,3,1,3
30
+ 1,1,1,1,1,1,1,1,1,1
31
+ 1,0,1,0,1,0,1,0,1,0
32
+ 1,3,1,3,1,3,1,3,1,3
33
+ 1,3,1,3,1,3,1,3,1,3
34
+ 1,3,1,3,1,3,1,3,1,3
35
+ 1,0,1,0,1,0,1,0,1,0
36
+ 1,3,1,3,1,3,1,3,1,3
37
+ 1,3,1,3,1,3,1,3,1,3
38
+ 1,3,1,3,1,3,1,3,1,3
39
+ 1,3,1,3,1,3,1,3,1,3
40
+ 1,0,3,1,1,1,1,1,1,1
41
+ 1,0,0,1,5,5,5,1,0,5
42
+ 1,0,1,0,1,0,1,0,1,0
43
+ 1,0,5,1,0,5,1,0,5,1
44
+ 1,3,1,3,1,3,1,3,1,3
45
+ 1,3,1,3,1,3,1,3,1,3
46
+ 1,3,1,3,1,3,1,3,1,3
47
+ 1,3,1,3,1,3,1,3,1,3
48
+ 1,3,1,3,1,3,1,3,1,3
49
+ 1,1,1,1,1,1,1,1,1,1
50
+ 1,1,1,1,1,1,1,1,1,1
51
+ 1,0,1,0,1,0,1,0,1,0
52
+ 1,3,1,3,1,3,1,3,1,3
53
+ 1,3,1,3,1,3,1,3,1,3
54
+ 1,1,1,1,1,1,1,1,1,1
55
+ 1,3,1,3,1,3,1,3,1,3
56
+ 1,3,1,3,1,3,1,3,1,3
57
+ 1,3,1,3,1,3,1,3,1,3
58
+ 1,3,1,3,1,3,1,3,1,3
59
+ 1,3,1,3,1,3,1,3,1,3
60
+ 1,3,1,3,1,3,1,3,1,3
61
+ 1,0,1,0,1,0,1,0,1,0
62
+ 1,3,1,3,1,3,1,3,1,3
63
+ 1,3,1,3,1,3,1,3,1,3
64
+ 1,3,1,3,1,3,1,3,1,3
seed_1000/iteration_011/agent_trainer_log/basic_training_metrics_2025-08-20___08-23-48.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nb_rollouts": [
3
+ 128
4
+ ],
5
+ "loss_mb_total": {
6
+ "value_mb_total": [
7
+ -0.29679375886917114,
8
+ -0.5579026341438293,
9
+ -1.4579201936721802,
10
+ -1.196811318397522,
11
+ -0.29679375886917114,
12
+ -0.44192543625831604,
13
+ -0.7118668556213379,
14
+ -0.7468025088310242,
15
+ -1.3146623373031616,
16
+ -0.29679375886917114,
17
+ -0.7468025088310242,
18
+ -0.43191924691200256,
19
+ -0.7468025088310242,
20
+ -0.5579026341438293,
21
+ -0.29679375886917114,
22
+ -1.2052409648895264,
23
+ 1.6331124305725098,
24
+ 1.2869794368743896,
25
+ -6.386050701141357,
26
+ -6.039918899536133,
27
+ 1.6331124305725098,
28
+ 1.5150965452194214,
29
+ 0.3584187924861908,
30
+ -2.2034029960632324,
31
+ 1.2354718446731567,
32
+ 1.6331124305725098,
33
+ -2.2034029960632324,
34
+ 1.4159551858901978,
35
+ -2.2034029960632324,
36
+ 1.2869794368743896,
37
+ 1.6331124305725098,
38
+ -2.1715247631073
39
+ ]
40
+ },
41
+ "gradient_norm": [
42
+ 18.473228454589844
43
+ ]
44
+ }