Deep RL Course 2nd try

Browse files

Files changed (14) hide show

SnowballTarget.onnx +2 -2
SnowballTarget/SnowballTarget-100408.onnx +2 -2
SnowballTarget/SnowballTarget-100408.pt +2 -2
SnowballTarget/SnowballTarget-49936.onnx +2 -2
SnowballTarget/SnowballTarget-49936.pt +2 -2
SnowballTarget/SnowballTarget-99960.onnx +2 -2
SnowballTarget/SnowballTarget-99960.pt +2 -2
SnowballTarget/checkpoint.pt +2 -2
SnowballTarget/events.out.tfevents.1719035001.c032e391e02e.9091.0 +3 -0
config.json +1 -1
configuration.yaml +3 -3
run_logs/Player-0.log +5 -8
run_logs/timers.json +140 -140
run_logs/training_status.json +8 -8

SnowballTarget.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec052258055c506159ca657d3003d16cb392d58a307e943ecd9107ed1f55ac8a
-size 650646

 version https://git-lfs.github.com/spec/v1
+oid sha256:896e725cc0349fa3fd91341e09c51055e297aa4dfe21d170488645918778af80
+size 331587

SnowballTarget/SnowballTarget-100408.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec052258055c506159ca657d3003d16cb392d58a307e943ecd9107ed1f55ac8a
-size 650646

 version https://git-lfs.github.com/spec/v1
+oid sha256:896e725cc0349fa3fd91341e09c51055e297aa4dfe21d170488645918778af80
+size 331587

SnowballTarget/SnowballTarget-100408.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29850c12ff293cbfbe4e495001eeeb9935dcdfe06f683ab2e6aef019dfaa3864
-size 3850075

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffc19dc0ddb01732d792cfafd051558064d0b967b9e5250d0fc2f090117ade2a
+size 1945995

SnowballTarget/SnowballTarget-49936.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8067103560ce037e4800a70955c676c2aa1bbb7dce617575bc8a8c03a2070438
-size 650646

 version https://git-lfs.github.com/spec/v1
+oid sha256:8cecf764be80e9dbf3c176c8f55f8cc36e5b1727a2ff309dd081df4fab1a45d3
+size 331587

SnowballTarget/SnowballTarget-49936.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1ae2f8d30bef38092ca8f1ff303ae55aea61beafa158d06229b704611935ea1
-size 3850000

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a05dbac9f7a7e28c86b958e6b1be4ddb791626622af3decd967278de0feec5c
+size 1945904

SnowballTarget/SnowballTarget-99960.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec052258055c506159ca657d3003d16cb392d58a307e943ecd9107ed1f55ac8a
-size 650646

 version https://git-lfs.github.com/spec/v1
+oid sha256:896e725cc0349fa3fd91341e09c51055e297aa4dfe21d170488645918778af80
+size 331587

SnowballTarget/SnowballTarget-99960.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ad473f2a04035d8f04c82a08e98ed4b8a3fa0b51fc160de6b53e451b70c9acc
-size 3850000

 version https://git-lfs.github.com/spec/v1
+oid sha256:7469bf4fc633cf955952d71988d96a18863dd7a6d663d010b80ecbe4e2766cc8
+size 1945904

SnowballTarget/checkpoint.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb1f13abc6af8aa0925153b373f30975ccde1785bf54d18d039be13035432626
-size 3849250

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9ffc836b6f8eecefba35dbf2cf33bfa08478709308932fd0f7d4881b49ecff6
+size 1944994

SnowballTarget/events.out.tfevents.1719035001.c032e391e02e.9091.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fcf541f02d2f11767cd44a66d9f82f072d6dd5772345860ccf9e95240f8fbc45
+size 20533

config.json CHANGED Viewed

@@ -1 +1 @@

- {"default_settings": null, "behaviors": {"SnowballTarget": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 128, "buffer_size": ~~2048~~, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "shared_critic": false, "learning_rate_schedule": "linear", "beta_schedule": "linear", "epsilon_schedule": "linear"}, "checkpoint_interval": 50000, "network_settings": {"normalize": false, "hidden_units": ~~256~~, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 10, "even_checkpoints": false, "max_steps": 100000, "time_horizon": 64, "summary_freq": 10000, "threaded": true, "self_play": null, "behavioral_cloning": null}}, "env_settings": {"env_path": "./training-envs-executables/linux/SnowballTarget/SnowballTarget", "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": true, "no_graphics_monitor": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "SnowballTarget1", "initialize_from": null, "load_model": false, "resume": false, "force": true, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}

+ {"default_settings": null, "behaviors": {"SnowballTarget": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 128, "buffer_size": 4096, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "shared_critic": false, "learning_rate_schedule": "linear", "beta_schedule": "linear", "epsilon_schedule": "linear"}, "checkpoint_interval": 50000, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 3, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 10, "even_checkpoints": false, "max_steps": 100000, "time_horizon": 64, "summary_freq": 10000, "threaded": true, "self_play": null, "behavioral_cloning": null}}, "env_settings": {"env_path": "./training-envs-executables/linux/SnowballTarget/SnowballTarget", "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": true, "no_graphics_monitor": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "SnowballTarget1", "initialize_from": null, "load_model": false, "resume": false, "force": true, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}

configuration.yaml CHANGED Viewed

@@ -4,7 +4,7 @@ behaviors:
     trainer_type: ppo
     hyperparameters:
       batch_size: 128
-      buffer_size: 2048
       learning_rate: 0.0003
       beta: 0.005
       epsilon: 0.2
@@ -17,8 +17,8 @@ behaviors:
     checkpoint_interval: 50000
     network_settings:
       normalize: false
-      hidden_units: 256
-      num_layers: 2
       vis_encode_type: simple
       memory: null
       goal_conditioning_type: hyper

     trainer_type: ppo
     hyperparameters:
       batch_size: 128
+      buffer_size: 4096
       learning_rate: 0.0003
       beta: 0.005
       epsilon: 0.2
     checkpoint_interval: 50000
     network_settings:
       normalize: false
+      hidden_units: 128
+      num_layers: 3
       vis_encode_type: simple
       memory: null
       goal_conditioning_type: hyper

run_logs/Player-0.log CHANGED Viewed

@@ -2,9 +2,6 @@ Mono path[0] = '/content/ml-agents/training-envs-executables/linux/SnowballTarge
 Mono config path = '/content/ml-agents/training-envs-executables/linux/SnowballTarget/SnowballTarget_Data/MonoBleedingEdge/etc'
 Preloaded 'lib_burst_generated.so'
 Preloaded 'libgrpc_csharp_ext.x64.so'
-PlayerPrefs - Creating folder: /root/.config/unity3d/Hugging Face
-PlayerPrefs - Creating folder: /root/.config/unity3d/Hugging Face/SnowballTarget
-Unable to load player prefs
 Initialize engine version: 2021.3.14f1 (eee1884e7226)
 [Subsystems] Discovering subsystems at path /content/ml-agents/training-envs-executables/linux/SnowballTarget/SnowballTarget_Data/UnitySubsystems
 Forcing GfxDevice: Null
@@ -34,7 +31,7 @@ ALSA lib pcm.c:2664:(snd_pcm_open_noupdate) Unknown PCM default
 FMOD failed to initialize the output device.: "Error initializing output device. " (60)
 FMOD initialized on nosound output
 Begin MonoManager ReloadAssembly
-- Completed reload, in  0.099 seconds
 ERROR: Shader Sprites/Default shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
 ERROR: Shader Sprites/Mask shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
 ERROR: Shader Legacy Shaders/VertexLit shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
@@ -45,7 +42,7 @@ ERROR: Shader Standard shader is not supported on this GPU (none of subshaders/f
 WARNING: Shader Unsupported: 'Standard' - All subshaders removed
 WARNING: Shader Did you use #pragma only_renderers and omit this platform?
 WARNING: Shader If subshaders removal was intentional, you may have forgotten turning Fallback off?
-UnloadTime: 0.697229 ms
 ERROR: Shader UI/Default shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
 requesting resize 84 x 84
 Setting up 1 worker threads for Enlighten.
@@ -169,7 +166,7 @@ Memory Statistics:
       Peak Allocated Bytes 0 B
       Overflow Count 0
 [ALLOC_DEFAULT] Dual Thread Allocator
-  Peak main deferred allocation count 37
     [ALLOC_BUCKET]
       Large Block size 4.0 MB
       Used Block count 1
@@ -213,10 +210,10 @@ Memory Statistics:
       Used Block count 1
       Peak Allocated bytes 1.0 MB
     [ALLOC_GFX_MAIN]
-      Peak usage frame count: [32.0 KB-64.0 KB]: 2392 frames, [64.0 KB-128.0 KB]: 349 frames
       Requested Block Size 16.0 MB
       Peak Block count 1
-      Peak Allocated memory 66.7 KB
       Peak Large allocation bytes 0 B
     [ALLOC_GFX_THREAD]
       Peak usage frame count: [32.0 KB-64.0 KB]: 2741 frames

 Mono config path = '/content/ml-agents/training-envs-executables/linux/SnowballTarget/SnowballTarget_Data/MonoBleedingEdge/etc'
 Preloaded 'lib_burst_generated.so'
 Preloaded 'libgrpc_csharp_ext.x64.so'
 Initialize engine version: 2021.3.14f1 (eee1884e7226)
 [Subsystems] Discovering subsystems at path /content/ml-agents/training-envs-executables/linux/SnowballTarget/SnowballTarget_Data/UnitySubsystems
 Forcing GfxDevice: Null
 FMOD failed to initialize the output device.: "Error initializing output device. " (60)
 FMOD initialized on nosound output
 Begin MonoManager ReloadAssembly
+- Completed reload, in  0.086 seconds
 ERROR: Shader Sprites/Default shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
 ERROR: Shader Sprites/Mask shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
 ERROR: Shader Legacy Shaders/VertexLit shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
 WARNING: Shader Unsupported: 'Standard' - All subshaders removed
 WARNING: Shader Did you use #pragma only_renderers and omit this platform?
 WARNING: Shader If subshaders removal was intentional, you may have forgotten turning Fallback off?
+UnloadTime: 0.744692 ms
 ERROR: Shader UI/Default shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
 requesting resize 84 x 84
 Setting up 1 worker threads for Enlighten.
       Peak Allocated Bytes 0 B
       Overflow Count 0
 [ALLOC_DEFAULT] Dual Thread Allocator
+  Peak main deferred allocation count 44
     [ALLOC_BUCKET]
       Large Block size 4.0 MB
       Used Block count 1
       Used Block count 1
       Peak Allocated bytes 1.0 MB
     [ALLOC_GFX_MAIN]
+      Peak usage frame count: [32.0 KB-64.0 KB]: 2058 frames, [64.0 KB-128.0 KB]: 683 frames
       Requested Block Size 16.0 MB
       Peak Block count 1
+      Peak Allocated memory 66.8 KB
       Peak Large allocation bytes 0 B
     [ALLOC_GFX_THREAD]
       Peak usage frame count: [32.0 KB-64.0 KB]: 2741 frames

run_logs/timers.json CHANGED Viewed

@@ -2,15 +2,15 @@
     "name": "root",
     "gauges": {
         "SnowballTarget.Policy.Entropy.mean": {
-            "value": 1.6545395851135254,
-            "min": 1.6545395851135254,
-            "max": 2.865912914276123,
             "count": 10
         },
         "SnowballTarget.Policy.Entropy.sum": {
-            "value": 16853.140625,
-            "min": 16853.140625,
-            "max": 29475.9140625,
             "count": 10
         },
         "SnowballTarget.Step.mean": {
@@ -26,15 +26,15 @@
             "count": 10
         },
         "SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
-            "value": 9.161903381347656,
-            "min": 0.3639596402645111,
-            "max": 9.161903381347656,
             "count": 10
         },
         "SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
-            "value": 1869.0283203125,
-            "min": 70.60816955566406,
-            "max": 1869.0283203125,
             "count": 10
         },
         "SnowballTarget.Environment.EpisodeLength.mean": {
@@ -49,88 +49,88 @@
             "max": 10945.0,
             "count": 10
         },
         "SnowballTarget.Losses.PolicyLoss.mean": {
-            "value": 0.06668726046298296,
-            "min": 0.06566922007512081,
-            "max": 0.07467531853556056,
             "count": 10
         },
         "SnowballTarget.Losses.PolicyLoss.sum": {
-            "value": 0.3334363023149148,
-            "min": 0.2677655371906653,
-            "max": 0.3551044942635367,
             "count": 10
         },
         "SnowballTarget.Losses.ValueLoss.mean": {
-            "value": 0.2572032511818643,
-            "min": 0.122001881134815,
-            "max": 0.2861780487731392,
             "count": 10
         },
         "SnowballTarget.Losses.ValueLoss.sum": {
-            "value": 1.2860162559093216,
-            "min": 0.48800752453926,
-            "max": 1.3895678444235933,
             "count": 10
         },
         "SnowballTarget.Policy.LearningRate.mean": {
-            "value": 1.6464094511999996e-05,
-            "min": 1.6464094511999996e-05,
-            "max": 0.000283764005412,
             "count": 10
         },
         "SnowballTarget.Policy.LearningRate.sum": {
-            "value": 8.232047255999999e-05,
-            "min": 8.232047255999999e-05,
-            "max": 0.00127032007656,
             "count": 10
         },
         "SnowballTarget.Policy.Epsilon.mean": {
-            "value": 0.105488,
-            "min": 0.105488,
-            "max": 0.194588,
             "count": 10
         },
         "SnowballTarget.Policy.Epsilon.sum": {
-            "value": 0.52744,
-            "min": 0.4615520000000001,
-            "max": 0.92344,
             "count": 10
         },
         "SnowballTarget.Policy.Beta.mean": {
             "value": 0.0002838512,
             "min": 0.0002838512,
-            "max": 0.0047299412,
             "count": 10
         },
         "SnowballTarget.Policy.Beta.sum": {
-            "value": 0.001419256,
-            "min": 0.001419256,
-            "max": 0.021179656,
-            "count": 10
-        },
-        "SnowballTarget.Environment.CumulativeReward.mean": {
-            "value": 19.90909090909091,
-            "min": 3.659090909090909,
-            "max": 19.90909090909091,
-            "count": 10
-        },
-        "SnowballTarget.Environment.CumulativeReward.sum": {
-            "value": 1095.0,
-            "min": 161.0,
-            "max": 1095.0,
-            "count": 10
-        },
-        "SnowballTarget.Policy.ExtrinsicReward.mean": {
-            "value": 19.90909090909091,
-            "min": 3.659090909090909,
-            "max": 19.90909090909091,
-            "count": 10
-        },
-        "SnowballTarget.Policy.ExtrinsicReward.sum": {
-            "value": 1095.0,
-            "min": 161.0,
-            "max": 1095.0,
             "count": 10
         },
         "SnowballTarget.IsTraining.mean": {
@@ -148,7 +148,7 @@
     },
     "metadata": {
         "timer_format_version": "0.1.0",
-        "start_time_seconds": "1719033731",
         "python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
         "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics --force",
         "mlagents_version": "1.1.0.dev0",
@@ -156,59 +156,59 @@
         "communication_protocol_version": "1.5.0",
         "pytorch_version": "2.3.0+cu121",
         "numpy_version": "1.23.5",
-        "end_time_seconds": "1719033955"
     },
-    "total": 224.69840804800003,
     "count": 1,
-    "self": 0.43245920100002877,
     "children": {
         "run_training.setup": {
-            "total": 0.0769518119999475,
             "count": 1,
-            "self": 0.0769518119999475
         },
         "TrainerController.start_learning": {
-            "total": 224.18899703500006,
             "count": 1,
-            "self": 0.5460637679990441,
             "children": {
                 "TrainerController._reset_env": {
-                    "total": 3.0302591349999375,
                     "count": 1,
-                    "self": 3.0302591349999375
                 },
                 "TrainerController.advance": {
-                    "total": 220.51003486000127,
-                    "count": 9135,
-                    "self": 0.13079704700112416,
                     "children": {
                         "env_step": {
-                            "total": 220.37923781300015,
-                            "count": 9135,
-                            "self": 142.10116460199583,
                             "children": {
                                 "SubprocessEnvManager._take_step": {
-                                    "total": 78.13666314100203,
-                                    "count": 9135,
-                                    "self": 0.709724793006103,
                                     "children": {
                                         "TorchPolicy.evaluate": {
-                                            "total": 77.42693834799593,
-                                            "count": 9135,
-                                            "self": 77.42693834799593
                                         }
                                     }
                                 },
                                 "workers": {
-                                    "total": 0.14141007000228,
-                                    "count": 9135,
                                     "self": 0.0,
                                     "children": {
                                         "worker_root": {
-                                            "total": 223.33572198400668,
-                                            "count": 9135,
                                             "is_parallel": true,
-                                            "self": 114.77791775600201,
                                             "children": {
                                                 "run_training.setup": {
                                                     "total": 0.0,
@@ -217,48 +217,48 @@
                                                     "self": 0.0,
                                                     "children": {
                                                         "steps_from_proto": {
-                                                            "total": 0.007909454000014193,
                                                             "count": 1,
                                                             "is_parallel": true,
-                                                            "self": 0.00648123700011638,
                                                             "children": {
                                                                 "_process_rank_one_or_two_observation": {
-                                                                    "total": 0.0014282169998978134,
                                                                     "count": 10,
                                                                     "is_parallel": true,
-                                                                    "self": 0.0014282169998978134
                                                                 }
                                                             }
                                                         },
                                                         "UnityEnvironment.step": {
-                                                            "total": 0.03553736200001367,
                                                             "count": 1,
                                                             "is_parallel": true,
-                                                            "self": 0.0006540329999324968,
                                                             "children": {
                                                                 "UnityEnvironment._generate_step_input": {
-                                                                    "total": 0.0004143720000229223,
                                                                     "count": 1,
                                                                     "is_parallel": true,
-                                                                    "self": 0.0004143720000229223
                                                                 },
                                                                 "communicator.exchange": {
-                                                                    "total": 0.0326149100000066,
                                                                     "count": 1,
                                                                     "is_parallel": true,
-                                                                    "self": 0.0326149100000066
                                                                 },
                                                                 "steps_from_proto": {
-                                                                    "total": 0.0018540470000516507,
                                                                     "count": 1,
                                                                     "is_parallel": true,
-                                                                    "self": 0.0003548190002220508,
                                                                     "children": {
                                                                         "_process_rank_one_or_two_observation": {
-                                                                            "total": 0.0014992279998295999,
                                                                             "count": 10,
                                                                             "is_parallel": true,
-                                                                            "self": 0.0014992279998295999
                                                                         }
                                                                     }
                                                                 }
@@ -267,34 +267,34 @@
                                                     }
                                                 },
                                                 "UnityEnvironment.step": {
-                                                    "total": 108.55780422800467,
-                                                    "count": 9134,
                                                     "is_parallel": true,
-                                                    "self": 4.9432859259984525,
                                                     "children": {
                                                         "UnityEnvironment._generate_step_input": {
-                                                            "total": 2.6019552990039756,
-                                                            "count": 9134,
                                                             "is_parallel": true,
-                                                            "self": 2.6019552990039756
                                                         },
                                                         "communicator.exchange": {
-                                                            "total": 85.07568894199892,
-                                                            "count": 9134,
                                                             "is_parallel": true,
-                                                            "self": 85.07568894199892
                                                         },
                                                         "steps_from_proto": {
-                                                            "total": 15.936874061003323,
-                                                            "count": 9134,
                                                             "is_parallel": true,
-                                                            "self": 2.9845476230069607,
                                                             "children": {
                                                                 "_process_rank_one_or_two_observation": {
-                                                                    "total": 12.952326437996362,
-                                                                    "count": 91340,
                                                                     "is_parallel": true,
-                                                                    "self": 12.952326437996362
                                                                 }
                                                             }
                                                         }
@@ -309,9 +309,9 @@
                     }
                 },
                 "trainer_threads": {
-                    "total": 0.0003972689999045542,
                     "count": 1,
-                    "self": 0.0003972689999045542,
                     "children": {
                         "thread_root": {
                             "total": 0.0,
@@ -320,36 +320,36 @@
                             "self": 0.0,
                             "children": {
                                 "trainer_advance": {
-                                    "total": 218.30389485791818,
-                                    "count": 321791,
                                     "is_parallel": true,
-                                    "self": 6.38754597197601,
                                     "children": {
                                         "process_trajectory": {
-                                            "total": 120.7924449289419,
-                                            "count": 321791,
                                             "is_parallel": true,
-                                            "self": 120.39559686894188,
                                             "children": {
                                                 "RLTrainer._checkpoint": {
-                                                    "total": 0.3968480600000248,
                                                     "count": 2,
                                                     "is_parallel": true,
-                                                    "self": 0.3968480600000248
                                                 }
                                             }
                                         },
                                         "_update_policy": {
-                                            "total": 91.12390395700027,
-                                            "count": 45,
                                             "is_parallel": true,
-                                            "self": 28.05040366300102,
                                             "children": {
                                                 "TorchPPOOptimizer.update": {
-                                                    "total": 63.073500293999246,
-                                                    "count": 2292,
                                                     "is_parallel": true,
-                                                    "self": 63.073500293999246
                                                 }
                                             }
                                         }
@@ -360,14 +360,14 @@
                     }
                 },
                 "TrainerController._save_models": {
-                    "total": 0.10224200299990116,
                     "count": 1,
-                    "self": 0.0010237589997359464,
                     "children": {
                         "RLTrainer._checkpoint": {
-                            "total": 0.10121824400016521,
                             "count": 1,
-                            "self": 0.10121824400016521
                         }
                     }
                 }

     "name": "root",
     "gauges": {
         "SnowballTarget.Policy.Entropy.mean": {
+            "value": 2.0896008014678955,
+            "min": 2.0896008014678955,
+            "max": 2.8781380653381348,
             "count": 10
         },
         "SnowballTarget.Policy.Entropy.sum": {
+            "value": 21307.66015625,
+            "min": 20771.3203125,
+            "max": 29475.01171875,
             "count": 10
         },
         "SnowballTarget.Step.mean": {
             "count": 10
         },
         "SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
+            "value": 5.565084457397461,
+            "min": 0.17080172896385193,
+            "max": 5.565084457397461,
             "count": 10
         },
         "SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
+            "value": 1135.2772216796875,
+            "min": 33.135536193847656,
+            "max": 1135.2772216796875,
             "count": 10
         },
         "SnowballTarget.Environment.EpisodeLength.mean": {
             "max": 10945.0,
             "count": 10
         },
+        "SnowballTarget.Environment.CumulativeReward.mean": {
+            "value": 13.8,
+            "min": 3.1363636363636362,
+            "max": 13.8,
+            "count": 10
+        },
+        "SnowballTarget.Environment.CumulativeReward.sum": {
+            "value": 759.0,
+            "min": 138.0,
+            "max": 759.0,
+            "count": 10
+        },
+        "SnowballTarget.Policy.ExtrinsicReward.mean": {
+            "value": 13.8,
+            "min": 3.1363636363636362,
+            "max": 13.8,
+            "count": 10
+        },
+        "SnowballTarget.Policy.ExtrinsicReward.sum": {
+            "value": 759.0,
+            "min": 138.0,
+            "max": 759.0,
+            "count": 10
+        },
         "SnowballTarget.Losses.PolicyLoss.mean": {
+            "value": 0.077521520225798,
+            "min": 0.06467075724332758,
+            "max": 0.077521520225798,
             "count": 10
         },
         "SnowballTarget.Losses.PolicyLoss.sum": {
+            "value": 0.155043040451596,
+            "min": 0.12934151448665515,
+            "max": 0.20724409716814418,
             "count": 10
         },
         "SnowballTarget.Losses.ValueLoss.mean": {
+            "value": 0.28602686633958535,
+            "min": 0.11679404042882538,
+            "max": 0.28602686633958535,
             "count": 10
         },
         "SnowballTarget.Losses.ValueLoss.sum": {
+            "value": 0.5720537326791707,
+            "min": 0.23358808085765076,
+            "max": 0.7956818916341837,
             "count": 10
         },
         "SnowballTarget.Policy.LearningRate.mean": {
+            "value": 1.6464094511999992e-05,
+            "min": 1.6464094511999992e-05,
+            "max": 0.00028046400651199994,
             "count": 10
         },
         "SnowballTarget.Policy.LearningRate.sum": {
+            "value": 3.2928189023999985e-05,
+            "min": 3.2928189023999985e-05,
+            "max": 0.0005839921053360001,
             "count": 10
         },
         "SnowballTarget.Policy.Epsilon.mean": {
+            "value": 0.10548799999999997,
+            "min": 0.10548799999999997,
+            "max": 0.19348800000000005,
             "count": 10
         },
         "SnowballTarget.Policy.Epsilon.sum": {
+            "value": 0.21097599999999994,
+            "min": 0.21097599999999994,
+            "max": 0.4946640000000001,
             "count": 10
         },
         "SnowballTarget.Policy.Beta.mean": {
             "value": 0.0002838512,
             "min": 0.0002838512,
+            "max": 0.0046750512,
             "count": 10
         },
         "SnowballTarget.Policy.Beta.sum": {
+            "value": 0.0005677024,
+            "min": 0.0005677024,
+            "max": 0.009743733600000004,
             "count": 10
         },
         "SnowballTarget.IsTraining.mean": {
     },
     "metadata": {
         "timer_format_version": "0.1.0",
+        "start_time_seconds": "1719035000",
         "python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
         "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics --force",
         "mlagents_version": "1.1.0.dev0",
         "communication_protocol_version": "1.5.0",
         "pytorch_version": "2.3.0+cu121",
         "numpy_version": "1.23.5",
+        "end_time_seconds": "1719035231"
     },
+    "total": 231.1935535309999,
     "count": 1,
+    "self": 0.42512272800013307,
     "children": {
         "run_training.setup": {
+            "total": 0.05121374600003037,
             "count": 1,
+            "self": 0.05121374600003037
         },
         "TrainerController.start_learning": {
+            "total": 230.71721705699974,
             "count": 1,
+            "self": 0.313961196010041,
             "children": {
                 "TrainerController._reset_env": {
+                    "total": 2.1678631690001566,
                     "count": 1,
+                    "self": 2.1678631690001566
                 },
                 "TrainerController.advance": {
+                    "total": 228.1420507909893,
+                    "count": 9136,
+                    "self": 0.13254497699517742,
                     "children": {
                         "env_step": {
+                            "total": 228.0095058139941,
+                            "count": 9136,
+                            "self": 157.48329376299944,
                             "children": {
                                 "SubprocessEnvManager._take_step": {
+                                    "total": 70.39158679798129,
+                                    "count": 9136,
+                                    "self": 0.6971241959809049,
                                     "children": {
                                         "TorchPolicy.evaluate": {
+                                            "total": 69.69446260200039,
+                                            "count": 9136,
+                                            "self": 69.69446260200039
                                         }
                                     }
                                 },
                                 "workers": {
+                                    "total": 0.13462525301338246,
+                                    "count": 9136,
                                     "self": 0.0,
                                     "children": {
                                         "worker_root": {
+                                            "total": 230.01476085700733,
+                                            "count": 9136,
                                             "is_parallel": true,
+                                            "self": 120.64642620004179,
                                             "children": {
                                                 "run_training.setup": {
                                                     "total": 0.0,
                                                     "self": 0.0,
                                                     "children": {
                                                         "steps_from_proto": {
+                                                            "total": 0.0020915219997732493,
                                                             "count": 1,
                                                             "is_parallel": true,
+                                                            "self": 0.0006469720001405221,
                                                             "children": {
                                                                 "_process_rank_one_or_two_observation": {
+                                                                    "total": 0.0014445499996327271,
                                                                     "count": 10,
                                                                     "is_parallel": true,
+                                                                    "self": 0.0014445499996327271
                                                                 }
                                                             }
                                                         },
                                                         "UnityEnvironment.step": {
+                                                            "total": 0.03532054499964943,
                                                             "count": 1,
                                                             "is_parallel": true,
+                                                            "self": 0.0006808919997638441,
                                                             "children": {
                                                                 "UnityEnvironment._generate_step_input": {
+                                                                    "total": 0.00038987700008874526,
                                                                     "count": 1,
                                                                     "is_parallel": true,
+                                                                    "self": 0.00038987700008874526
                                                                 },
                                                                 "communicator.exchange": {
+                                                                    "total": 0.03237776700007089,
                                                                     "count": 1,
                                                                     "is_parallel": true,
+                                                                    "self": 0.03237776700007089
                                                                 },
                                                                 "steps_from_proto": {
+                                                                    "total": 0.0018720089997259493,
                                                                     "count": 1,
                                                                     "is_parallel": true,
+                                                                    "self": 0.0003487399994810403,
                                                                     "children": {
                                                                         "_process_rank_one_or_two_observation": {
+                                                                            "total": 0.001523269000244909,
                                                                             "count": 10,
                                                                             "is_parallel": true,
+                                                                            "self": 0.001523269000244909
                                                                         }
                                                                     }
                                                                 }
                                                     }
                                                 },
                                                 "UnityEnvironment.step": {
+                                                    "total": 109.36833465696554,
+                                                    "count": 9135,
                                                     "is_parallel": true,
+                                                    "self": 4.979603768009383,
                                                     "children": {
                                                         "UnityEnvironment._generate_step_input": {
+                                                            "total": 2.6678287209888367,
+                                                            "count": 9135,
                                                             "is_parallel": true,
+                                                            "self": 2.6678287209888367
                                                         },
                                                         "communicator.exchange": {
+                                                            "total": 85.64176659998293,
+                                                            "count": 9135,
                                                             "is_parallel": true,
+                                                            "self": 85.64176659998293
                                                         },
                                                         "steps_from_proto": {
+                                                            "total": 16.079135567984395,
+                                                            "count": 9135,
                                                             "is_parallel": true,
+                                                            "self": 2.954485399956411,
                                                             "children": {
                                                                 "_process_rank_one_or_two_observation": {
+                                                                    "total": 13.124650168027983,
+                                                                    "count": 91350,
                                                                     "is_parallel": true,
+                                                                    "self": 13.124650168027983
                                                                 }
                                                             }
                                                         }
                     }
                 },
                 "trainer_threads": {
+                    "total": 0.0002983570002470515,
                     "count": 1,
+                    "self": 0.0002983570002470515,
                     "children": {
                         "thread_root": {
                             "total": 0.0,
                             "self": 0.0,
                             "children": {
                                 "trainer_advance": {
+                                    "total": 224.79246962912157,
+                                    "count": 420750,
                                     "is_parallel": true,
+                                    "self": 8.413199142081794,
                                     "children": {
                                         "process_trajectory": {
+                                            "total": 148.7846739390393,
+                                            "count": 420750,
                                             "is_parallel": true,
+                                            "self": 148.46090210403918,
                                             "children": {
                                                 "RLTrainer._checkpoint": {
+                                                    "total": 0.32377183500011597,
                                                     "count": 2,
                                                     "is_parallel": true,
+                                                    "self": 0.32377183500011597
                                                 }
                                             }
                                         },
                                         "_update_policy": {
+                                            "total": 67.59459654800048,
+                                            "count": 22,
                                             "is_parallel": true,
+                                            "self": 23.149594113990588,
                                             "children": {
                                                 "TorchPPOOptimizer.update": {
+                                                    "total": 44.44500243400989,
+                                                    "count": 2241,
                                                     "is_parallel": true,
+                                                    "self": 44.44500243400989
                                                 }
                                             }
                                         }
                     }
                 },
                 "TrainerController._save_models": {
+                    "total": 0.09304354400001102,
                     "count": 1,
+                    "self": 0.0009155469997494947,
                     "children": {
                         "RLTrainer._checkpoint": {
+                            "total": 0.09212799700026153,
                             "count": 1,
+                            "self": 0.09212799700026153
                         }
                     }
                 }

run_logs/training_status.json CHANGED Viewed

@@ -4,8 +4,8 @@
             {
                 "steps": 49936,
                 "file_path": "results/SnowballTarget1/SnowballTarget/SnowballTarget-49936.onnx",
-                "reward": 13.272727272727273,
-                "creation_time": 1719033845.1762831,
                 "auxillary_file_paths": [
                     "results/SnowballTarget1/SnowballTarget/SnowballTarget-49936.pt"
                 ]
@@ -13,8 +13,8 @@
             {
                 "steps": 99960,
                 "file_path": "results/SnowballTarget1/SnowballTarget/SnowballTarget-99960.onnx",
-                "reward": 20.181818181818183,
-                "creation_time": 1719033954.8887966,
                 "auxillary_file_paths": [
                     "results/SnowballTarget1/SnowballTarget/SnowballTarget-99960.pt"
                 ]
@@ -22,8 +22,8 @@
             {
                 "steps": 100408,
                 "file_path": "results/SnowballTarget1/SnowballTarget/SnowballTarget-100408.onnx",
-                "reward": 20.181818181818183,
-                "creation_time": 1719033955.26789,
                 "auxillary_file_paths": [
                     "results/SnowballTarget1/SnowballTarget/SnowballTarget-100408.pt"
                 ]
@@ -32,8 +32,8 @@
         "final_checkpoint": {
             "steps": 100408,
             "file_path": "results/SnowballTarget1/SnowballTarget.onnx",
-            "reward": 20.181818181818183,
-            "creation_time": 1719033955.26789,
             "auxillary_file_paths": [
                 "results/SnowballTarget1/SnowballTarget/SnowballTarget-100408.pt"
             ]

             {
                 "steps": 49936,
                 "file_path": "results/SnowballTarget1/SnowballTarget/SnowballTarget-49936.onnx",
+                "reward": 11.0,
+                "creation_time": 1719035118.8526025,
                 "auxillary_file_paths": [
                     "results/SnowballTarget1/SnowballTarget/SnowballTarget-49936.pt"
                 ]
             {
                 "steps": 99960,
                 "file_path": "results/SnowballTarget1/SnowballTarget/SnowballTarget-99960.onnx",
+                "reward": 13.636363636363637,
+                "creation_time": 1719035231.2753816,
                 "auxillary_file_paths": [
                     "results/SnowballTarget1/SnowballTarget/SnowballTarget-99960.pt"
                 ]
             {
                 "steps": 100408,
                 "file_path": "results/SnowballTarget1/SnowballTarget/SnowballTarget-100408.onnx",
+                "reward": 13.636363636363637,
+                "creation_time": 1719035231.4224975,
                 "auxillary_file_paths": [
                     "results/SnowballTarget1/SnowballTarget/SnowballTarget-100408.pt"
                 ]
         "final_checkpoint": {
             "steps": 100408,
             "file_path": "results/SnowballTarget1/SnowballTarget.onnx",
+            "reward": 13.636363636363637,
+            "creation_time": 1719035231.4224975,
             "auxillary_file_paths": [
                 "results/SnowballTarget1/SnowballTarget/SnowballTarget-100408.pt"
             ]