Training in progress, step 80
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5f865b5355a75cf78bf118db9027f3b675c63c91153dd98ebbad4809ce9101de
 size 174655536
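
The adapter weights themselves live in Git LFS, so this commit only rewrites the pointer file: the "oid sha256:" line records the content hash of the new checkpoint and "size" its length in bytes (174655536, roughly 175 MB). A minimal sketch of checking a downloaded copy against the pointer; the local path is an assumption, while the oid and size are taken from the diff above:

# Sketch: verify a downloaded adapter_model.safetensors against the LFS
# pointer above. The local path is hypothetical; oid and size come from the diff.
import hashlib
import os

path = "adapter_model.safetensors"  # hypothetical local download
expected_oid = "5f865b5355a75cf78bf118db9027f3b675c63c91153dd98ebbad4809ce9101de"
expected_size = 174655536

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
print("local file matches the LFS pointer")

Git LFS performs the same oid check when it materializes the file on checkout, so a sketch like this is mainly useful for copies fetched outside of git.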
wandb/debug-internal.log
CHANGED
The diff for this file is too large to render; see the raw diff.
wandb/run-20250203_022803-dcc7zgc1/files/output.log
CHANGED
@@ -44,4 +44,24 @@ The model is not an instance of PreTrainedModel. No liger kernels will be applied.
 {'loss': 0.0844, 'grad_norm': 2.0329487323760986, 'learning_rate': 4.684055976615924e-05, 'epoch': 0.5}
 {'loss': 0.0606, 'grad_norm': 0.9544902443885803, 'learning_rate': 4.625542839324036e-05, 'epoch': 0.52}
 with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]
-
+ 53%|█████████████████████████████████████████████████████                                               | 80/152 [38:58<33:23, 27.82s/it]
+{'loss': 0.0741, 'grad_norm': 1.5324604511260986, 'learning_rate': 4.5624936864957556e-05, 'epoch': 0.55}
+{'loss': 0.0766, 'grad_norm': 1.0285587310791016, 'learning_rate': 4.4950430682006e-05, 'epoch': 0.58}
+{'loss': 0.0799, 'grad_norm': 1.295554280281067, 'learning_rate': 4.423334927457198e-05, 'epoch': 0.6}
+{'loss': 0.0796, 'grad_norm': 0.9307866096496582, 'learning_rate': 4.347522293051648e-05, 'epoch': 0.63}
+{'loss': 0.067, 'grad_norm': 0.7554837465286255, 'learning_rate': 4.267766952966369e-05, 'epoch': 0.65}
+{'loss': 0.054, 'grad_norm': 0.6210789084434509, 'learning_rate': 4.184239109116393e-05, 'epoch': 0.68}
+{'loss': 0.0728, 'grad_norm': 1.059621810913086, 'learning_rate': 4.097117014129903e-05, 'epoch': 0.71}
+{'loss': 0.0782, 'grad_norm': 1.0603656768798828, 'learning_rate': 4.0065865909481417e-05, 'epoch': 0.73}
+{'loss': 0.0843, 'grad_norm': 1.2227962017059326, 'learning_rate': 3.91284103605648e-05, 'epoch': 0.76}
+{'loss': 0.0688, 'grad_norm': 1.2021204233169556, 'learning_rate': 3.81608040719339e-05, 'epoch': 0.78}
+{'loss': 0.0667, 'grad_norm': 1.130820631980896, 'learning_rate': 3.716511196417141e-05, 'epoch': 0.81}
+{'loss': 0.0506, 'grad_norm': 0.8557747602462769, 'learning_rate': 3.6143458894413465e-05, 'epoch': 0.84}
+{'loss': 0.0378, 'grad_norm': 0.8222028017044067, 'learning_rate': 3.509802512179737e-05, 'epoch': 0.86}
+{'loss': 0.069, 'grad_norm': 1.441278338432312, 'learning_rate': 3.403104165467883e-05, 'epoch': 0.89}
+{'loss': 0.048, 'grad_norm': 1.504895806312561, 'learning_rate': 3.294478548954754e-05, 'epoch': 0.92}
+{'loss': 0.0557, 'grad_norm': 0.9560084342956543, 'learning_rate': 3.1841574751802076e-05, 'epoch': 0.94}
+{'loss': 0.0505, 'grad_norm': 1.1108149290084839, 'learning_rate': 3.072376374875335e-05, 'epoch': 0.97}
+{'loss': 0.0584, 'grad_norm': 0.7871246337890625, 'learning_rate': 2.9593737945414264e-05, 'epoch': 0.99}
+{'loss': 0.0325, 'grad_norm': 1.2843117713928223, 'learning_rate': 2.8453908873797058e-05, 'epoch': 1.01}
+{'loss': 0.04, 'grad_norm': 0.8063321113586426, 'learning_rate': 2.7306708986582553e-05, 'epoch': 1.04}
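
Each appended metric line in output.log is the plain Python dict repr that the Hugging Face Trainer prints at its logging steps, so the loss curve can be recovered directly from the file. A minimal sketch, assuming the run-relative path below and that metric lines are exactly those beginning with {'loss':

# Sketch: extract the {'loss': ..., 'epoch': ...} records appended in the
# diff above; the path mirrors the file shown in this commit.
import ast

log_path = "wandb/run-20250203_022803-dcc7zgc1/files/output.log"

records = []
with open(log_path) as f:
    for line in f:
        line = line.strip()
        if line.startswith("{'loss'"):
            records.append(ast.literal_eval(line))  # safe parse of the dict repr

for r in records:
    print(f"epoch {r['epoch']:.2f}  loss {r['loss']:.4f}  lr {r['learning_rate']:.3e}")

Over the window logged here the learning rate decays monotonically from about 4.68e-05 to 2.73e-05 while the loss fluctuates between roughly 0.03 and 0.09, with the lowest values appearing as the run crosses from epoch 0 into epoch 1.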
wandb/run-20250203_022803-dcc7zgc1/logs/debug-internal.log
CHANGED
The diff for this file is too large to render.
See the raw diff.