Update README.md
Browse files
README.md
CHANGED
@@ -28,7 +28,7 @@ hub_model_id: sudoping01/bambara-llm-exp3
|
|
28 |
plugins:
|
29 |
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
|
30 |
cut_cross_entropy: true
|
31 |
-
load_in_4bit: false
|
32 |
gradient_checkpointing: true
|
33 |
gradient_checkpointing_kwargs:
|
34 |
use_reentrant: false
|
@@ -49,26 +49,26 @@ datasets:
|
|
49 |
content: content
|
50 |
val_set_size: 0.01
|
51 |
output_dir: ./outputs/bambara-gemma3n-lora-exp4
|
52 |
-
adapter: lora
|
53 |
-
lora_r: 64
|
54 |
-
lora_alpha: 128
|
55 |
lora_dropout: 0.05
|
56 |
lora_target_modules: 'model.language_model.layers.[\d]+.(mlp|self_attn).(up|down|gate|q|k|v|o)_proj'
|
57 |
sequence_len: 4096
|
58 |
sample_packing: false
|
59 |
pad_to_sequence_len: false
|
60 |
-
micro_batch_size: 8
|
61 |
gradient_accumulation_steps: 2
|
62 |
-
num_epochs: 3
|
63 |
optimizer: adamw_8bit
|
64 |
lr_scheduler: cosine
|
65 |
-
learning_rate: 1.2e-4
|
66 |
warmup_ratio: 0.03
|
67 |
weight_decay: 0.01
|
68 |
bf16: auto
|
69 |
tf32: false
|
70 |
logging_steps: 10
|
71 |
-
saves_per_epoch: 2
|
72 |
evals_per_epoch: 2
|
73 |
```
|
74 |
|
|
|
28 |
plugins:
|
29 |
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
|
30 |
cut_cross_entropy: true
|
31 |
+
load_in_4bit: false
|
32 |
gradient_checkpointing: true
|
33 |
gradient_checkpointing_kwargs:
|
34 |
use_reentrant: false
|
|
|
49 |
content: content
|
50 |
val_set_size: 0.01
|
51 |
output_dir: ./outputs/bambara-gemma3n-lora-exp4
|
52 |
+
adapter: lora
|
53 |
+
lora_r: 64
|
54 |
+
lora_alpha: 128
|
55 |
lora_dropout: 0.05
|
56 |
lora_target_modules: 'model.language_model.layers.[\d]+.(mlp|self_attn).(up|down|gate|q|k|v|o)_proj'
|
57 |
sequence_len: 4096
|
58 |
sample_packing: false
|
59 |
pad_to_sequence_len: false
|
60 |
+
micro_batch_size: 8
|
61 |
gradient_accumulation_steps: 2
|
62 |
+
num_epochs: 3
|
63 |
optimizer: adamw_8bit
|
64 |
lr_scheduler: cosine
|
65 |
+
learning_rate: 1.2e-4
|
66 |
warmup_ratio: 0.03
|
67 |
weight_decay: 0.01
|
68 |
bf16: auto
|
69 |
tf32: false
|
70 |
logging_steps: 10
|
71 |
+
saves_per_epoch: 2
|
72 |
evals_per_epoch: 2
|
73 |
```
|
74 |
|