```json
{
    "lora_name": "lora-meta-llama_Meta-Llama-3-8B-16bit-ORKL-cleaned_juergen-v3_LR_1e-3",
    "always_override": true,
    "save_steps": 722,
    "micro_batch_size": 4,
    "batch_size": 0,
    "epochs": 10,
    "learning_rate": "1e-3",
    "lr_scheduler_type": "linear",
    "lora_rank": 128,
    "lora_alpha": 256,
    "lora_dropout": 0.05,
    "cutoff_len": 256,
    "dataset": "None",
    "eval_dataset": "None",
    "format": "None",
    "eval_steps": 100,
    "raw_text_file": "orkl-cleaned-juergen",
    "higher_rank_limit": false,
    "warmup_steps": 128,
    "optimizer": "adamw_torch",
    "hard_cut_string": "\\n\\n\\n",
    "train_only_after": "",
    "stop_at_loss": 1,
    "add_eos_token": false,
    "min_chars": 3,
    "report_to": "wandb",
    "precize_slicing_overlap": true,
    "add_eos_token_type": "Every Block",
    "save_steps_under_loss": 1.8,
    "add_bos_token": true,
    "training_projection": "q-v",
    "sliding_window": false,
    "warmup_ratio": 0,
    "grad_accumulation": 64,
    "neft_noise_alpha": 0
}
```
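
For reference, the key hyperparameters above map onto Hugging Face PEFT/Transformers roughly as in the sketch below. This is a minimal sketch, assuming the `training_projection` value `"q-v"` targets the `q_proj`/`v_proj` attention modules of the Llama architecture; the module names, output path, and `task_type` are assumptions, not values from the config. Note that the effective batch size is `micro_batch_size` × `grad_accumulation` = 4 × 64 = 256 sequences per optimizer step.

```python
# Sketch only: maps the JSON fields above onto PEFT/Transformers objects.
# Assumptions (not in the config): q_proj/v_proj module names, output_dir,
# and task_type.
from peft import LoraConfig
from transformers import TrainingArguments

lora_config = LoraConfig(
    r=128,                                # lora_rank
    lora_alpha=256,                       # lora_alpha (2x rank)
    lora_dropout=0.05,                    # lora_dropout
    target_modules=["q_proj", "v_proj"],  # training_projection "q-v" (assumed mapping)
    bias="none",
    task_type="CAUSAL_LM",                # assumption
)

training_args = TrainingArguments(
    output_dir="loras/placeholder",       # hypothetical; config only names the adapter
    num_train_epochs=10,                  # epochs
    per_device_train_batch_size=4,        # micro_batch_size
    gradient_accumulation_steps=64,       # grad_accumulation -> effective batch of 256
    learning_rate=1e-3,                   # learning_rate
    lr_scheduler_type="linear",           # lr_scheduler_type
    warmup_steps=128,                     # warmup_steps
    optim="adamw_torch",                  # optimizer
    save_steps=722,                       # save_steps
    eval_steps=100,                       # eval_steps
    report_to="wandb",                    # report_to
)
```

Fields such as `cutoff_len`, `hard_cut_string`, and `precize_slicing_overlap` govern how the `raw_text_file` is sliced into training blocks inside the training UI and have no direct `TrainingArguments` equivalent.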