{ "model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "dataset_name": "George-API/cognitive-data", "output_dir": "./results", "seed": 42, "# Tokenization settings": "These settings ensure we preserve existing tokenization", "trust_remote_code": true, "use_fast_tokenizer": true, "skip_tokenization": true, "max_seq_length": 2048, "chat_template": "chatml", "# Quantization settings": "4-bit quantization for memory efficiency", "load_in_4bit": true, "bnb_4bit_quant_type": "nf4", "bnb_4bit_compute_dtype": "float16", "bnb_4bit_use_double_quant": true, "# PEFT settings": "LoRA configuration for efficient fine-tuning", "use_peft": true, "lora_r": 16, "lora_alpha": 32, "lora_dropout": 0.05, "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], "# Training parameters": "Optimized for cognitive science fine-tuning", "num_train_epochs": 5, "per_device_train_batch_size": 4, "gradient_accumulation_steps": 8, "learning_rate": 3e-5, "weight_decay": 0.01, "warmup_ratio": 0.1, "lr_scheduler_type": "linear", "logging_steps": 10, "save_strategy": "steps", "save_steps": 100, "save_total_limit": 3, "fp16": true, "bf16": false, "max_grad_norm": 0.5, "# Hugging Face Hub settings": "For saving and sharing the model", "push_to_hub": true, "hub_model_id": "DeepSeek-Cognitive-Science", "hub_private_repo": true }