wim-n1-phi4-mini-merged / training_args.json
yhavinga's picture
Upload Phi-4-mini N1 entity extraction merged model
79c1bee verified
raw
history blame contribute delete
876 Bytes
{
"model": "phi4-mini",
"use_unsloth_model": true,
"dataset": "UWV/wim-instruct-wiki-to-jsonld-agent-steps",
"max_samples": null,
"filter_n1_only": true,
"max_seq_length": 16384,
"batch_size": 16,
"gradient_accumulation_steps": 1,
"max_steps": 2000,
"learning_rate": 2e-05,
"warmup_steps": 50,
"max_grad_norm": 1.0,
"lora_r": 512,
"lora_alpha": 1024,
"lora_dropout": 0.05,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
],
"load_in_4bit": true,
"use_gradient_checkpointing": false,
"rope_scaling": null,
"output_dir": "/data/model_outputs/n1_entity_extraction_model_r512_dropout",
"assistant_only_loss": true,
"no_eval": false,
"num_workers": 8,
"full_finetune": false,
"wandb": true,
"seed": 42,
"push_to_hub": false,
"hub_model_id": null
}