Spaces:
Running
Running
| data: | |
| train_files: hiyouga/math12k@train | |
| val_files: hiyouga/math12k@test | |
| prompt_key: problem | |
| answer_key: answer | |
| image_key: images | |
| max_prompt_length: 2048 | |
| max_response_length: 2048 | |
| rollout_batch_size: 512 | |
| val_batch_size: 1024 | |
| format_prompt: ./examples/format_prompt/math_format.jinja | |
| override_chat_template: null | |
| shuffle: true | |
| seed: 1 | |
| max_pixels: 4194304 | |
| min_pixels: 262144 | |
| filter_overlong_prompts: true | |
| algorithm: | |
| adv_estimator: grpo | |
| disable_kl: false | |
| use_kl_loss: true | |
| kl_penalty: low_var_kl | |
| kl_coef: 1.0e-2 | |
| mock_data: test | |
| worker: | |
| actor: | |
| global_batch_size: 128 | |
| micro_batch_size_per_device_for_update: 2 | |
| micro_batch_size_per_device_for_experience: 8 | |
| max_grad_norm: 1.0 | |
| padding_free: true | |
| ulysses_sequence_parallel_size: 1 | |
| model: | |
| model_path: Qwen/Qwen2.5-7B-Instruct | |
| enable_gradient_checkpointing: true | |
| trust_remote_code: false | |
| freeze_vision_tower: false | |
| optim: | |
| lr: 1.0e-6 | |
| weight_decay: 1.0e-2 | |
| strategy: adamw # supported values: adamw, adamw_bf16 | |
| lr_warmup_ratio: 0.0 | |
| fsdp: | |
| enable_full_shard: true | |
| enable_cpu_offload: false | |
| enable_rank0_init: true | |
| offload: | |
| offload_params: true # true: more CPU memory; false: more GPU memory | |
| offload_optimizer: true # true: more CPU memory; false: more GPU memory | |
| rollout: | |
| n: 5 | |
| temperature: 1.0 | |
| top_p: 0.99 | |
| gpu_memory_utilization: 0.7 | |
| enforce_eager: false | |
| enable_chunked_prefill: false | |
| tensor_parallel_size: 2 | |
| limit_images: 0 | |
| val_override_config: | |
| temperature: 1.0 | |
| n: 1 | |
| ref: | |
| fsdp: | |
| enable_full_shard: true | |
| enable_cpu_offload: true # true: more CPU memory; false: more GPU memory | |
| enable_rank0_init: true | |
| offload: | |
| offload_params: true | |
| reward: | |
| reward_type: batch | |
| reward_function: ./examples/reward_function/math.py:compute_score | |
| trainer: | |
| total_epochs: 2 | |
| max_steps: null | |
| project_name: easy_r1 | |
| experiment_name: qwen2_5_7b_math_grpo | |
| logger: ["console", "wandb"] | |
| nnodes: 1 | |
| n_gpus_per_node: 8 | |
| val_freq: 3 # -1 to disable | |
| val_before_train: true | |
| val_only: false | |
| val_generations_to_log: 3 | |
| save_freq: 5 # -1 to disable | |
| save_limit: 3 # -1 to disable | |
| save_checkpoint_path: your_checkpoint_path | |
| load_checkpoint_path: null |