{
  "exp_name": "train_ppo",
  "seed": 0,
  "log_with": "wandb",
  "task_name": null,
  "model_name": "google/gemma-2b",
  "query_dataset": "imdb",
  "reward_model": "sentiment-analysis:lvwerra/distilbert-imdb",
  "remove_unused_columns": true,
  "tracker_kwargs": {},
  "accelerator_kwargs": {},
  "project_kwargs": {},
  "tracker_project_name": "trl",
  "push_to_hub_if_best_kwargs": {
    "repo_id": "gemma_2b_oasst1_ppo_model"
  },
  "steps": 20000,
  "learning_rate": 1e-05,
  "adap_kl_ctrl": true,
  "init_kl_coef": 0.2,
  "kl_penalty": "kl",
  "target": 6,
  "horizon": 10000,
  "gamma": 1,
  "lam": 0.95,
  "cliprange": 0.2,
  "cliprange_value": 0.2,
  "vf_coef": 0.1,
  "batch_size": 25,
  "forward_batch_size": null,
  "mini_batch_size": 25,
  "gradient_accumulation_steps": 1,
  "world_size": 1,
  "ppo_epochs": 1,
  "max_grad_norm": null,
  "optimize_cuda_cache": null,
  "optimize_device_cache": false,
  "early_stopping": true,
  "target_kl": 1,
  "compare_steps": 150,
  "ratio_threshold": 10.0,
  "use_score_scaling": false,
  "use_score_norm": false,
  "score_clip": null,
  "whiten_rewards": false,
  "is_encoder_decoder": false,
  "is_peft_model": true,
  "backward_batch_size": 25,
  "global_backward_batch_size": 25,
  "global_batch_size": 25
}