dereckpichemila committed
Commit 4728a06 · verified · 1 Parent(s): b6f2447

Add files using upload-large-folder tool

Files changed (50)
  1. .hydra/config.yaml +155 -0
  2. .hydra/hydra.yaml +154 -0
  3. .hydra/overrides.yaml +1 -0
  4. run.log +0 -0
  5. seed_0/Qwen/Qwen3-4B-Instruct-2507/adapters/README.md +207 -0
  6. seed_0/Qwen/Qwen3-4B-Instruct-2507/adapters/critic_adapter/adapter_config.json +42 -0
  7. src_code_for_reproducibility/docs/Makefile +19 -0
  8. src_code_for_reproducibility/docs/generate_docs.py +249 -0
  9. src_code_for_reproducibility/docs/make.bat +35 -0
  10. src_code_for_reproducibility/docs/source/contributing.rst +0 -0
  11. src_code_for_reproducibility/docs/source/environments.rst +35 -0
  12. src_code_for_reproducibility/docs/source/index.rst +22 -0
  13. src_code_for_reproducibility/docs/source/installation.rst +10 -0
  14. src_code_for_reproducibility/docs/source/launch.rst +0 -0
  15. src_code_for_reproducibility/docs/source/marl_standard.rst +141 -0
  16. src_code_for_reproducibility/docs/source/modules.rst +7 -0
  17. src_code_for_reproducibility/docs/source/src.environments.dond.dond_agent.rst +7 -0
  18. src_code_for_reproducibility/docs/source/src.environments.dond.dond_return_funcs.rst +7 -0
  19. src_code_for_reproducibility/docs/source/src.environments.dond.dond_statistics_funcs.rst +7 -0
  20. src_code_for_reproducibility/docs/source/src.environments.dond.dond_training_data_funcs.rst +7 -0
  21. src_code_for_reproducibility/docs/source/src.environments.dond.rst +19 -0
  22. src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst +7 -0
  23. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst +7 -0
  24. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_game.rst +7 -0
  25. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_statistics_funcs.rst +7 -0
  26. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_training_data_funcs.rst +7 -0
  27. src_code_for_reproducibility/docs/source/src.experiments.arithmetic_test.rst +7 -0
  28. src_code_for_reproducibility/docs/source/src.experiments.generate_and_train.rst +7 -0
  29. src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst +7 -0
  30. src_code_for_reproducibility/docs/source/src.experiments.rst +17 -0
  31. src_code_for_reproducibility/docs/source/src.generation.rst +15 -0
  32. src_code_for_reproducibility/docs/source/src.models.dummy_hf_agent.rst +7 -0
  33. src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst +7 -0
  34. src_code_for_reproducibility/docs/source/src.models.local_llm.rst +7 -0
  35. src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst +7 -0
  36. src_code_for_reproducibility/docs/source/src.models.oai_agent.rst +7 -0
  37. src_code_for_reproducibility/docs/source/src.models.rst +20 -0
  38. src_code_for_reproducibility/docs/source/src.models.server_llm.rst +7 -0
  39. src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst +7 -0
  40. src_code_for_reproducibility/docs/source/src.rst +28 -0
  41. src_code_for_reproducibility/docs/source/src.training.ppo_train.rst +7 -0
  42. src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst +7 -0
  43. src_code_for_reproducibility/docs/source/src.training.rst +19 -0
  44. src_code_for_reproducibility/docs/source/src.training.train_main.rst +7 -0
  45. src_code_for_reproducibility/docs/source/src.utils.common_imports.rst +7 -0
  46. src_code_for_reproducibility/docs/source/src.utils.export_ppo_training_set.rst +7 -0
  47. src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst +7 -0
  48. src_code_for_reproducibility/docs/source/src.utils.model_to_cpu.rst +7 -0
  49. src_code_for_reproducibility/docs/source/src.utils.rst +24 -0
  50. src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst +7 -0
.hydra/config.yaml ADDED
@@ -0,0 +1,155 @@
experiment:
  nb_epochs: 1
  nb_matches_per_iteration: 64
  reinit_matches_each_it: true
  checkpoint_every_n_iterations: 50
  start_epoch: 0
  resume_experiment: true
  base_seed: 0
  seed_group_size: 8
  train: false
  name: openai_gpt_4.1_tas
  method: generate_and_train
  description: Trust-and-Split negotiation game
temperature: 1.0
markov_games:
  runner_method_name: LinearRunner
  runner_kwargs: {}
  group_by_round: true
  simulation_class_name: TrustAndSplitSimulation
  simulation_init_args:
    agent_ids: ${agent_ids}
    nb_of_rounds: 10
    quota_messages_per_agent_per_round: 1
  agents:
    0:
      agent_id: ${agent_0_id}
      agent_class_name: TrustAndSplitAgent
      policy_id: openai_llm
      init_kwargs:
        goal: Maximize your total points over the whole game.
        num_message_chars: 500
    1:
      agent_id: ${agent_1_id}
      agent_class_name: TrustAndSplitAgent
      policy_id: openai_llm
      init_kwargs:
        goal: Maximize your total points over the whole game.
        num_message_chars: 500
models:
  base_llm:
    class: LeanLocalLLM
    init_args:
      llm_id: base_llm
      model_name: Qwen/Qwen3-4B-Instruct-2507
      inference_backend: dummy
      max_thinking_characters: 0
      hf_kwargs:
        device_map: auto
        torch_dtype: bfloat16
        max_memory:
          0: 20GiB
        attn_implementation: flash_attention_2
      inference_backend_init_kwargs:
        seed: ${experiment.base_seed}
        enable_prefix_caching: true
        max_model_len: 10000.0
        gpu_memory_utilization: 0.5
        dtype: bfloat16
        trust_remote_code: true
        max_lora_rank: 32
      inference_backend_sampling_params:
        temperature: ${temperature}
        top_p: 1.0
        max_tokens: 400
        top_k: -1
      adapter_configs:
        agent_adapter:
          task_type: CAUSAL_LM
          r: 32
          lora_alpha: 64
          lora_dropout: 0.0
          target_modules: all-linear
        critic_adapter:
          task_type: CAUSAL_LM
          r: 32
          lora_alpha: 64
          lora_dropout: 0.0
          target_modules: all-linear
      regex_max_attempts: 3
  openai_llm:
    class: LargeLanguageModelOpenAI
    init_args:
      use_reasoning: false
      llm_id: openai_llm
      model: gpt-4.1
      base_url: https://api.openai.com/v1
      timeout_s: 6000.0
      regex_max_attempts: 3
      sampling_params:
        temperature: 1.0
        top_p: 1.0
        max_output_tokens: 200
critics:
  agent_critic:
    module_pointer:
    - base_llm
    - critic_adapter
optimizers:
  agent_optimizer:
    module_pointer:
    - base_llm
    - agent_adapter
    optimizer_class_name: torch.optim.Adam
    init_args:
      lr: 1.0e-06
      weight_decay: 0.0
  critic_optimizer:
    module_pointer: agent_critic
    optimizer_class_name: torch.optim.Adam
    init_args:
      lr: 3.0e-06
      weight_decay: 0.0
trainers:
  agent_trainer:
    class: TrainerNaive
    module_pointers:
      policy:
      - base_llm
      - agent_adapter
      policy_optimizer: agent_optimizer
      critic: agent_critic
      critic_optimizer: critic_optimizer
    kwargs:
      entropy_coeff: 0.0
      kl_coeff: 0.0
      gradient_clipping: 1.0
      restrict_tokens: null
      mini_batch_size: 1
      use_gradient_checkpointing: false
      use_qwen_reasoning_mask: false
      temperature: ${temperature}
      device: cuda:0
      use_gae: false
      whiten_advantages: false
      whiten_advantages_time_step_wise: false
      skip_discounted_state_visitation: true
      use_gae_lambda_annealing: false
      gae_lambda_annealing_method: None
      gae_lambda_annealing_method_params: None
      gae_lambda_annealing_limit: 0.95
      discount_factor: 0.9
      use_rloo: true
      enable_tokenwise_logging: false
      pg_loss_normalization: batch
      reward_normalizing_constant: 200.0
train_on_which_data:
  agent_trainer: []
common_agent_kwargs:
  goal: Maximize your total points over the whole game.
  num_message_chars: 500
agent_0_id: Alice
agent_1_id: Bob
agent_ids:
- Alice
- Bob
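
The config above is the OmegaConf snapshot Hydra saved for this run. Below is a minimal sketch, not one of the committed files, of how such a snapshot can be inspected with OmegaConf; it assumes only that the file sits at `.hydra/config.yaml` as in this repository.

from omegaconf import OmegaConf

# Load the snapshot Hydra wrote for this run.
cfg = OmegaConf.load(".hydra/config.yaml")

# Interpolations such as ${temperature}, ${agent_ids} and ${experiment.base_seed}
# resolve against the root of this same file.
print(cfg.models.base_llm.init_args.model_name)  # Qwen/Qwen3-4B-Instruct-2507
print(cfg.models.openai_llm.init_args.model)     # gpt-4.1
print(cfg.agent_0_id, cfg.agent_1_id)            # Alice Bob

# Resolving a sub-tree fills in the ${temperature} reference.
sampling = OmegaConf.to_container(
    cfg.models.base_llm.init_args.inference_backend_sampling_params, resolve=True
)
print(sampling)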
.hydra/hydra.yaml ADDED
@@ -0,0 +1,154 @@
hydra:
  run:
    dir: ${oc.env:SCRATCH}/llm_negotiation/${now:%Y_%m}/${experiment.name}
  sweep:
    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.

      '
    footer: 'Powered by Hydra (https://hydra.cc)

      Use --hydra-help to view Hydra specific help

      '
    template: '${hydra.help.header}

      == Configuration groups ==

      Compose your configuration from those groups (group=option)


      $APP_CONFIG_GROUPS


      == Config ==

      Override anything in the config (foo.bar=value)


      $CONFIG


      ${hydra.help.footer}

      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})

      See https://hydra.cc for more info.


      == Flags ==

      $FLAGS_HELP


      == Configuration groups ==

      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)


      $HYDRA_CONFIG_GROUPS


      Use ''--cfg hydra'' to Show the Hydra config.

      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: run
    chdir: false
    override_dirname: ''
    id: ???
    num: ???
    config_name: openai_gpt_4.1_tas
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.1'
    cwd: /home/mila/d/dereck.piche/experiment_safe/exp_safe_llm_nego
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: /home/mila/d/dereck.piche/experiment_safe/exp_safe_llm_nego/configs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: /network/scratch/d/dereck.piche/llm_negotiation/2025_09/openai_gpt_4.1_tas
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
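
The `hydra.yaml` dump above records how the run was launched: job name `run`, config name `openai_gpt_4.1_tas`, and a file config source under `<repo>/configs`. As a hedged sketch, not part of the committed files, the same configuration could be re-composed with Hydra's compose API; the config-directory path is machine-specific and copied from `runtime.config_sources` above.

from hydra import compose, initialize_config_dir

# Path taken from runtime.config_sources in the dump above; adjust to your checkout.
CONFIG_DIR = "/home/mila/d/dereck.piche/experiment_safe/exp_safe_llm_nego/configs"

with initialize_config_dir(config_dir=CONFIG_DIR, version_base="1.1"):
    cfg = compose(config_name="openai_gpt_4.1_tas")

print(cfg.experiment.name)  # openai_gpt_4.1_tas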
.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
[]
run.log ADDED
The diff for this file is too large to render. See raw diff
 
seed_0/Qwen/Qwen3-4B-Instruct-2507/adapters/README.md ADDED
@@ -0,0 +1,207 @@
---
base_model: Qwen/Qwen3-4B-Instruct-2507
library_name: peft
pipeline_tag: text-generation
tags:
- base_model:adapter:Qwen/Qwen3-4B-Instruct-2507
- lora
- transformers
---

# Model Card for Model ID

<!-- Provide a quick summary of what the model is/does. -->



## Model Details

### Model Description

<!-- Provide a longer summary of what this model is. -->



- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]

### Model Sources [optional]

<!-- Provide the basic links for the model. -->

- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]

## Uses

<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

### Direct Use

<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

[More Information Needed]

### Downstream Use [optional]

<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->

[More Information Needed]

### Out-of-Scope Use

<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->

[More Information Needed]

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

[More Information Needed]

### Recommendations

<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->

Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.

## How to Get Started with the Model

Use the code below to get started with the model.

[More Information Needed]

## Training Details

### Training Data

<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->

[More Information Needed]

### Training Procedure

<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->

#### Preprocessing [optional]

[More Information Needed]


#### Training Hyperparameters

- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->

#### Speeds, Sizes, Times [optional]

<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->

[More Information Needed]

## Evaluation

<!-- This section describes the evaluation protocols and provides the results. -->

### Testing Data, Factors & Metrics

#### Testing Data

<!-- This should link to a Dataset Card if possible. -->

[More Information Needed]

#### Factors

<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->

[More Information Needed]

#### Metrics

<!-- These are the evaluation metrics being used, ideally with a description of why. -->

[More Information Needed]

### Results

[More Information Needed]

#### Summary



## Model Examination [optional]

<!-- Relevant interpretability work for the model goes here -->

[More Information Needed]

## Environmental Impact

<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->

Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).

- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]

## Technical Specifications [optional]

### Model Architecture and Objective

[More Information Needed]

### Compute Infrastructure

[More Information Needed]

#### Hardware

[More Information Needed]

#### Software

[More Information Needed]

## Citation [optional]

<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->

**BibTeX:**

[More Information Needed]

**APA:**

[More Information Needed]

## Glossary [optional]

<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->

[More Information Needed]

## More Information [optional]

[More Information Needed]

## Model Card Authors [optional]

[More Information Needed]

## Model Card Contact

[More Information Needed]

### Framework versions

- PEFT 0.17.0
seed_0/Qwen/Qwen3-4B-Instruct-2507/adapters/critic_adapter/adapter_config.json ADDED
@@ -0,0 +1,42 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "Qwen/Qwen3-4B-Instruct-2507",
  "bias": "none",
  "corda_config": null,
  "eva_config": null,
  "exclude_modules": null,
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 64,
  "lora_bias": false,
  "lora_dropout": 0.0,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "qalora_group_size": 16,
  "r": 32,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "k_proj",
    "v_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
    "q_proj",
    "o_proj"
  ],
  "target_parameters": null,
  "task_type": "CAUSAL_LM",
  "trainable_token_indices": null,
  "use_dora": false,
  "use_qalora": false,
  "use_rslora": false
}
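
The adapter config above is a standard PEFT LoRA description (rank 32, alpha 64, all attention and MLP projections of the Qwen3-4B base). A minimal sketch, not one of the committed files, of attaching this adapter to its base model with PEFT; the only assumption is that the adapter folder is used exactly as saved in this repository.

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Base model named in base_model_name_or_path above.
base = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-4B-Instruct-2507",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Adapter folder as committed in this repo.
model = PeftModel.from_pretrained(
    base, "seed_0/Qwen/Qwen3-4B-Instruct-2507/adapters/critic_adapter"
)
model.eval()  # adapter_config.json sets inference_mode: true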
src_code_for_reproducibility/docs/Makefile ADDED
@@ -0,0 +1,19 @@
# Minimal makefile for Sphinx documentation

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)
src_code_for_reproducibility/docs/generate_docs.py ADDED
@@ -0,0 +1,249 @@
#!/usr/bin/env python3
"""
Script to automatically generate Sphinx documentation for all modules and build the HTML website.
"""
import importlib.util
import os
import subprocess
import sys


def check_and_install_dependencies():
    """Check for required dependencies and install them if missing."""
    required_packages = [
        "sphinx",
        "sphinx-rtd-theme",
        "sphinxcontrib-napoleon",
        "sphinxcontrib-mermaid",
        "sphinx-autodoc-typehints",
    ]

    missing_packages = []

    for package in required_packages:
        # Convert package name to module name (replace - with _)
        module_name = package.replace("-", "_")

        # Check if the package is installed
        if importlib.util.find_spec(module_name) is None:
            missing_packages.append(package)

    # Install missing packages
    if missing_packages:
        print(f"Installing missing dependencies: {', '.join(missing_packages)}")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install"] + missing_packages
        )
        print("Dependencies installed successfully")
    else:
        print("All required dependencies are already installed")


def create_makefile(docs_dir):
    """Create a Makefile for Sphinx documentation if it doesn't exist."""
    makefile_path = os.path.join(docs_dir, "Makefile")

    if os.path.exists(makefile_path):
        print(f"Makefile already exists at {makefile_path}")
        return

    print(f"Creating Makefile at {makefile_path}")

    makefile_content = """# Minimal makefile for Sphinx documentation

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)
"""

    with open(makefile_path, "w") as f:
        f.write(makefile_content)

    print("Makefile created successfully")


def create_make_bat(docs_dir):
    """Create a make.bat file for Windows if it doesn't exist."""
    make_bat_path = os.path.join(docs_dir, "make.bat")

    if os.path.exists(make_bat_path):
        print(f"make.bat already exists at {make_bat_path}")
        return

    print(f"Creating make.bat at {make_bat_path}")

    make_bat_content = """@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
"""

    with open(make_bat_path, "w") as f:
        f.write(make_bat_content)

    print("make.bat created successfully")


def main():
    # Check and install required dependencies
    print("=== Checking dependencies ===")
    check_and_install_dependencies()

    # Get the directory of this script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Path to the project root
    project_root = os.path.dirname(script_dir)

    # Path to the source directory
    source_dir = os.path.join(project_root, "src")

    # Path to the docs source directory
    docs_source_dir = os.path.join(script_dir, "source")

    # Print paths for debugging
    print(f"Script directory: {script_dir}")
    print(f"Project root: {project_root}")
    print(f"Source directory: {source_dir}")
    print(f"Docs source directory: {docs_source_dir}")

    # Make sure the source directory exists
    if not os.path.exists(source_dir):
        print(f"Error: Source directory {source_dir} does not exist!")
        sys.exit(1)

    # Make sure the docs source directory exists
    if not os.path.exists(docs_source_dir):
        print(f"Creating docs source directory: {docs_source_dir}")
        os.makedirs(docs_source_dir)

    # Step 1: Run sphinx-apidoc to generate .rst files for all modules
    print("\n=== Generating API documentation ===")
    cmd = [
        "sphinx-apidoc",
        "-f",  # Force overwriting of existing files
        "-e",  # Put module documentation before submodule documentation
        "-M",  # Put module documentation before subpackage documentation
        "-o",
        docs_source_dir,  # Output directory
        source_dir,  # Source code directory
    ]

    print(f"Running command: {' '.join(cmd)}")
    result = subprocess.run(cmd, capture_output=True, text=True)

    # Print the output of the command
    print("STDOUT:")
    print(result.stdout)

    print("STDERR:")
    print(result.stderr)

    if result.returncode != 0:
        print(f"Error: sphinx-apidoc failed with return code {result.returncode}")
        sys.exit(1)

    # List the files in the docs source directory
    print("\nFiles in docs/source directory:")
    for file in sorted(os.listdir(docs_source_dir)):
        print(f"  {file}")

    print("\nDocumentation source files generated successfully!")

    # Step 2: Create Makefile and make.bat if they don't exist
    create_makefile(script_dir)
    create_make_bat(script_dir)

    # Step 3: Build the HTML documentation
    print("\n=== Building HTML documentation ===")

    # Determine the build command based on the platform
    if os.name == "nt":  # Windows
        build_cmd = ["make.bat", "html"]
    else:  # Unix/Linux/Mac
        build_cmd = ["make", "html"]

    # Change to the docs directory to run the build command
    os.chdir(script_dir)

    print(f"Running command: {' '.join(build_cmd)}")
    build_result = subprocess.run(build_cmd, capture_output=True, text=True)

    # Print the output of the build command
    print("STDOUT:")
    print(build_result.stdout)

    print("STDERR:")
    print(build_result.stderr)

    if build_result.returncode != 0:
        print(f"Error: HTML build failed with return code {build_result.returncode}")
        sys.exit(1)

    # Get the path to the built HTML documentation
    html_dir = os.path.join(script_dir, "build", "html")
    index_path = os.path.join(html_dir, "index.html")

    if os.path.exists(index_path):
        print(f"\nHTML documentation built successfully!")
        print(f"You can view it by opening: {index_path}")

        # Try to open the documentation in a browser
        try:
            import webbrowser

            print("\nAttempting to open documentation in your default browser...")
            webbrowser.open(f"file://{index_path}")
        except Exception as e:
            print(f"Could not open browser automatically: {e}")
    else:
        print(f"\nWarning: HTML index file not found at {index_path}")


if __name__ == "__main__":
    main()
src_code_for_reproducibility/docs/make.bat ADDED
@@ -0,0 +1,35 @@
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
src_code_for_reproducibility/docs/source/contributing.rst ADDED
File without changes
src_code_for_reproducibility/docs/source/environments.rst ADDED
@@ -0,0 +1,35 @@
=================
MARL Environments
=================

This section provides detailed documentation for the multi-agent negotiation environments included in the library.

Each environment follows the standard interface described in :doc:`../environments` but has its own unique game rules,
dynamics, and implementation details.

.. toctree::
   :maxdepth: 2
   :caption: Available Environments:

   environments/ipd
   environments/diplomacy
   environments/dond

Overview
--------

The library currently includes the following environments:

1. **Iterated Prisoner's Dilemma (IPD)**: A classic game theory problem where two agents repeatedly decide whether to cooperate or defect, with different payoffs based on their joint actions.

2. **Diplomacy**: An adaptation of the board game Diplomacy, where seven European powers compete for control of supply centers through strategic moves and alliances.

3. **Deal or No Deal (DOND)**: A negotiation environment based on the paper `Deal or No Deal? End-to-End Learning for Negotiation Dialogues <https://arxiv.org/pdf/1706.05125>`_, in which agents negotiate over the distribution of a set of prizes.

Each environment documentation includes:

- Game rules and background
- Implementation details
- API reference
- Example usage
- Advanced features and customization options
src_code_for_reproducibility/docs/source/index.rst ADDED
@@ -0,0 +1,22 @@
Welcome to LLM Negotiation's documentation!
===========================================

This library is a collection of tools for training and evaluating LLM-based agents in multi-agent environments. It is designed to be easy to use and extend.

.. toctree::
   :maxdepth: 3
   :caption: Contents:

   installation
   marl_standard
   environments
   launch
   usage
   modules
   contributing

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
src_code_for_reproducibility/docs/source/installation.rst ADDED
@@ -0,0 +1,10 @@
Installation
============

To install the package, run:

.. code-block:: bash

   git clone https://github.com/yourusername/llm_negotiation.git
   cd llm_negotiation
   pip install -e .
src_code_for_reproducibility/docs/source/launch.rst ADDED
File without changes
src_code_for_reproducibility/docs/source/marl_standard.rst ADDED
@@ -0,0 +1,141 @@
===========================================================
Abstract Standard for Multi-Agent Negotiation Environments
===========================================================

Multi-Agent Negotiation Environments require more features than gymnasium environments in order to be used as interfaces in general game-running code.
The fundamental differences between gymnasium environments and Multi-Agent Negotiation Environments are:

1. The response from the LLM is a text action, not a discrete action. Appropriate parsing of the text is therefore required, and the model may need to be run multiple times to obtain a complete action.
   This is why we introduce the `AgentHandler` class, which is responsible for parsing the LLM's response.
2. The environment needs to be able to handle multi-agent interactions.
   This is why we introduce the `NegotiationEnvironment` class, which is responsible for handling the multi-agent interactions.
3. MARL environments are complex to describe, and in different contexts the same environment may be described differently. Therefore, both the environment and the agent handlers are
   responsible for describing a particular trajectory. This information is given by the `get_log_info` method.
4. There may be a lot of overlap between the neural networks used by the agents; for instance, the same model may be used for all agents. This motivates the requirement for a
   policy identifier for each agent.

Taking inspiration from the `gymnasium <https://gymnasium.farama.org/>`_ library, we introduce a new standard for Multi-Agent Negotiation Environments.

Our standard is based on the following features:

Environments are of the form:

.. code-block:: python

    class MarlEnvironment():

        def __init__(self):
            """Initialize the environment."""
            pass

        def reset(self):
            """Reset the environment to an initial state and return the initial observation.

            Returns:
                observation (dict): A dictionary where keys are agent identifiers and values are observations.
            """
            # (...)
            return observation

        def step(self, actions):
            """Take a step in the environment using the provided actions.

            Args:
                actions (dict): A dictionary where keys are agent identifiers and values are actions.

            Returns:
                observations (dict): A dictionary where keys are agent identifiers and values are observations.
                done (bool): Whether the episode has ended.
                info (dict): Additional information about the environment.
            """
            # (...)
            return observations, done, info

        def get_log_info(self):
            """Get additional information about the environment. This information is used to log the game.

            Returns:
                log_info (dict): Information about the environment required to log the game.
            """
            # (...)
            return log_info

        def render(self):
            """Render the current state of the environment."""
            pass

        def close(self):
            """Perform any necessary cleanup."""
            pass


    class AgentState():

        def __init__(self):
            """Initialize the agent state."""
            pass

        def step(self, observation_from_env, policy_output=None):
            """Update the agent state based on the observation and the action.
            The action is the output of the LLM.

            Args:
                observation_from_env (dict): The observation of the environment.
                policy_output: The output of the policy.

            Returns:
                policy_id (str): The policy identifier.
                policy_input (dict): The input to the policy.
                action: The official action to be sent to the environment.
                done (bool): Whether the LLM action is ready to be sent to the environment.
                info (dict): Additional information about the agent.
            """
            # (...)
            return policy_id, policy_input, action, done, info

        def get_log_info(self):
            """Get information about the agent required to log a trajectory.

            Returns:
                log_info (dict): Information about the agent required to log a trajectory.
            """
            # (...)
            return log_info

        def render(self):
            """Render the current state of the agent."""
            pass

        def close(self):
            """Perform any necessary cleanup."""
            pass


Implicitly, the keys of the `observations` dictionary returned by the `step` method of the `MarlEnvironment` interface represent the set of agents from which an action is expected at the current step. The next step should only expect actions from the agents in the `observations` dictionary.

As you can see, both classes have a `get_log_info` method. This method is used to log the game. It returns a dictionary whose keys are the agent identifiers and whose values are the information to log. We need this because the environment and the agent handler may log different information, and it makes it easier to log from the perspective of each agent. The core environment class should not need to know about the details of the agent handler.


Running Environments in Parallel
--------------------------------
This standard allows the use of the `run_batched_matches` function (TODO: link) to run environments in an efficient way. The core idea is to batch the policy calls for all agents in the environment.

.. note::
    The ``run_batched_matches`` function allows you to run multiple negotiation games, or "matches," in parallel.
    After each environment is initialized, the function continuously loops over all active matches and checks which agents
    are still pending actions. Each agent's logic can require multiple calls to the policy (e.g., an LLM) before an action
    becomes "ready" to be sent to the environment. (For instance, an agent might need multiple policy calls before having a string which can be parsed into a valid action.) While an agent is waiting for a policy output, these calls for all agents across all matches are grouped together by unique policy identifier and processed in batch for efficiency. This is the core functionality of the ``run_batched_matches`` function.

    Only once all actions from the required agents at a given step for an environment are ready does the function make a single ``env.step(...)`` call; this ensures
    every match moves forward in lockstep for all its active agents. As soon as an environment signals it is done, the function
    retrieves logged information from both the environment and the agent states before removing this match from the active set.

    If there are more matches waiting to be processed, they are then started one by one to maintain the specified degree of parallelism.
    This batching approach provides an efficient mechanism to handle multi-agent or multi-policy environments, ensuring minimal
    overhead and a clear, unified flow for stepping through matches.

Here is a diagram that shows how the `run_batched_matches` function works at a high level:

.. image:: media/runbatch.png
    :alt: Diagram of the run_batched_matches control flow
    :width: 1000px
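
To make the batching described in the note above concrete, here is a small sketch of the control flow. It is not the library's `run_batched_matches` implementation; the names `run_matches_batched` and `policies` are hypothetical, and `policies` is assumed to map each `policy_id` to a callable that takes a batch of policy inputs and returns a batch of outputs.

from collections import defaultdict

def run_matches_batched(matches, policies):
    # matches: list of (env, {agent_id: agent_state}) pairs following the standard above.
    # policies: {policy_id: callable}, one batched call per policy per loop iteration.
    active = [{"env": env, "agents": agents, "obs": env.reset(),
               "actions": {}, "outputs": {}} for env, agents in matches]
    logs = []
    while active:
        # 1) Advance every agent that still owes an action; collect pending policy calls.
        calls = defaultdict(list)  # policy_id -> [(match, agent_id, policy_input)]
        for m in active:
            for agent_id, obs in m["obs"].items():
                if agent_id in m["actions"]:
                    continue
                policy_id, policy_input, action, ready, _ = m["agents"][agent_id].step(
                    obs, policy_output=m["outputs"].pop(agent_id, None))
                if ready:
                    m["actions"][agent_id] = action
                else:
                    calls[policy_id].append((m, agent_id, policy_input))
        # 2) One batched call per policy, grouped across all matches and agents.
        for policy_id, requests in calls.items():
            batch_out = policies[policy_id]([inp for _, _, inp in requests])
            for (m, agent_id, _), out in zip(requests, batch_out):
                m["outputs"][agent_id] = out
        # 3) Step only the environments whose required actions are all ready.
        still_active = []
        for m in active:
            if set(m["actions"]) == set(m["obs"]):
                m["obs"], done, _ = m["env"].step(m["actions"])
                m["actions"] = {}
                if done:
                    logs.append((m["env"].get_log_info(),
                                 {a: s.get_log_info() for a, s in m["agents"].items()}))
                    continue  # finished match is removed from the active set
            still_active.append(m)
        active = still_active
    return logs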
src_code_for_reproducibility/docs/source/modules.rst ADDED
@@ -0,0 +1,7 @@
src
===

.. toctree::
   :maxdepth: 4

   src
src_code_for_reproducibility/docs/source/src.environments.dond.dond_agent.rst ADDED
@@ -0,0 +1,7 @@
src.environments.dond.dond\_agent module
========================================

.. automodule:: src.environments.dond.dond_agent
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_return_funcs.rst ADDED
@@ -0,0 +1,7 @@
src.environments.dond.dond\_return\_funcs module
================================================

.. automodule:: src.environments.dond.dond_return_funcs
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_statistics_funcs.rst ADDED
@@ -0,0 +1,7 @@
src.environments.dond.dond\_statistics\_funcs module
====================================================

.. automodule:: src.environments.dond.dond_statistics_funcs
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_training_data_funcs.rst ADDED
@@ -0,0 +1,7 @@
src.environments.dond.dond\_training\_data\_funcs module
========================================================

.. automodule:: src.environments.dond.dond_training_data_funcs
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.rst ADDED
@@ -0,0 +1,19 @@
src.environments.dond package
=============================

.. automodule:: src.environments.dond
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.environments.dond.dond_agent
   src.environments.dond.dond_game
   src.environments.dond.dond_log_funcs
   src.environments.dond.dond_statistics_funcs
   src.environments.dond.dond_training_data_funcs
src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst ADDED
@@ -0,0 +1,7 @@
src.environments.environment\_imports module
============================================

.. automodule:: src.environments.environment_imports
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst ADDED
@@ -0,0 +1,7 @@
src.environments.ipd.ipd\_agent module
======================================

.. automodule:: src.environments.ipd.ipd_agent
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_game.rst ADDED
@@ -0,0 +1,7 @@
src.environments.ipd.ipd\_game module
=====================================

.. automodule:: src.environments.ipd.ipd_game
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_statistics_funcs.rst ADDED
@@ -0,0 +1,7 @@
src.environments.ipd.ipd\_statistics\_funcs module
==================================================

.. automodule:: src.environments.ipd.ipd_statistics_funcs
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_training_data_funcs.rst ADDED
@@ -0,0 +1,7 @@
src.environments.ipd.ipd\_training\_data\_funcs module
======================================================

.. automodule:: src.environments.ipd.ipd_training_data_funcs
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.arithmetic_test.rst ADDED
@@ -0,0 +1,7 @@
src.experiments.arithmetic\_test module
=======================================

.. automodule:: src.experiments.arithmetic_test
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.generate_and_train.rst ADDED
@@ -0,0 +1,7 @@
src.experiments.generate\_and\_train module
===========================================

.. automodule:: src.experiments.generate_and_train
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst ADDED
@@ -0,0 +1,7 @@
src.experiments.last\_completion module
=======================================

.. automodule:: src.experiments.last_completion
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.rst ADDED
@@ -0,0 +1,17 @@
src.experiments package
=======================

.. automodule:: src.experiments
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.experiments.arithmetic_test
   src.experiments.generate_and_train
   src.experiments.last_completion
src_code_for_reproducibility/docs/source/src.generation.rst ADDED
@@ -0,0 +1,15 @@
src.generation package
======================

.. automodule:: src.generation
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.generation.run_games
src_code_for_reproducibility/docs/source/src.models.dummy_hf_agent.rst ADDED
@@ -0,0 +1,7 @@
src.models.dummy\_hf\_agent module
==================================

.. automodule:: src.models.dummy_llm_agent
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst ADDED
@@ -0,0 +1,7 @@
src.models.dummy\_local\_llm module
===================================

.. automodule:: src.models.dummy_local_llm
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.local_llm.rst ADDED
@@ -0,0 +1,7 @@
src.models.local\_llm module
============================

.. automodule:: src.models.local_llm
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst ADDED
@@ -0,0 +1,7 @@
src.models.new\_local\_llm module
=================================

.. automodule:: src.models.new_local_llm
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.oai_agent.rst ADDED
@@ -0,0 +1,7 @@
src.models.oai\_agent module
============================

.. automodule:: src.models.oai_agent
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.rst ADDED
@@ -0,0 +1,20 @@
src.models package
==================

.. automodule:: src.models
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.models.dummy_local_llm
   src.models.local_llm
   src.models.new_local_llm
   src.models.server_llm
   src.models.updatable_worker
   src.models.vllm_worker_wrap
src_code_for_reproducibility/docs/source/src.models.server_llm.rst ADDED
@@ -0,0 +1,7 @@
src.models.server\_llm module
=============================

.. automodule:: src.models.server_llm
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst ADDED
@@ -0,0 +1,7 @@
src.models.updatable\_worker module
===================================

.. automodule:: src.models.updatable_worker
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.rst ADDED
@@ -0,0 +1,28 @@
src package
===========

.. automodule:: src
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   src.environments
   src.experiments
   src.generation
   src.models
   src.training
   src.utils

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.run
src_code_for_reproducibility/docs/source/src.training.ppo_train.rst ADDED
@@ -0,0 +1,7 @@
src.training.ppo\_train module
==============================

.. automodule:: src.training.ppo_train
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst ADDED
@@ -0,0 +1,7 @@
src.training.ppo\_train\_value\_head module
===========================================

.. automodule:: src.training.ppo_train_value_head
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.rst ADDED
@@ -0,0 +1,19 @@
src.training package
====================

.. automodule:: src.training
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.training.ppo_train
   src.training.ppo_train_value_head
   src.training.reinforce_training
   src.training.rl_convs_processing
   src.training.train_main
src_code_for_reproducibility/docs/source/src.training.train_main.rst ADDED
@@ -0,0 +1,7 @@
src.training.train\_main module
===============================

.. automodule:: src.training.train_main
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.common_imports.rst ADDED
@@ -0,0 +1,7 @@
src.utils.common\_imports module
================================

.. automodule:: src.utils.common_imports
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.export_ppo_training_set.rst ADDED
@@ -0,0 +1,7 @@
src.utils.export\_ppo\_training\_set module
===========================================

.. automodule:: src.utils.export_ppo_training_set
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst ADDED
@@ -0,0 +1,7 @@
src.utils.extra\_stats module
=============================

.. automodule:: src.utils.extra_stats
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.model_to_cpu.rst ADDED
@@ -0,0 +1,7 @@
src.utils.model\_to\_cpu module
===============================

.. automodule:: src.utils.model_to_cpu
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.rst ADDED
@@ -0,0 +1,24 @@
src.utils package
=================

.. automodule:: src.utils
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.utils.common_imports
   src.utils.export_ppo_training_set
   src.utils.extra_stats
   src.utils.inherit_args
   src.utils.log_gpu_usage
   src.utils.log_statistics
   src.utils.model_to_cpu
   src.utils.parallel_shuffle
   src.utils.quick_stats
   src.utils.update_start_epoch
src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst ADDED
@@ -0,0 +1,7 @@
src.utils.update\_start\_epoch module
=====================================

.. automodule:: src.utils.update_start_epoch
   :members:
   :undoc-members:
   :show-inheritance: