dereckpichemila committed
Commit 4728a06 · verified · 1 Parent(s): b6f2447

Add files using upload-large-folder tool

Files changed (50)
  1. .hydra/config.yaml +155 -0
  2. .hydra/hydra.yaml +154 -0
  3. .hydra/overrides.yaml +1 -0
  4. run.log +0 -0
  5. seed_0/Qwen/Qwen3-4B-Instruct-2507/adapters/README.md +207 -0
  6. seed_0/Qwen/Qwen3-4B-Instruct-2507/adapters/critic_adapter/adapter_config.json +42 -0
  7. src_code_for_reproducibility/docs/Makefile +19 -0
  8. src_code_for_reproducibility/docs/generate_docs.py +249 -0
  9. src_code_for_reproducibility/docs/make.bat +35 -0
  10. src_code_for_reproducibility/docs/source/contributing.rst +0 -0
  11. src_code_for_reproducibility/docs/source/environments.rst +35 -0
  12. src_code_for_reproducibility/docs/source/index.rst +22 -0
  13. src_code_for_reproducibility/docs/source/installation.rst +10 -0
  14. src_code_for_reproducibility/docs/source/launch.rst +0 -0
  15. src_code_for_reproducibility/docs/source/marl_standard.rst +141 -0
  16. src_code_for_reproducibility/docs/source/modules.rst +7 -0
  17. src_code_for_reproducibility/docs/source/src.environments.dond.dond_agent.rst +7 -0
  18. src_code_for_reproducibility/docs/source/src.environments.dond.dond_return_funcs.rst +7 -0
  19. src_code_for_reproducibility/docs/source/src.environments.dond.dond_statistics_funcs.rst +7 -0
  20. src_code_for_reproducibility/docs/source/src.environments.dond.dond_training_data_funcs.rst +7 -0
  21. src_code_for_reproducibility/docs/source/src.environments.dond.rst +19 -0
  22. src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst +7 -0
  23. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst +7 -0
  24. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_game.rst +7 -0
  25. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_statistics_funcs.rst +7 -0
  26. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_training_data_funcs.rst +7 -0
  27. src_code_for_reproducibility/docs/source/src.experiments.arithmetic_test.rst +7 -0
  28. src_code_for_reproducibility/docs/source/src.experiments.generate_and_train.rst +7 -0
  29. src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst +7 -0
  30. src_code_for_reproducibility/docs/source/src.experiments.rst +17 -0
  31. src_code_for_reproducibility/docs/source/src.generation.rst +15 -0
  32. src_code_for_reproducibility/docs/source/src.models.dummy_hf_agent.rst +7 -0
  33. src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst +7 -0
  34. src_code_for_reproducibility/docs/source/src.models.local_llm.rst +7 -0
  35. src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst +7 -0
  36. src_code_for_reproducibility/docs/source/src.models.oai_agent.rst +7 -0
  37. src_code_for_reproducibility/docs/source/src.models.rst +20 -0
  38. src_code_for_reproducibility/docs/source/src.models.server_llm.rst +7 -0
  39. src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst +7 -0
  40. src_code_for_reproducibility/docs/source/src.rst +28 -0
  41. src_code_for_reproducibility/docs/source/src.training.ppo_train.rst +7 -0
  42. src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst +7 -0
  43. src_code_for_reproducibility/docs/source/src.training.rst +19 -0
  44. src_code_for_reproducibility/docs/source/src.training.train_main.rst +7 -0
  45. src_code_for_reproducibility/docs/source/src.utils.common_imports.rst +7 -0
  46. src_code_for_reproducibility/docs/source/src.utils.export_ppo_training_set.rst +7 -0
  47. src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst +7 -0
  48. src_code_for_reproducibility/docs/source/src.utils.model_to_cpu.rst +7 -0
  49. src_code_for_reproducibility/docs/source/src.utils.rst +24 -0
  50. src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst +7 -0
.hydra/config.yaml ADDED
@@ -0,0 +1,155 @@
experiment:
  nb_epochs: 1
  nb_matches_per_iteration: 64
  reinit_matches_each_it: true
  checkpoint_every_n_iterations: 50
  start_epoch: 0
  resume_experiment: true
  base_seed: 0
  seed_group_size: 8
  train: false
  name: openai_gpt_4.1_tas
  method: generate_and_train
  description: Trust-and-Split negotiation game
temperature: 1.0
markov_games:
  runner_method_name: LinearRunner
  runner_kwargs: {}
  group_by_round: true
  simulation_class_name: TrustAndSplitSimulation
  simulation_init_args:
    agent_ids: ${agent_ids}
    nb_of_rounds: 10
    quota_messages_per_agent_per_round: 1
  agents:
    0:
      agent_id: ${agent_0_id}
      agent_class_name: TrustAndSplitAgent
      policy_id: openai_llm
      init_kwargs:
        goal: Maximize your total points over the whole game.
        num_message_chars: 500
    1:
      agent_id: ${agent_1_id}
      agent_class_name: TrustAndSplitAgent
      policy_id: openai_llm
      init_kwargs:
        goal: Maximize your total points over the whole game.
        num_message_chars: 500
models:
  base_llm:
    class: LeanLocalLLM
    init_args:
      llm_id: base_llm
      model_name: Qwen/Qwen3-4B-Instruct-2507
      inference_backend: dummy
      max_thinking_characters: 0
      hf_kwargs:
        device_map: auto
        torch_dtype: bfloat16
        max_memory:
          0: 20GiB
        attn_implementation: flash_attention_2
      inference_backend_init_kwargs:
        seed: ${experiment.base_seed}
        enable_prefix_caching: true
        max_model_len: 10000.0
        gpu_memory_utilization: 0.5
        dtype: bfloat16
        trust_remote_code: true
        max_lora_rank: 32
      inference_backend_sampling_params:
        temperature: ${temperature}
        top_p: 1.0
        max_tokens: 400
        top_k: -1
      adapter_configs:
        agent_adapter:
          task_type: CAUSAL_LM
          r: 32
          lora_alpha: 64
          lora_dropout: 0.0
          target_modules: all-linear
        critic_adapter:
          task_type: CAUSAL_LM
          r: 32
          lora_alpha: 64
          lora_dropout: 0.0
          target_modules: all-linear
      regex_max_attempts: 3
  openai_llm:
    class: LargeLanguageModelOpenAI
    init_args:
      use_reasoning: false
      llm_id: openai_llm
      model: gpt-4.1
      base_url: https://api.openai.com/v1
      timeout_s: 6000.0
      regex_max_attempts: 3
      sampling_params:
        temperature: 1.0
        top_p: 1.0
        max_output_tokens: 200
critics:
  agent_critic:
    module_pointer:
    - base_llm
    - critic_adapter
optimizers:
  agent_optimizer:
    module_pointer:
    - base_llm
    - agent_adapter
    optimizer_class_name: torch.optim.Adam
    init_args:
      lr: 1.0e-06
      weight_decay: 0.0
  critic_optimizer:
    module_pointer: agent_critic
    optimizer_class_name: torch.optim.Adam
    init_args:
      lr: 3.0e-06
      weight_decay: 0.0
trainers:
  agent_trainer:
    class: TrainerNaive
    module_pointers:
      policy:
      - base_llm
      - agent_adapter
      policy_optimizer: agent_optimizer
      critic: agent_critic
      critic_optimizer: critic_optimizer
    kwargs:
      entropy_coeff: 0.0
      kl_coeff: 0.0
      gradient_clipping: 1.0
      restrict_tokens: null
      mini_batch_size: 1
      use_gradient_checkpointing: false
      use_qwen_reasoning_mask: false
      temperature: ${temperature}
      device: cuda:0
      use_gae: false
      whiten_advantages: false
      whiten_advantages_time_step_wise: false
      skip_discounted_state_visitation: true
      use_gae_lambda_annealing: false
      gae_lambda_annealing_method: None
      gae_lambda_annealing_method_params: None
      gae_lambda_annealing_limit: 0.95
      discount_factor: 0.9
      use_rloo: true
      enable_tokenwise_logging: false
      pg_loss_normalization: batch
      reward_normalizing_constant: 200.0
train_on_which_data:
  agent_trainer: []
common_agent_kwargs:
  goal: Maximize your total points over the whole game.
  num_message_chars: 500
agent_0_id: Alice
agent_1_id: Bob
agent_ids:
- Alice
- Bob
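
The config above is the OmegaConf snapshot Hydra saved for this run. Below is a minimal sketch, not one of the committed files, of how such a snapshot can be inspected with OmegaConf; it assumes only that the file sits at `.hydra/config.yaml` as in this repository.

from omegaconf import OmegaConf

# Load the snapshot Hydra wrote for this run.
cfg = OmegaConf.load(".hydra/config.yaml")

# Interpolations such as ${temperature}, ${agent_ids} and ${experiment.base_seed}
# resolve against the root of this same file.
print(cfg.models.base_llm.init_args.model_name)  # Qwen/Qwen3-4B-Instruct-2507
print(cfg.models.openai_llm.init_args.model)     # gpt-4.1
print(cfg.agent_0_id, cfg.agent_1_id)            # Alice Bob

# Resolving a sub-tree fills in the ${temperature} reference.
sampling = OmegaConf.to_container(
    cfg.models.base_llm.init_args.inference_backend_sampling_params, resolve=True
)
print(sampling)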
.hydra/hydra.yaml ADDED
@@ -0,0 +1,154 @@
hydra:
  run:
    dir: ${oc.env:SCRATCH}/llm_negotiation/${now:%Y_%m}/${experiment.name}
  sweep:
    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.

      '
    footer: 'Powered by Hydra (https://hydra.cc)

      Use --hydra-help to view Hydra specific help

      '
    template: '${hydra.help.header}

      == Configuration groups ==

      Compose your configuration from those groups (group=option)


      $APP_CONFIG_GROUPS


      == Config ==

      Override anything in the config (foo.bar=value)


      $CONFIG


      ${hydra.help.footer}

      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})

      See https://hydra.cc for more info.


      == Flags ==

      $FLAGS_HELP


      == Configuration groups ==

      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)


      $HYDRA_CONFIG_GROUPS


      Use ''--cfg hydra'' to Show the Hydra config.

      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: run
    chdir: false
    override_dirname: ''
    id: ???
    num: ???
    config_name: openai_gpt_4.1_tas
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.1'
    cwd: /home/mila/d/dereck.piche/experiment_safe/exp_safe_llm_nego
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: /home/mila/d/dereck.piche/experiment_safe/exp_safe_llm_nego/configs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: /network/scratch/d/dereck.piche/llm_negotiation/2025_09/openai_gpt_4.1_tas
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
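
The `hydra.yaml` dump above records how the run was launched: job name `run`, config name `openai_gpt_4.1_tas`, and a file config source under `<repo>/configs`. As a hedged sketch, not part of the committed files, the same configuration could be re-composed with Hydra's compose API; the config-directory path is machine-specific and copied from `runtime.config_sources` above.

from hydra import compose, initialize_config_dir

# Path taken from runtime.config_sources in the dump above; adjust to your checkout.
CONFIG_DIR = "/home/mila/d/dereck.piche/experiment_safe/exp_safe_llm_nego/configs"

with initialize_config_dir(config_dir=CONFIG_DIR, version_base="1.1"):
    cfg = compose(config_name="openai_gpt_4.1_tas")

print(cfg.experiment.name)  # openai_gpt_4.1_tas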
.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
[]
run.log ADDED
The diff for this file is too large to render. See raw diff
 
seed_0/Qwen/Qwen3-4B-Instruct-2507/adapters/README.md ADDED
@@ -0,0 +1,207 @@
---
base_model: Qwen/Qwen3-4B-Instruct-2507
library_name: peft
pipeline_tag: text-generation
tags:
- base_model:adapter:Qwen/Qwen3-4B-Instruct-2507
- lora
- transformers
---

# Model Card for Model ID

<!-- Provide a quick summary of what the model is/does. -->



## Model Details

### Model Description

<!-- Provide a longer summary of what this model is. -->



- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]

### Model Sources [optional]

<!-- Provide the basic links for the model. -->

- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]

## Uses

<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

### Direct Use

<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

[More Information Needed]

### Downstream Use [optional]

<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->

[More Information Needed]

### Out-of-Scope Use

<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->

[More Information Needed]

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

[More Information Needed]

### Recommendations

<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->

Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.

## How to Get Started with the Model

Use the code below to get started with the model.

[More Information Needed]

## Training Details

### Training Data

<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->

[More Information Needed]

### Training Procedure

<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->

#### Preprocessing [optional]

[More Information Needed]


#### Training Hyperparameters

- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->

#### Speeds, Sizes, Times [optional]

<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->

[More Information Needed]

## Evaluation

<!-- This section describes the evaluation protocols and provides the results. -->

### Testing Data, Factors & Metrics

#### Testing Data

<!-- This should link to a Dataset Card if possible. -->

[More Information Needed]

#### Factors

<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->

[More Information Needed]

#### Metrics

<!-- These are the evaluation metrics being used, ideally with a description of why. -->

[More Information Needed]

### Results

[More Information Needed]

#### Summary



## Model Examination [optional]

<!-- Relevant interpretability work for the model goes here -->

[More Information Needed]

## Environmental Impact

<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->

Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).

- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]

## Technical Specifications [optional]

### Model Architecture and Objective

[More Information Needed]

### Compute Infrastructure

[More Information Needed]

#### Hardware

[More Information Needed]

#### Software

[More Information Needed]

## Citation [optional]

<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->

**BibTeX:**

[More Information Needed]

**APA:**

[More Information Needed]

## Glossary [optional]

<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->

[More Information Needed]

## More Information [optional]

[More Information Needed]

## Model Card Authors [optional]

[More Information Needed]

## Model Card Contact

[More Information Needed]

### Framework versions

- PEFT 0.17.0
seed_0/Qwen/Qwen3-4B-Instruct-2507/adapters/critic_adapter/adapter_config.json ADDED
@@ -0,0 +1,42 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "Qwen/Qwen3-4B-Instruct-2507",
  "bias": "none",
  "corda_config": null,
  "eva_config": null,
  "exclude_modules": null,
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 64,
  "lora_bias": false,
  "lora_dropout": 0.0,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "qalora_group_size": 16,
  "r": 32,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "k_proj",
    "v_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
    "q_proj",
    "o_proj"
  ],
  "target_parameters": null,
  "task_type": "CAUSAL_LM",
  "trainable_token_indices": null,
  "use_dora": false,
  "use_qalora": false,
  "use_rslora": false
}
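
The adapter config above is a standard PEFT LoRA description (rank 32, alpha 64, all attention and MLP projections of the Qwen3-4B base). A minimal sketch, not one of the committed files, of attaching this adapter to its base model with PEFT; the only assumption is that the adapter folder is used exactly as saved in this repository.

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Base model named in base_model_name_or_path above.
base = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-4B-Instruct-2507",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Adapter folder as committed in this repo.
model = PeftModel.from_pretrained(
    base, "seed_0/Qwen/Qwen3-4B-Instruct-2507/adapters/critic_adapter"
)
model.eval()  # adapter_config.json sets inference_mode: true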
src_code_for_reproducibility/docs/Makefile ADDED
@@ -0,0 +1,19 @@
# Minimal makefile for Sphinx documentation

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)
src_code_for_reproducibility/docs/generate_docs.py ADDED
@@ -0,0 +1,249 @@
#!/usr/bin/env python3
"""
Script to automatically generate Sphinx documentation for all modules and build the HTML website.
"""
import importlib.util
import os
import subprocess
import sys


def check_and_install_dependencies():
    """Check for required dependencies and install them if missing."""
    required_packages = [
        "sphinx",
        "sphinx-rtd-theme",
        "sphinxcontrib-napoleon",
        "sphinxcontrib-mermaid",
        "sphinx-autodoc-typehints",
    ]

    missing_packages = []

    for package in required_packages:
        # Convert package name to module name (replace - with _)
        module_name = package.replace("-", "_")

        # Check if the package is installed
        if importlib.util.find_spec(module_name) is None:
            missing_packages.append(package)

    # Install missing packages
    if missing_packages:
        print(f"Installing missing dependencies: {', '.join(missing_packages)}")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install"] + missing_packages
        )
        print("Dependencies installed successfully")
    else:
        print("All required dependencies are already installed")


def create_makefile(docs_dir):
    """Create a Makefile for Sphinx documentation if it doesn't exist."""
    makefile_path = os.path.join(docs_dir, "Makefile")

    if os.path.exists(makefile_path):
        print(f"Makefile already exists at {makefile_path}")
        return

    print(f"Creating Makefile at {makefile_path}")

    makefile_content = """# Minimal makefile for Sphinx documentation

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)
"""

    with open(makefile_path, "w") as f:
        f.write(makefile_content)

    print("Makefile created successfully")


def create_make_bat(docs_dir):
    """Create a make.bat file for Windows if it doesn't exist."""
    make_bat_path = os.path.join(docs_dir, "make.bat")

    if os.path.exists(make_bat_path):
        print(f"make.bat already exists at {make_bat_path}")
        return

    print(f"Creating make.bat at {make_bat_path}")

    make_bat_content = """@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
"""

    with open(make_bat_path, "w") as f:
        f.write(make_bat_content)

    print("make.bat created successfully")


def main():
    # Check and install required dependencies
    print("=== Checking dependencies ===")
    check_and_install_dependencies()

    # Get the directory of this script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Path to the project root
    project_root = os.path.dirname(script_dir)

    # Path to the source directory
    source_dir = os.path.join(project_root, "src")

    # Path to the docs source directory
    docs_source_dir = os.path.join(script_dir, "source")

    # Print paths for debugging
    print(f"Script directory: {script_dir}")
    print(f"Project root: {project_root}")
    print(f"Source directory: {source_dir}")
    print(f"Docs source directory: {docs_source_dir}")

    # Make sure the source directory exists
    if not os.path.exists(source_dir):
        print(f"Error: Source directory {source_dir} does not exist!")
        sys.exit(1)

    # Make sure the docs source directory exists
    if not os.path.exists(docs_source_dir):
        print(f"Creating docs source directory: {docs_source_dir}")
        os.makedirs(docs_source_dir)

    # Step 1: Run sphinx-apidoc to generate .rst files for all modules
    print("\n=== Generating API documentation ===")
    cmd = [
        "sphinx-apidoc",
        "-f",  # Force overwriting of existing files
        "-e",  # Put module documentation before submodule documentation
        "-M",  # Put module documentation before subpackage documentation
        "-o",
        docs_source_dir,  # Output directory
        source_dir,  # Source code directory
    ]

    print(f"Running command: {' '.join(cmd)}")
    result = subprocess.run(cmd, capture_output=True, text=True)

    # Print the output of the command
    print("STDOUT:")
    print(result.stdout)

    print("STDERR:")
    print(result.stderr)

    if result.returncode != 0:
        print(f"Error: sphinx-apidoc failed with return code {result.returncode}")
        sys.exit(1)

    # List the files in the docs source directory
    print("\nFiles in docs/source directory:")
    for file in sorted(os.listdir(docs_source_dir)):
        print(f"  {file}")

    print("\nDocumentation source files generated successfully!")

    # Step 2: Create Makefile and make.bat if they don't exist
    create_makefile(script_dir)
    create_make_bat(script_dir)

    # Step 3: Build the HTML documentation
    print("\n=== Building HTML documentation ===")

    # Determine the build command based on the platform
    if os.name == "nt":  # Windows
        build_cmd = ["make.bat", "html"]
    else:  # Unix/Linux/Mac
        build_cmd = ["make", "html"]

    # Change to the docs directory to run the build command
    os.chdir(script_dir)

    print(f"Running command: {' '.join(build_cmd)}")
    build_result = subprocess.run(build_cmd, capture_output=True, text=True)

    # Print the output of the build command
    print("STDOUT:")
    print(build_result.stdout)

    print("STDERR:")
    print(build_result.stderr)

    if build_result.returncode != 0:
        print(f"Error: HTML build failed with return code {build_result.returncode}")
        sys.exit(1)

    # Get the path to the built HTML documentation
    html_dir = os.path.join(script_dir, "build", "html")
    index_path = os.path.join(html_dir, "index.html")

    if os.path.exists(index_path):
        print(f"\nHTML documentation built successfully!")
        print(f"You can view it by opening: {index_path}")

        # Try to open the documentation in a browser
        try:
            import webbrowser

            print("\nAttempting to open documentation in your default browser...")
            webbrowser.open(f"file://{index_path}")
        except Exception as e:
            print(f"Could not open browser automatically: {e}")
    else:
        print(f"\nWarning: HTML index file not found at {index_path}")


if __name__ == "__main__":
    main()
src_code_for_reproducibility/docs/make.bat ADDED
@@ -0,0 +1,35 @@
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
src_code_for_reproducibility/docs/source/contributing.rst ADDED
File without changes
src_code_for_reproducibility/docs/source/environments.rst ADDED
@@ -0,0 +1,35 @@
=================
MARL Environments
=================

This section provides detailed documentation for the multi-agent negotiation environments included in the library.

Each environment follows the standard interface described in :doc:`../environments` but has its own unique game rules,
dynamics, and implementation details.

.. toctree::
   :maxdepth: 2
   :caption: Available Environments:

   environments/ipd
   environments/diplomacy
   environments/dond

Overview
--------

The library currently includes the following environments:

1. **Iterated Prisoner's Dilemma (IPD)**: A classic game theory problem where two agents repeatedly decide whether to cooperate or defect, with different payoffs based on their joint actions.

2. **Diplomacy**: An adaptation of the board game Diplomacy, where seven European powers compete for control of supply centers through strategic moves and alliances.

3. **Deal or No Deal (DOND)**: A negotiation environment based on the paper `Deal or No Deal? End-to-End Learning for Negotiation Dialogues <https://arxiv.org/pdf/1706.05125>`_, in which agents negotiate over the distribution of a set of prizes.

Each environment documentation includes:

- Game rules and background
- Implementation details
- API reference
- Example usage
- Advanced features and customization options
src_code_for_reproducibility/docs/source/index.rst ADDED
@@ -0,0 +1,22 @@
Welcome to LLM Negotiation's documentation!
===========================================

This library is a collection of tools for training and evaluating LLM-based agents in multi-agent environments. It is designed to be easy to use and extend.

.. toctree::
   :maxdepth: 3
   :caption: Contents:

   installation
   marl_standard
   environments
   launch
   usage
   modules
   contributing

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
src_code_for_reproducibility/docs/source/installation.rst ADDED
@@ -0,0 +1,10 @@
Installation
============

To install the package, run:

.. code-block:: bash

   git clone https://github.com/yourusername/llm_negotiation.git
   cd llm_negotiation
   pip install -e .
src_code_for_reproducibility/docs/source/launch.rst ADDED
File without changes
src_code_for_reproducibility/docs/source/marl_standard.rst ADDED
@@ -0,0 +1,141 @@
===========================================================
Abstract Standard for Multi-Agent Negotiation Environments
===========================================================

Multi-Agent Negotiation Environments require more features than gymnasium environments in order to be used as interfaces in general game-running code.
The fundamental differences between gymnasium environments and Multi-Agent Negotiation Environments are:

1. The response from the LLM is a text action, not a discrete action. Appropriate parsing of the text is therefore required, and the model may need to be run multiple times to obtain a complete action.
   This is why we introduce the `AgentHandler` class, which is responsible for parsing the LLM's response.
2. The environment needs to be able to handle multi-agent interactions.
   This is why we introduce the `NegotiationEnvironment` class, which is responsible for handling the multi-agent interactions.
3. MARL environments are complex to describe, and in different contexts the same environment may be described differently. Therefore, both the environment and the agent handlers are
   responsible for describing a particular trajectory. This information is given by the `get_log_info` method.
4. There may be a lot of overlap between the neural networks used by the agents; for instance, the same model may be used for all agents. This motivates the requirement for a
   policy identifier for each agent.

Taking inspiration from the `gymnasium <https://gymnasium.farama.org/>`_ library, we introduce a new standard for Multi-Agent Negotiation Environments.

Our standard is based on the following features:

Environments are of the form:

.. code-block:: python

    class MarlEnvironment():

        def __init__(self):
            """Initialize the environment."""
            pass

        def reset(self):
            """Reset the environment to an initial state and return the initial observation.

            Returns:
                observation (dict): A dictionary where keys are agent identifiers and values are observations.
            """
            # (...)
            return observation

        def step(self, actions):
            """Take a step in the environment using the provided actions.

            Args:
                actions (dict): A dictionary where keys are agent identifiers and values are actions.

            Returns:
                observations (dict): A dictionary where keys are agent identifiers and values are observations.
                done (bool): Whether the episode has ended.
                info (dict): Additional information about the environment.
            """
            # (...)
            return observations, done, info

        def get_log_info(self):
            """Get additional information about the environment. This information is used to log the game.

            Returns:
                log_info (dict): Information about the environment required to log the game.
            """
            # (...)
            return log_info

        def render(self):
            """Render the current state of the environment."""
            pass

        def close(self):
            """Perform any necessary cleanup."""
            pass


    class AgentState():

        def __init__(self):
            """Initialize the agent state."""
            pass

        def step(self, observation_from_env, policy_output=None):
            """Update the agent state based on the observation and the action.
            The action is the output of the LLM.

            Args:
                observation_from_env (dict): The observation of the environment.
                policy_output: The output of the policy.

            Returns:
                policy_id (str): The policy identifier.
                policy_input (dict): The input to the policy.
                action: The official action to be sent to the environment.
                done (bool): Whether the LLM action is ready to be sent to the environment.
                info (dict): Additional information about the agent.
            """
            # (...)
            return policy_id, policy_input, action, done, info

        def get_log_info(self):
            """Get information about the agent required to log a trajectory.

            Returns:
                log_info (dict): Information about the agent required to log a trajectory.
            """
            # (...)
            return log_info

        def render(self):
            """Render the current state of the agent."""
            pass

        def close(self):
            """Perform any necessary cleanup."""
            pass


Implicitly, the keys of the `observations` dictionary returned by the `step` method of the `MarlEnvironment` interface represent the set of agents from which an action is expected at the current step. The next step should only expect actions from the agents in the `observations` dictionary.

As you can see, both classes have a `get_log_info` method. This method is used to log the game. It returns a dictionary whose keys are the agent identifiers and whose values are the information to log. We need this because the environment and the agent handler may log different information, and it makes it easier to log from the perspective of each agent. The core environment class should not need to know about the details of the agent handler.


Running Environments in Parallel
--------------------------------
This standard allows the use of the `run_batched_matches` function (TODO: link) to run environments in an efficient way. The core idea is to batch the policy calls for all agents in the environment.

.. note::
    The ``run_batched_matches`` function allows you to run multiple negotiation games, or "matches," in parallel.
    After each environment is initialized, the function continuously loops over all active matches and checks which agents
    are still pending actions. Each agent's logic can require multiple calls to the policy (e.g., an LLM) before an action
    becomes "ready" to be sent to the environment. (For instance, an agent might need multiple policy calls before having a string which can be parsed into a valid action.) While an agent is waiting for a policy output, these calls for all agents across all matches are grouped together by unique policy identifier and processed in batch for efficiency. This is the core functionality of the ``run_batched_matches`` function.

    Only once all actions from the required agents at a given step for an environment are ready does the function make a single ``env.step(...)`` call; this ensures
    every match moves forward in lockstep for all its active agents. As soon as an environment signals it is done, the function
    retrieves logged information from both the environment and the agent states before removing this match from the active set.

    If there are more matches waiting to be processed, they are then started one by one to maintain the specified degree of parallelism.
    This batching approach provides an efficient mechanism to handle multi-agent or multi-policy environments, ensuring minimal
    overhead and a clear, unified flow for stepping through matches.

Here is a diagram that shows how the `run_batched_matches` function works at a high level:

.. image:: media/runbatch.png
    :alt: Diagram of the run_batched_matches control flow
    :width: 1000px
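
To make the batching described in the note above concrete, here is a small sketch of the control flow. It is not the library's `run_batched_matches` implementation; the names `run_matches_batched` and `policies` are hypothetical, and `policies` is assumed to map each `policy_id` to a callable that takes a batch of policy inputs and returns a batch of outputs.

from collections import defaultdict

def run_matches_batched(matches, policies):
    # matches: list of (env, {agent_id: agent_state}) pairs following the standard above.
    # policies: {policy_id: callable}, one batched call per policy per loop iteration.
    active = [{"env": env, "agents": agents, "obs": env.reset(),
               "actions": {}, "outputs": {}} for env, agents in matches]
    logs = []
    while active:
        # 1) Advance every agent that still owes an action; collect pending policy calls.
        calls = defaultdict(list)  # policy_id -> [(match, agent_id, policy_input)]
        for m in active:
            for agent_id, obs in m["obs"].items():
                if agent_id in m["actions"]:
                    continue
                policy_id, policy_input, action, ready, _ = m["agents"][agent_id].step(
                    obs, policy_output=m["outputs"].pop(agent_id, None))
                if ready:
                    m["actions"][agent_id] = action
                else:
                    calls[policy_id].append((m, agent_id, policy_input))
        # 2) One batched call per policy, grouped across all matches and agents.
        for policy_id, requests in calls.items():
            batch_out = policies[policy_id]([inp for _, _, inp in requests])
            for (m, agent_id, _), out in zip(requests, batch_out):
                m["outputs"][agent_id] = out
        # 3) Step only the environments whose required actions are all ready.
        still_active = []
        for m in active:
            if set(m["actions"]) == set(m["obs"]):
                m["obs"], done, _ = m["env"].step(m["actions"])
                m["actions"] = {}
                if done:
                    logs.append((m["env"].get_log_info(),
                                 {a: s.get_log_info() for a, s in m["agents"].items()}))
                    continue  # finished match is removed from the active set
            still_active.append(m)
        active = still_active
    return logs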
src_code_for_reproducibility/docs/source/modules.rst ADDED
@@ -0,0 +1,7 @@
src
===

.. toctree::
   :maxdepth: 4

   src
src_code_for_reproducibility/docs/source/src.environments.dond.dond_agent.rst ADDED
@@ -0,0 +1,7 @@
src.environments.dond.dond\_agent module
========================================

.. automodule:: src.environments.dond.dond_agent
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_return_funcs.rst ADDED
@@ -0,0 +1,7 @@
src.environments.dond.dond\_return\_funcs module
================================================

.. automodule:: src.environments.dond.dond_return_funcs
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_statistics_funcs.rst ADDED
@@ -0,0 +1,7 @@
src.environments.dond.dond\_statistics\_funcs module
====================================================

.. automodule:: src.environments.dond.dond_statistics_funcs
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_training_data_funcs.rst ADDED
@@ -0,0 +1,7 @@
src.environments.dond.dond\_training\_data\_funcs module
========================================================

.. automodule:: src.environments.dond.dond_training_data_funcs
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.rst ADDED
@@ -0,0 +1,19 @@
src.environments.dond package
=============================

.. automodule:: src.environments.dond
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.environments.dond.dond_agent
   src.environments.dond.dond_game
   src.environments.dond.dond_log_funcs
   src.environments.dond.dond_statistics_funcs
   src.environments.dond.dond_training_data_funcs
src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst ADDED
@@ -0,0 +1,7 @@
src.environments.environment\_imports module
============================================

.. automodule:: src.environments.environment_imports
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst ADDED
@@ -0,0 +1,7 @@
src.environments.ipd.ipd\_agent module
======================================

.. automodule:: src.environments.ipd.ipd_agent
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_game.rst ADDED
@@ -0,0 +1,7 @@
src.environments.ipd.ipd\_game module
=====================================

.. automodule:: src.environments.ipd.ipd_game
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_statistics_funcs.rst ADDED
@@ -0,0 +1,7 @@
src.environments.ipd.ipd\_statistics\_funcs module
==================================================

.. automodule:: src.environments.ipd.ipd_statistics_funcs
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_training_data_funcs.rst ADDED
@@ -0,0 +1,7 @@
src.environments.ipd.ipd\_training\_data\_funcs module
======================================================

.. automodule:: src.environments.ipd.ipd_training_data_funcs
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.arithmetic_test.rst ADDED
@@ -0,0 +1,7 @@
src.experiments.arithmetic\_test module
=======================================

.. automodule:: src.experiments.arithmetic_test
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.generate_and_train.rst ADDED
@@ -0,0 +1,7 @@
src.experiments.generate\_and\_train module
===========================================

.. automodule:: src.experiments.generate_and_train
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst ADDED
@@ -0,0 +1,7 @@
src.experiments.last\_completion module
=======================================

.. automodule:: src.experiments.last_completion
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.rst ADDED
@@ -0,0 +1,17 @@
src.experiments package
=======================

.. automodule:: src.experiments
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.experiments.arithmetic_test
   src.experiments.generate_and_train
   src.experiments.last_completion
src_code_for_reproducibility/docs/source/src.generation.rst ADDED
@@ -0,0 +1,15 @@
src.generation package
======================

.. automodule:: src.generation
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.generation.run_games
src_code_for_reproducibility/docs/source/src.models.dummy_hf_agent.rst ADDED
@@ -0,0 +1,7 @@
src.models.dummy\_hf\_agent module
==================================

.. automodule:: src.models.dummy_llm_agent
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst ADDED
@@ -0,0 +1,7 @@
src.models.dummy\_local\_llm module
===================================

.. automodule:: src.models.dummy_local_llm
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.local_llm.rst ADDED
@@ -0,0 +1,7 @@
src.models.local\_llm module
============================

.. automodule:: src.models.local_llm
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst ADDED
@@ -0,0 +1,7 @@
src.models.new\_local\_llm module
=================================

.. automodule:: src.models.new_local_llm
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.oai_agent.rst ADDED
@@ -0,0 +1,7 @@
src.models.oai\_agent module
============================

.. automodule:: src.models.oai_agent
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.rst ADDED
@@ -0,0 +1,20 @@
src.models package
==================

.. automodule:: src.models
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.models.dummy_local_llm
   src.models.local_llm
   src.models.new_local_llm
   src.models.server_llm
   src.models.updatable_worker
   src.models.vllm_worker_wrap
src_code_for_reproducibility/docs/source/src.models.server_llm.rst ADDED
@@ -0,0 +1,7 @@
src.models.server\_llm module
=============================

.. automodule:: src.models.server_llm
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst ADDED
@@ -0,0 +1,7 @@
src.models.updatable\_worker module
===================================

.. automodule:: src.models.updatable_worker
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.rst ADDED
@@ -0,0 +1,28 @@
src package
===========

.. automodule:: src
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   src.environments
   src.experiments
   src.generation
   src.models
   src.training
   src.utils

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.run
src_code_for_reproducibility/docs/source/src.training.ppo_train.rst ADDED
@@ -0,0 +1,7 @@
src.training.ppo\_train module
==============================

.. automodule:: src.training.ppo_train
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst ADDED
@@ -0,0 +1,7 @@
src.training.ppo\_train\_value\_head module
===========================================

.. automodule:: src.training.ppo_train_value_head
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.rst ADDED
@@ -0,0 +1,19 @@
src.training package
====================

.. automodule:: src.training
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.training.ppo_train
   src.training.ppo_train_value_head
   src.training.reinforce_training
   src.training.rl_convs_processing
   src.training.train_main
src_code_for_reproducibility/docs/source/src.training.train_main.rst ADDED
@@ -0,0 +1,7 @@
src.training.train\_main module
===============================

.. automodule:: src.training.train_main
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.common_imports.rst ADDED
@@ -0,0 +1,7 @@
src.utils.common\_imports module
================================

.. automodule:: src.utils.common_imports
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.export_ppo_training_set.rst ADDED
@@ -0,0 +1,7 @@
src.utils.export\_ppo\_training\_set module
===========================================

.. automodule:: src.utils.export_ppo_training_set
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst ADDED
@@ -0,0 +1,7 @@
src.utils.extra\_stats module
=============================

.. automodule:: src.utils.extra_stats
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.model_to_cpu.rst ADDED
@@ -0,0 +1,7 @@
src.utils.model\_to\_cpu module
===============================

.. automodule:: src.utils.model_to_cpu
   :members:
   :undoc-members:
   :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.rst ADDED
@@ -0,0 +1,24 @@
src.utils package
=================

.. automodule:: src.utils
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   src.utils.common_imports
   src.utils.export_ppo_training_set
   src.utils.extra_stats
   src.utils.inherit_args
   src.utils.log_gpu_usage
   src.utils.log_statistics
   src.utils.model_to_cpu
   src.utils.parallel_shuffle
   src.utils.quick_stats
   src.utils.update_start_epoch
src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst ADDED
@@ -0,0 +1,7 @@
src.utils.update\_start\_epoch module
=====================================

.. automodule:: src.utils.update_start_epoch
   :members:
   :undoc-members:
   :show-inheritance: