Upload config.json with huggingface_hub
Browse files- config.json +1 -0
config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"run_name": "phase2.0_base_lr0.0001_wd0.02", "launch": {"name": "phase2.0_base_lr0.0001_wd0.02-train-85b7d2b3", "cmd": ["scripts/2025_10_02_phase2/base.py", "train", "phase2.0_base_lr0.0001_wd0.02", "ai2/ceres-cirrascale", "--train_module.optim_config.lr=0.0001", "--train_module.optim_config.weight_decay=0.02", "--launch.clusters=[ai2/jupiter-cirrascale-2]", "--launch.priority=urgent"], "budget": "ai2/es-platform", "task_name": "train", "workspace": "ai2/earth-systems", "setup_steps": ["echo \"$GCP_CREDENTIALS\" > $GOOGLE_APPLICATION_CREDENTIALS", "conda install gh --channel conda-forge", "gh auth status", "gh repo clone $REPO_URL .", "git checkout \"$GIT_REF\"", "git submodule update --init --recursive", "conda shell.bash activate base", "pip install -e '.[all]'", "", "pip freeze"], "beaker_image": "petew/olmo-core-tch270cu128-2025-05-16", "num_nodes": 1, "num_gpus": 8, "shared_memory": "256GiB", "clusters": ["ai2/jupiter-cirrascale-2"], "shared_filesystem": true, "priority": "urgent", "preemptible": true, "env_vars": [{"name": "NCCL_DEBUG", "value": "WARN", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvVar"}, {"name": "TORCH_NCCL_TRACE_BUFFER_SIZE", "value": "0", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvVar"}, {"name": "NCCL_BLOCKING_WAIT", "value": "0", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvVar"}, {"name": "GOOGLE_APPLICATION_CREDENTIALS", "value": "/etc/gcp_credentials.json", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvVar"}], "env_secrets": [{"name": "BEAKER_TOKEN", "secret": "joer_BEAKER_TOKEN", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "WANDB_API_KEY", "secret": "joer_WANDB_API_KEY", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "GITHUB_TOKEN", "secret": "joer_GITHUB_TOKEN", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "GCP_CREDENTIALS", "secret": "HELIOS_GCP_CREDENTIALS", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}], "nfs": false, "weka_buckets": [{"bucket": "dfive-default", "mount": "/weka/dfive-default", "_CLASS_": "olmo_core.launch.beaker.BeakerWekaBucket"}], "allow_dirty": false, "git": {"repo_url": "https://github.com/allenai/helios", "ref": "b634bf16a4ae99657e6a718bef8ee7f6d07e524f", "branch": "gabi/phase-2", "_CLASS_": "olmo_core.launch.utils.GitConfig"}, "result_dir": "/results", "_CLASS_": "helios.internal.experiment.HeliosBeakerLaunchConfig"}, "model": {"encoder_config": {"supported_modality_names": ["sentinel2_l2a", "sentinel1", "landsat", "worldcover", "srtm", "openstreetmap_raster", "wri_canopy_height_map", "cdl", "worldcereal"], "embedding_size": 768, "max_patch_size": 8, "min_patch_size": 1, "num_heads": 12, "mlp_ratio": 4.0, "depth": 12, "drop_path": 0.1, "max_sequence_length": 12, "num_register_tokens": 0, "learnable_channel_embeddings": true, "random_channel_embeddings": false, "num_projection_layers": 1, "aggregate_then_project": true, "use_flash_attn": false, "frozen_patch_embeddings": false, "qk_norm": false, "log_token_norm_stats": false, "_CLASS_": "helios.nn.flexihelios.EncoderConfig"}, "decoder_config": {"supported_modality_names": ["sentinel2_l2a", "sentinel1", "landsat", "worldcover", "srtm", "openstreetmap_raster", "wri_canopy_height_map", "cdl", "worldcereal"], "encoder_embedding_size": 768, "decoder_embedding_size": 768, "depth": 4, "mlp_ratio": 4.0, "num_heads": 12, "max_sequence_length": 12, "drop_path": 0.0, "learnable_channel_embeddings": true, "random_channel_embeddings": false, "use_flash_attn": false, "qk_norm": false, "_CLASS_": "helios.nn.flexihelios.PredictorConfig"}, "_CLASS_": "helios.nn.latent_mim.LatentMIMConfig"}, "dataset": {"dataset_configs": [{"h5py_dir": "/weka/dfive-default/helios/dataset/osm_sampling/h5py_data_w_missing_timesteps_zstd_3_128_x_4/cdl_gse_landsat_openstreetmap_raster_sentinel1_sentinel2_l2a_srtm_worldcereal_worldcover_worldpop_wri_canopy_height_map/1138828", "training_modalities": ["sentinel2_l2a", "sentinel1", "landsat", "worldcover", "srtm", "openstreetmap_raster", "wri_canopy_height_map", "cdl", "worldcereal"], "dtype": "float32", "normalize": true, "dataset_percentage": 1.0, "seed": 0, "apply_cutmix": false, "_CLASS_": "helios.data.dataset.HeliosDatasetConfig"}], "_CLASS_": "helios.data.concat.HeliosConcatDatasetConfig"}, "data_loader": {"work_dir": "/weka/dfive-default/helios/checkpoints/joer/phase2.0_base_lr0.0001_wd0.02", "global_batch_size": 512, "min_patch_size": 1, "max_patch_size": 8, "sampled_hw_p_list": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], "seed": 3622, "token_budget": 2250, "shuffle": true, "num_workers": 16, "prefetch_factor": 4, "drop_last": true, "num_dataset_repeats_per_epoch": 1, "_CLASS_": "helios.data.dataloader.HeliosDataLoaderConfig"}, "train_module": {"optim_config": {"compile": false, "fixed_fields": ["initial_lr"], "lr": 0.0001, "betas": [0.9, 0.999], "eps": 1e-08, "weight_decay": 0.02, "fused": false, "_CLASS_": "olmo_core.optim.adamw.AdamWConfig"}, "rank_microbatch_size": 32, "transform_config": {"transform_type": "flip_and_rotate", "transform_kwargs": {}, "_CLASS_": "helios.data.transform.TransformConfig"}, "compile_model": false, "dp_config": {"name": "fsdp", "param_dtype": "bfloat16", "reduce_dtype": "float32", "_CLASS_": "olmo_core.distributed.parallel.data_parallel.DataParallelConfig"}, "compile_loss": false, "max_grad_norm": 1.0, "scheduler": {"lr_field": "lr", "initial_lr_field": "initial_lr", "units": "steps", "warmup": 8000, "alpha_f": 0.1, "warmup_min_lr": 0.0, "_CLASS_": "olmo_core.optim.scheduler.CosWithWarmup"}, "find_unused_parameters": true, "loss_config": {"loss_config": {"tau": 0.1}, "_CLASS_": "helios.train.loss.LossConfig"}, "masking_config": {"strategy_config": {"encode_ratio": 0.5, "decode_ratio": 0.5, "allow_encoding_decoding_same_bandset": true, "only_decode_modalities": ["worldcover", "srtm", "openstreetmap_raster", "wri_canopy_height_map", "cdl", "worldcereal"]}, "_CLASS_": "helios.train.masking.MaskingConfig"}, "token_exit_cfg": {"sentinel2_l2a": 0, "sentinel1": 0, "landsat": 0, "worldcover": 0, "srtm": 0, "openstreetmap_raster": 0, "wri_canopy_height_map": 0, "cdl": 0, "worldcereal": 0}, "ema_decay": [1.0, 1.0], "contrastive_config": {"loss_config": {"weight": 0.1}, "_CLASS_": "helios.train.loss.LossConfig"}, "_CLASS_": "helios.train.train_module.contrastive_latentmim.ContrastiveLatentMIMTrainModuleConfig"}, "trainer": {"save_folder": "/weka/dfive-default/helios/checkpoints/joer/phase2.0_base_lr0.0001_wd0.02", "work_dir": "/weka/dfive-default/helios/checkpoints/joer/phase2.0_base_lr0.0001_wd0.02", "load_strategy": "if_available", "checkpointer": {"work_dir": "/weka/dfive-default/helios/checkpoints/joer/phase2.0_base_lr0.0001_wd0.02", "pre_download": false, "throttle_uploads": false, "_CLASS_": "olmo_core.train.checkpoint.CheckpointerConfig"}, "save_overwrite": false, "max_duration": {"value": 300, "unit": "epochs", "_CLASS_": "olmo_core.train.common.Duration"}, "cancel_check_interval": 25, "metrics_collect_interval": 10, "callbacks": {"wandb": {"enabled": true, "name": "phase2.0_base_lr0.0001_wd0.02", "project": "2025_10_02_phase2", "entity": "eai-ai2", "cancel_tags": ["cancel", "canceled", "cancelled"], "upload_dataset_distribution_pre_train": true, "upload_modality_data_band_distribution_pre_train": false, "restart_on_same_run": true, "_CLASS_": "helios.train.callbacks.wandb.HeliosWandBCallback"}, "speed_monitor": {"_CLASS_": "helios.train.callbacks.speed_monitor.HeliosSpeedMonitorCallback"}, "gpu_memory_monitor": {"_CLASS_": "olmo_core.train.callbacks.gpu_memory_monitor.GPUMemoryMonitorCallback"}, "config_saver": {"fname": "config.json", "_CLASS_": "olmo_core.train.callbacks.config_saver.ConfigSaverCallback"}, "downstream_evaluator": {"tasks": {"m-eurosat": {"dataset": "m-eurosat", "embedding_batch_size": 128, "num_workers": 8, "pooling_type": "mean", "norm_stats_from_pretrained": true, "input_modalities": [], "input_layers": [], "patch_size": 4, "probe_batch_size": 32, "epochs": 50, "linear_probe_eval_interval": 50, "eval_interval": {"value": 4000, "unit": "steps", "_CLASS_": "olmo_core.train.common.Duration"}, "probe_type": "linear", "use_pooled_tokens": false, "partition": "default", "norm_method": "norm_no_clip", "_CLASS_": "helios.train.callbacks.evaluator_callback.DownstreamTaskConfig"}, "mados": {"dataset": "mados", "embedding_batch_size": 128, "num_workers": 8, "pooling_type": "mean", "norm_stats_from_pretrained": false, "input_modalities": [], "input_layers": [], "probe_lr": 0.01, "patch_size": 4, "probe_batch_size": 128, "epochs": 50, "linear_probe_eval_interval": 50, "eval_interval": {"value": 4000, "unit": "steps", "_CLASS_": "olmo_core.train.common.Duration"}, "probe_type": "linear", "use_pooled_tokens": false, "partition": "default", "norm_method": "norm_no_clip", "_CLASS_": "helios.train.callbacks.evaluator_callback.DownstreamTaskConfig"}, "pastis": {"dataset": "pastis", "embedding_batch_size": 32, "num_workers": 8, "pooling_type": "mean", "norm_stats_from_pretrained": true, "input_modalities": ["sentinel2_l2a"], "input_layers": [], "probe_lr": 0.1, "patch_size": 4, "probe_batch_size": 8, "epochs": 50, "linear_probe_eval_interval": 50, "eval_interval": {"value": 20000, "unit": "steps", "_CLASS_": "olmo_core.train.common.Duration"}, "probe_type": "linear", "use_pooled_tokens": false, "partition": "default", "norm_method": "norm_no_clip", "_CLASS_": "helios.train.callbacks.evaluator_callback.DownstreamTaskConfig"}, "m_so2sat": {"dataset": "m-so2sat", "embedding_batch_size": 128, "num_workers": 8, "pooling_type": "mean", "norm_stats_from_pretrained": true, "input_modalities": [], "input_layers": [], "patch_size": 4, "probe_batch_size": 32, "epochs": 50, "linear_probe_eval_interval": 50, "eval_interval": {"value": 20000, "unit": "steps", "_CLASS_": "olmo_core.train.common.Duration"}, "probe_type": "linear", "use_pooled_tokens": false, "partition": "default", "norm_method": "norm_no_clip", "_CLASS_": "helios.train.callbacks.evaluator_callback.DownstreamTaskConfig"}, "nandi_sentinel2": {"dataset": "nandi", "embedding_batch_size": 128, "num_workers": 0, "pooling_type": "mean", "norm_stats_from_pretrained": true, "input_modalities": ["sentinel2_l2a"], "input_layers": ["sentinel2"], "patch_size": 4, "probe_batch_size": 32, "epochs": 50, "linear_probe_eval_interval": 50, "eval_interval": {"value": 20000, "unit": "steps", "_CLASS_": "olmo_core.train.common.Duration"}, "probe_type": "linear", "use_pooled_tokens": false, "partition": "default", "norm_method": "norm_no_clip", "_CLASS_": "helios.train.callbacks.evaluator_callback.DownstreamTaskConfig"}, "awf_sentinel2": {"dataset": "awf", "embedding_batch_size": 128, "num_workers": 0, "pooling_type": "mean", "norm_stats_from_pretrained": true, "input_modalities": ["sentinel2_l2a"], "input_layers": ["sentinel2"], "patch_size": 4, "probe_batch_size": 32, "epochs": 50, "linear_probe_eval_interval": 50, "eval_interval": {"value": 20000, "unit": "steps", "_CLASS_": "olmo_core.train.common.Duration"}, "probe_type": "linear", "use_pooled_tokens": false, "partition": "default", "norm_method": "norm_no_clip", "_CLASS_": "helios.train.callbacks.evaluator_callback.DownstreamTaskConfig"}, "awf_sentinel1": {"dataset": "awf", "embedding_batch_size": 128, "num_workers": 0, "pooling_type": "mean", "norm_stats_from_pretrained": true, "input_modalities": ["sentinel1"], "input_layers": ["sentinel1_ascending"], "patch_size": 4, "probe_batch_size": 32, "epochs": 50, "linear_probe_eval_interval": 50, "eval_interval": {"value": 20000, "unit": "steps", "_CLASS_": "olmo_core.train.common.Duration"}, "probe_type": "linear", "use_pooled_tokens": false, "partition": "default", "norm_method": "norm_no_clip", "_CLASS_": "helios.train.callbacks.evaluator_callback.DownstreamTaskConfig"}, "awf_landsat": {"dataset": "awf", "embedding_batch_size": 128, "num_workers": 0, "pooling_type": "mean", "norm_stats_from_pretrained": true, "input_modalities": ["landsat"], "input_layers": ["landsat"], "patch_size": 4, "probe_batch_size": 32, "epochs": 50, "linear_probe_eval_interval": 50, "eval_interval": {"value": 20000, "unit": "steps", "_CLASS_": "olmo_core.train.common.Duration"}, "probe_type": "linear", "use_pooled_tokens": false, "partition": "default", "norm_method": "norm_no_clip", "_CLASS_": "helios.train.callbacks.evaluator_callback.DownstreamTaskConfig"}}, "enabled": true, "eval_on_startup": false, "cancel_after_first_eval": false, "run_on_test": false, "_CLASS_": "helios.train.callbacks.evaluator_callback.DownstreamEvaluatorCallbackConfig"}, "garbage_collector": {"gc_interval": 1, "enabled": true, "_CLASS_": "olmo_core.train.callbacks.garbage_collector.GarbageCollectorCallback"}, "beaker": {"enabled": true, "result_dir": "/results", "_CLASS_": "olmo_core.train.callbacks.beaker.BeakerCallback"}, "checkpointer": {"save_interval": 5000, "ephemeral_save_interval": 250, "remove": "ephemeral_only", "enabled": true, "_CLASS_": "olmo_core.train.callbacks.checkpointer.CheckpointerCallback"}}, "bookkeeping_soft_timeout": 30, "no_checkpoints": false, "no_evals": false, "_CLASS_": "olmo_core.train.config.TrainerConfig"}, "visualize": {"output_dir": "/weka/dfive-default/helios/checkpoints/joer/phase2.0_base_lr0.0001_wd0.02/visualizations", "std_multiplier": 2.0, "_CLASS_": "helios.internal.experiment.HeliosVisualizeConfig"}, "init_seed": 12536, "_CLASS_": "helios.internal.experiment.HeliosExperimentConfig"}
|