import os
from typing import Union

from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging


logger = logging.get_logger(__name__)


class Siglip2VisionConfig(PretrainedConfig):
    """Configuration for a Siglip2 NaViT vision encoder with rotary position embeddings (RoPE)."""

    model_type = "siglip2_navit_rope_model"
    base_config_key = "vision_config"

    def __init__(
        self,
        hidden_size=768,
        intermediate_size=3072,
        num_hidden_layers=12,
        num_attention_heads=12,
        num_channels=3,
        patch_size=16,
        hidden_act="gelu_pytorch_tanh",
        layer_norm_eps=1e-6,
        preserve_original_pe=True,
        disable_rope=False,
        num_patches=1369,
        rope_theta=10000.0,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Transformer encoder dimensions.
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # Patch embedding settings.
        self.num_channels = num_channels
        self.patch_size = patch_size

        self.hidden_act = hidden_act
        self.layer_norm_eps = layer_norm_eps

        # Positional-encoding options: optionally keep the original learned
        # position embeddings alongside RoPE, allow RoPE to be disabled,
        # and expose the RoPE base frequency.
        self.preserve_original_pe = preserve_original_pe
        self.disable_rope = disable_rope
        self.num_patches = num_patches
        self.rope_theta = rope_theta
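
# A minimal usage sketch (not part of the original module): instantiate the
# config with a few overrides and round-trip it through the inherited
# PretrainedConfig serialization helpers. The `__main__` guard and the chosen
# override values are illustrative assumptions; only the public
# PretrainedConfig API (to_dict / from_dict) is relied upon.
if __name__ == "__main__":
    config = Siglip2VisionConfig(hidden_size=1024, num_attention_heads=16, disable_rope=True)
    print(config.model_type)   # "siglip2_navit_rope_model"
    print(config.rope_theta)   # 10000.0 (default base frequency)

    # Serialize to a plain dict and restore an equivalent config.
    restored = Siglip2VisionConfig.from_dict(config.to_dict())
    assert restored.hidden_size == 1024
    assert restored.disable_rope is True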