import torch
from transformers import AutoTokenizer, T5EncoderModel


class T5Embedder:
    """Wrap a T5 tokenizer and encoder to turn text into encoder hidden states.

    The tokenizer and the encoder are both loaded in ``__init__`` via
    ``from_pretrained``; ``get_text_embeddings`` then runs the encoder
    without gradient tracking.
    """

    # Checkpoint names accepted for the ``from_pretrained`` argument.
    available_models = ["google/t5-v1_1-xxl"]

    def __init__(
        self,
        device,
        from_pretrained=None,
        *,
        cache_dir=None,
        hf_token=None,
        use_text_preprocessing=True,
        t5_model_kwargs=None,
        torch_dtype=None,
        use_offload_folder=None,
        model_max_length=120,
        local_files_only=False,
    ):
        """Load the tokenizer and the encoder.

        Args:
            device: Anything accepted by ``torch.device``; inputs are moved
                here and the default device maps place the model here.
            from_pretrained: Checkpoint name; must be listed in
                ``available_models`` (so the ``None`` default is rejected).
            cache_dir: Forwarded to both ``from_pretrained`` calls.
            hf_token: Stored on the instance; not used by this class itself.
            use_text_preprocessing: Stored on the instance; not used by this
                class itself.
            t5_model_kwargs: Extra keyword arguments for
                ``T5EncoderModel.from_pretrained``. When given, they are
                passed through unchanged and no default ``device_map``,
                dtype or offload folder is added.
            torch_dtype: Dtype for the default model kwargs; falls back to
                ``torch.bfloat16`` when falsy.
            use_offload_folder: When not ``None`` (and ``t5_model_kwargs`` is
                ``None``), used as ``offload_folder`` and the second half of
                the encoder is mapped to ``"disk"``.
            model_max_length: Maximum token length given to the tokenizer,
                both at load time and when encoding.
            local_files_only: Forwarded to both ``from_pretrained`` calls.

        Raises:
            AssertionError: If ``from_pretrained`` is not in
                ``available_models``.
        """
        self.device = torch.device(device)
        self.torch_dtype = torch_dtype or torch.bfloat16
        self.cache_dir = cache_dir

        # Defaults are only built when the caller did not provide kwargs.
        if t5_model_kwargs is None:
            t5_model_kwargs = {
                "low_cpu_mem_usage": True,
                "torch_dtype": self.torch_dtype,
            }
            if use_offload_folder is not None:
                t5_model_kwargs["offload_folder"] = use_offload_folder
                t5_model_kwargs["device_map"] = self._offload_device_map(
                    self.device
                )
            else:
                t5_model_kwargs["device_map"] = {
                    "shared": self.device,
                    "encoder": self.device,
                }

        self.use_text_preprocessing = use_text_preprocessing
        self.hf_token = hf_token

        # Explicit raise instead of a bare ``assert``: the check must still
        # run under ``python -O`` and should say what went wrong. The
        # exception type is kept as AssertionError for existing callers.
        if from_pretrained not in self.available_models:
            raise AssertionError(
                f"from_pretrained must be one of {self.available_models}, "
                f"got {from_pretrained!r}"
            )

        self.tokenizer = AutoTokenizer.from_pretrained(
            from_pretrained,
            model_max_length=model_max_length,
            cache_dir=cache_dir,
            local_files_only=local_files_only,
        )
        self.model = T5EncoderModel.from_pretrained(
            from_pretrained,
            cache_dir=cache_dir,
            local_files_only=local_files_only,
            **t5_model_kwargs,
        ).eval()
        self.model_max_length = model_max_length

    @staticmethod
    def _offload_device_map(device):
        """Build the device map that keeps half of the encoder on ``device``.

        The shared embedding, ``encoder.embed_tokens`` and encoder blocks
        0-11 are mapped to ``device``; blocks 12-23, the final layer norm and
        the dropout are mapped to ``"disk"``.
        """
        on_device_blocks = 12
        total_blocks = 24

        device_map = {
            "shared": device,
            "encoder.embed_tokens": device,
        }
        for index in range(total_blocks):
            target = device if index < on_device_blocks else "disk"
            device_map[f"encoder.block.{index}"] = target
        device_map["encoder.final_layer_norm"] = "disk"
        device_map["encoder.dropout"] = "disk"
        return device_map

    def get_text_embeddings(self, texts):
        """Tokenize ``texts`` and return the encoder output with its mask.

        Inputs are padded to the longest sequence in the batch and truncated
        to ``self.model_max_length`` tokens.

        Args:
            texts: Text input handed directly to the tokenizer.

        Returns:
            A ``(embeddings, attention_mask)`` tuple: the encoder's
            ``last_hidden_state`` (detached) and the attention mask that was
            moved to ``self.device``.
        """
        encoded = self.tokenizer(
            texts,
            max_length=self.model_max_length,
            padding="longest",
            truncation=True,
            return_attention_mask=True,
            add_special_tokens=True,
            return_tensors="pt",
        )
        input_ids = encoded["input_ids"].to(self.device)
        attention_mask = encoded["attention_mask"].to(self.device)

        with torch.no_grad():
            encoder_output = self.model(
                input_ids=input_ids,
                attention_mask=attention_mask,
            )
            text_encoder_embs = encoder_output["last_hidden_state"].detach()

        return text_encoder_embs, attention_mask