| from transformers import T5Tokenizer | |
| from typing import List, Optional, Tuple, Union | |
class OpenMoeTokenizer(T5Tokenizer):
    """``T5Tokenizer`` variant that pads on the left and prepends a leading token.

    After the parent initializer runs, the instance is configured to pad on the
    left, to prepend ``pad_token_id`` to the first segment, and not to append
    EOS to the first segment.
    """

    def __init__(self, *args, **kwargs):
        """Forward every argument to ``T5Tokenizer`` and then pin the settings.

        The three attributes are assigned *after* the parent initializer, so
        they replace whatever values the parent established for them.
        """
        super().__init__(*args, **kwargs)
        self.padding_side = "left"
        self.add_bos_token = True
        self.add_eos_token = False

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """Assemble model inputs from one or two token-id sequences.

        Args:
            token_ids_0: Ids of the first (or only) segment.
            token_ids_1: Ids of the optional second segment.

        Returns:
            The first segment, passed through the inherited
            ``_add_eos_if_not_present`` helper when ``add_eos_token`` is set and
            prefixed with ``pad_token_id`` when ``add_bos_token`` is set. When a
            second segment is given, it is passed through the same helper
            (independently of ``add_eos_token``) and concatenated after the
            first.
        """
        first_segment = token_ids_0

        # EOS handling must precede the prefix step to keep the original
        # order of helper calls.
        if self.add_eos_token:
            first_segment = self._add_eos_if_not_present(first_segment)

        # The pad token id is what this tokenizer places in the BOS position.
        if self.add_bos_token:
            first_segment = [self.pad_token_id] + first_segment

        # Single-sequence input: nothing to append.
        if token_ids_1 is None:
            return first_segment

        # The second segment always goes through the EOS helper.
        second_segment = self._add_eos_if_not_present(token_ids_1)
        return first_segment + second_segment