Spaces:
Running
Running
| from .v3 import GoogleAuthTranslation | |
| from pathlib import Path | |
| import yaml | |
| import os | |
# Upper bounds on the input text length, measured with len(). Each can be
# overridden through the environment variable of the same name; the value is
# parsed with int(), so a non-numeric setting raises ValueError at import time.
# Limit applied to English ('en') input.
MAX_ENG_TEXT_LENGTH = int(os.getenv('MAX_ENG_TEXT_LENGTH', 200))
# Limit applied to Korean / Japanese / Chinese ('ko', 'ja', 'zh-CN', 'zh') input.
MAX_CJK_TEXT_LENGTH = int(os.getenv('MAX_CJK_TEXT_LENGTH', 100))
class Translator:
    """Translate input text into one of the configured target languages.

    The supported languages are read from a YAML file. The code expects the
    loaded document to be a mapping whose keys are the language codes handed
    to the translation backend and whose values are mappings containing a
    ``'language'`` entry (the identifier callers pass to ``get_translation``).
    """

    def __init__(self, yaml_path='./lang.yaml', project_id="cvpr-2022-demonstration"):
        """Create the translation client and load the language table.

        Args:
            yaml_path: Path of the YAML file describing the supported languages.
            project_id: Project id forwarded to ``GoogleAuthTranslation``.
                Defaults to the value that used to be hard-coded here.
        """
        self.google_translation = GoogleAuthTranslation(project_id=project_id)
        # Explicit encoding so that parsing does not depend on the platform's
        # locale default.
        with open(yaml_path, encoding='utf-8') as f:
            # NOTE(review): FullLoader is kept to preserve behaviour. If the
            # YAML file can ever come from an untrusted source, switch to
            # yaml.safe_load.
            self.supporting_languages = yaml.load(f, Loader=yaml.FullLoader)

    @staticmethod
    def length_check(lang, text):
        """Validate that ``text`` does not exceed the limit for ``lang``.

        Declared as a static method because it takes no ``self``; it can be
        called as ``Translator.length_check(...)`` or through an instance.

        Args:
            lang: Language code; one of 'en', 'ko', 'ja', 'zh-CN', 'zh'.
            text: Text whose ``len()`` is checked.

        Raises:
            AssertionError: If ``lang`` is not one of the codes above, or if
                ``len(text)`` is greater than the limit for that language.
                The exception type is kept for backward compatibility.
        """
        if lang in ('en',):
            limit, family = MAX_ENG_TEXT_LENGTH, "English"
        elif lang in ('ko', 'ja', 'zh-CN', 'zh'):
            limit, family = MAX_CJK_TEXT_LENGTH, "CJK"
        else:
            raise AssertionError(f"Not in ['ko', 'ja', 'zh-CN', 'zh', 'en'] ! | Language: {lang}")

        # The limit is inclusive: a text of exactly ``limit`` characters passes.
        if len(text) > limit:
            raise AssertionError(f"Input text is too long. For {family}, the text length should be less than {limit}. | Length: {len(text)}")

    def _get_text_with_lang(self, text, lang):
        """Return ``text`` expressed in ``lang`` together with the language used.

        The language of ``text`` is always detected first. When ``lang`` is
        ``None`` the detected language is used as the target, so no
        translation takes place.

        Args:
            text: Text to translate.
            lang: Target language code, or ``None`` to keep the detected one.

        Returns:
            A ``(target_text, lang)`` tuple. ``target_text`` is the original
            text when the detected language already equals the target.
        """
        lang_detected = self.google_translation.detect(text)
        print(f"Detected as: {lang_detected} | Destination: {lang}")

        if lang is None:
            lang = lang_detected

        # Only call the translation backend when there is something to do.
        if lang != lang_detected:
            return self.google_translation.translate(text, lang=lang), lang
        return text, lang

    def _convert_lang_from_index(self, lang):
        """Map a ``'language'`` value from the language table to its key.

        Args:
            lang: Value compared against each entry's ``'language'`` field.

        Returns:
            The key of the first entry in ``self.supporting_languages`` whose
            ``'language'`` field equals ``lang``.

        Raises:
            RuntimeError: If no entry matches, or if the language table cannot
                be traversed (for example an entry without a ``'language'``
                field). The underlying error is chained as the cause.
        """
        try:
            matches = [name for name in self.supporting_languages
                       if self.supporting_languages[name]['language'] == lang]
        except Exception as e:
            # Callers historically receive RuntimeError for any lookup
            # failure, so the type is preserved and the cause is chained.
            raise RuntimeError(
                f"Could not look up language {lang!r} in the language table: {e!r}"
            ) from e

        if not matches:
            raise RuntimeError(f"Unsupported language: {lang!r}")
        return matches[0]

    def get_translation(self, text, lang, use_translation=True):
        """Resolve ``lang`` and optionally translate ``text`` into it.

        Args:
            text: Input text.
            lang: Language identifier, resolved through
                ``_convert_lang_from_index``.
            use_translation: When false, ``text`` is returned untouched and
                the translation backend is not contacted.

        Returns:
            A ``(target_text, lang_code)`` tuple where ``lang_code`` is the
            resolved key from the language table.

        Raises:
            RuntimeError: If ``lang`` cannot be resolved.
        """
        lang_ = self._convert_lang_from_index(lang)
        if use_translation:
            target_text, _ = self._get_text_with_lang(text, lang_)
        else:
            target_text = text
        return target_text, lang_