Spaces:
Sleeping
Sleeping
import os | |
import torch | |
import gradio as gr | |
import time | |
from transformers import AutoTokenizer, pipeline | |
# Meta's NLLB model, able to translate between more than 200 languages.
# A quantized variant ('JustFrederik/nllb-200-distilled-600M-ct2-int8') was
# also considered; it ships without a tokenizer, which is why the tokenizer
# id is kept separately from the model id.
model = 'facebook/nllb-200-distilled-600M'
tokenizer = 'facebook/nllb-200-distilled-600M'

# NLLB translates poorly into Asturian, so a fine-tuned model is used for
# that target. It comes from the AINA project:
# https://huggingface.co/projecte-aina
model_ast = "projecte-aina/aina-translator-es-ast"

# Display name shown in the UI -> FLORES-200 language code passed to the
# translation pipelines. Insertion order is the order shown in the dropdowns.
flores_codes = {
    "Asturianu": "ast_Latn",
    "Castellano": "spa_Latn",
    "Català": "cat_Latn",
    "English": "eng_Latn",
    "Euskera": "eus_Latn",
    "Galego": "glg_Latn",
}
def translation(source, target, text):
    """Translate ``text`` between two of the languages in ``flores_codes``.

    Every target except Asturian is handled directly by the general model.
    For Asturian the dedicated model is used; it is invoked with Spanish as
    its source language, so input in any other language is first translated
    to Spanish with the general model and that intermediate text is then
    translated to Asturian.

    Args:
        source: Display name of the input language (a key of ``flores_codes``).
        target: Display name of the output language (a key of ``flores_codes``).
        text: The text to translate.

    Returns:
        The translated text as a string.

    Raises:
        KeyError: If ``source`` or ``target`` is not a key of ``flores_codes``.
    """
    source = flores_codes[source]
    target = flores_codes[target]
    spanish = flores_codes["Castellano"]
    asturian = flores_codes["Asturianu"]

    if target == asturian:
        # Start from the user's text; it is already Spanish unless the pivot
        # step below replaces it.
        texto_castellano = text
        if source != spanish:
            translator = pipeline('translation', model=model, tokenizer=tokenizer,
                                  src_lang=source, tgt_lang=spanish)
            # The pipeline returns a list of dicts; keep only the translated
            # string so it can be fed to the Asturian model.
            texto_castellano = translator(text, max_length=400)[0]['translation_text']
        translator_ast = pipeline('translation', model=model_ast, tokenizer=tokenizer,
                                  src_lang=spanish, tgt_lang=asturian)
        output = translator_ast(texto_castellano, max_length=400)
    else:
        translator = pipeline('translation', model=model, tokenizer=tokenizer,
                              src_lang=source, tgt_lang=target)
        output = translator(text, max_length=400)

    return output[0]['translation_text']
if __name__ == '__main__':
    print('\tIniciando...')

    # Build the Gradio demo: two language selectors and a text box in,
    # one text box out.
    language_names = list(flores_codes)
    source_selector = gr.Dropdown(language_names, value='Castellano', label='Idioma original')
    target_selector = gr.Dropdown(language_names, value='Asturianu', label='Traducir al...')
    input_box = gr.Textbox(label="Texto a traducir")
    output_box = gr.Textbox(label="Texto traducido")

    # Text shown under the title: credits for the models and the Space this
    # app was adapted from.
    description = """
Este traductor utiliza el siguiente modelo de lenguaje de Meta: https://github.com/facebookresearch/fairseq/tree/nllb\n
Excepto para traducir al asturiano que usa el modelo del proyecto AINA: https://huggingface.co/projecte-aina/aina-translator-es-ast\n
Adaptado de: https://huggingface.co/spaces/Azwaw/Text_Translation_Multi-languages
"""

    demo = gr.Interface(
        translation,
        [source_selector, target_selector, input_box],
        [output_box],
        title="Traductor Multilingüe",
        description=description,
        submit_btn="Traducir",
    )
    demo.launch()