Spaces:
Build error
```python
import torch
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from IndicTransToolkit import IndicProcessor

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load model and tokenizer
model_name = "ai4bharat/indictrans2-indic-en-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True).to(DEVICE)
ip = IndicProcessor(inference=True)


def translate(text, src_lang="hin_Deva", tgt_lang="eng_Latn"):
    input_sentences = [text]
    batch = ip.preprocess_batch(input_sentences, src_lang=src_lang, tgt_lang=tgt_lang)
    inputs = tokenizer(batch, padding="longest", return_tensors="pt", truncation=True).to(DEVICE)

    with torch.no_grad():
        generated_tokens = model.generate(
            **inputs,
            use_cache=True,
            min_length=0,
            max_length=256,
            num_beams=5,
            num_return_sequences=1,
        )

    with tokenizer.as_target_tokenizer():
        generated_tokens = tokenizer.batch_decode(
            generated_tokens.detach().cpu().tolist(),
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )

    translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
    return translations[0]


# Gradio UI and API
demo = gr.Interface(
    fn=translate,
    inputs="text",
    outputs="text",
    examples=[
        ["जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।"],
        ["हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।"],
    ],
    title="IndicTrans2 Translator",
    description="Translate Indic languages to English using AI4Bharat's IndicTrans2 model",
)

demo.launch()
```
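The build log isn't shown here, so the cause is only a guess, but a common reason a Space fails to build is a dependency missing from `requirements.txt`. A minimal sketch covering the packages this app imports (names only, versions unpinned) would be along these lines:

```text
torch
transformers
gradio
IndicTransToolkit
```

Once the Space does build, the `gr.Interface` also exposes an API endpoint (the `/predict` route that `gradio_client` uses by default for an Interface). A small usage sketch, with a hypothetical Space id standing in for the real one:

```python
from gradio_client import Client

# Hypothetical Space id; replace with the actual "username/space-name"
client = Client("username/indictrans2-translator")

# Calls the translate() function behind the Interface with a Hindi sentence
result = client.predict("जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।", api_name="/predict")
print(result)
```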