initial commit
- Dockerfile +73 -0
- README.md +390 -6
- app.py +196 -0
- requirements.txt +10 -0
Dockerfile
ADDED
@@ -0,0 +1,73 @@
# Use a CPU-only Python base image
FROM python:3.10-slim

# Environment setup
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV DEBIAN_FRONTEND=noninteractive

# Install system dependencies
RUN apt-get update && apt-get install -y \
    git \
    wget \
    curl \
    build-essential \
    python3-dev \
    libffi-dev \
    libssl-dev \
    libjpeg-dev \
    libpng-dev \
    libfreetype6-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Preinstall Python base tools
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Install base ML + tokenizers + NLP tools
# (the version specifier is quoted so the shell does not treat ">" as a redirect)
RUN pip install --no-cache-dir \
    "transformers>=4.33.2" \
    sentencepiece \
    sacremoses \
    nltk \
    pandas \
    regex \
    mock \
    mosestokenizer \
    bitsandbytes \
    scipy \
    accelerate \
    datasets

# Download NLTK data
RUN python3 -c "import nltk; nltk.download('punkt')"

# Install FastAPI app dependencies
RUN pip install --no-cache-dir fastapi uvicorn pydantic psutil

# Clone IndicTransToolkit into /app and install it in editable mode
RUN git clone https://github.com/VarunGumma/IndicTransToolkit.git /app/IndicTransToolkit && \
    pip install -e /app/IndicTransToolkit

# Copy app source
COPY . .

# Create non-root user
RUN useradd --create-home --shell /bin/bash app
RUN chown -R app:app /app
USER app

# Health route port
EXPOSE 7860

# Optional healthcheck (uses the GET /health route defined in app.py)
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run FastAPI
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md
CHANGED
@@ -1,11 +1,395 @@
---
title: IndicTrans2 Translator API
emoji: 🌐
colorFrom: indigo
colorTo: blue
sdk: docker
app_file: app.py
pinned: false
tags:
- translation
- fastapi
- docker
- indic
license: apache-2.0
---

# IndicTrans2 Translator API

A high-performance FastAPI translation backend powered by AI4Bharat's [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2) models, which support translation between English and the 22 scheduled Indian languages.

## 🌟 Features

- **Multilingual Support**: Translate between English and 11 major Indian languages
- **High-Quality Translations**: Powered by state-of-the-art IndicTrans2 models
- **REST API**: Simple HTTP API for easy integration
- **GPU Acceleration**: CUDA support for faster inference
- **Memory Optimization**: Efficient model loading and GPU memory management
- **Graceful Shutdown**: Proper cleanup of resources on server termination

## 🚀 Quick Start

### Prerequisites

- Python 3.8+
- CUDA-compatible GPU (recommended)
- At least 8GB GPU memory for optimal performance

### Installation

1. **Clone the repository**
```bash
git clone https://github.com/AI4Bharat/IndicTrans2
cd IndicTrans2
```

2. **Install dependencies**
```bash
# Install IndicTrans2 dependencies
source install.sh

# Install additional requirements for the server
pip install fastapi uvicorn torch transformers
```

3. **Install IndicTransToolkit**
```bash
cd huggingface_interface/IndicTransToolkit
pip install -e .
cd ../..
```

4. **Run the server**
```bash
python app.py
```

The server will start on `http://0.0.0.0:9000` (the Docker image in this repo serves on port 7860 instead).

## 📋 Supported Languages

The server supports translation between the following languages:

| Language | Code | Script |
|----------|------|--------|
| English | `eng_Latn` | Latin |
| Bengali | `ben_Beng` | Bengali |
| Punjabi | `pan_Guru` | Gurmukhi |
| Assamese | `asm_Beng` | Bengali |
| Konkani | `gom_Deva` | Devanagari |
| Gujarati | `guj_Gujr` | Gujarati |
| Hindi | `hin_Deva` | Devanagari |
| Kannada | `kan_Knda` | Kannada |
| Malayalam | `mal_Mlym` | Malayalam |
| Odia | `ory_Orya` | Odia |
| Tamil | `tam_Taml` | Tamil |
| Telugu | `tel_Telu` | Telugu |
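
For quick client-side validation, you can mirror the server's allow-list before sending a request; this sketch mirrors `lang_list` in `app.py` (any other code gets the "Not a valid dialect" error):

```python
# Language codes accepted by this server (mirrors lang_list in app.py)
SUPPORTED_LANGS = {
    "eng_Latn", "ben_Beng", "pan_Guru", "asm_Beng", "gom_Deva", "guj_Gujr",
    "hin_Deva", "kan_Knda", "mal_Mlym", "ory_Orya", "tam_Taml", "tel_Telu",
}

def is_valid_pair(source_lan: str, target_lang: str) -> bool:
    """Return True if the server will accept this language pair."""
    return source_lan in SUPPORTED_LANGS and target_lang in SUPPORTED_LANGS
```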

## 🔧 API Usage

### Translation Endpoint

**POST** `/api/v1/translate`

#### Request Body

```json
{
  "input_sentence": "Hello, how are you?",
  "source_lan": "eng_Latn",
  "target_lang": "hin_Deva"
}
```

#### Response

```json
{
  "message": "translation processed successfully in 0.42 seconds",
  "translation": "नमस्ते, आप कैसे हैं?"
}
```

#### Error Response

```json
{
  "message": "Not a valid dialect",
  "translation": null
}
```

### Example Usage

#### cURL
```bash
curl -X POST "http://localhost:9000/api/v1/translate" \
  -H "Content-Type: application/json" \
  -d '{
    "input_sentence": "Good morning!",
    "source_lan": "eng_Latn",
    "target_lang": "hin_Deva"
  }'
```

#### Python
```python
import requests

url = "http://localhost:9000/api/v1/translate"
data = {
    "input_sentence": "Good morning!",
    "source_lan": "eng_Latn",
    "target_lang": "hin_Deva"
}

response = requests.post(url, json=data)
print(response.json())
```

#### JavaScript
```javascript
const response = await fetch('http://localhost:9000/api/v1/translate', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    input_sentence: 'Good morning!',
    source_lan: 'eng_Latn',
    target_lang: 'hin_Deva'
  })
});

const result = await response.json();
console.log(result);
```

## ⚡ Performance Optimization

The server is optimized for production use with several performance features:

### Model Configuration
- **Distilled Models**: Uses 200M-parameter distilled models for faster inference
- **Memory Efficient**: Automatic GPU memory cleanup after each request
- **Batch Processing**: Supports batch translation for multiple sentences

### Recommended Settings for Speed
To trade a little quality for speed, you can modify the following in `app.py`:

```python
# Enable quantization for faster inference
quantization = "4-bit"  # or "8-bit"

# Reduce generation parameters for speed
max_length = 128  # Reduced from 256
num_beams = 1     # Greedy decoding for fastest results
```

## 🏗️ Architecture

The server uses a dual-model architecture:

1. **English → Indic Model**: `ai4bharat/indictrans2-en-indic-dist-200M`
2. **Indic → English Model**: `ai4bharat/indictrans2-indic-en-dist-200M`

The appropriate model is selected automatically based on the target language, as sketched below:
- If the target is English (`eng_Latn`): uses the Indic→English model
- If the target is any Indic language: uses the English→Indic model
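
Condensed from the request handler in `app.py`, the selection boils down to:

```python
# From app.py (condensed): route each request by its target language
if input.target_lang == "eng_Latn":
    model, tokenizer = indic_en_model, indic_en_tokenizer  # Indic -> English
else:
    model, tokenizer = en_indic_model, en_indic_tokenizer  # English -> Indic
```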

## 🔧 Configuration

### Runtime Settings

These are defined as constants at the top of `app.py` (they are not read from the environment):

| Setting | Default | Description |
|---------|---------|-------------|
| `BATCH_SIZE` | `4` | Batch size for translation |
| `DEVICE` | `"cuda"` if available, else `"cpu"` | Device for model inference |
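
If you prefer to configure these per deployment, a minimal sketch (an assumption, not current `app.py` behavior) is to read them from the environment at startup:

```python
import os

import torch

# Hypothetical: derive the settings from environment variables instead
BATCH_SIZE = int(os.getenv("BATCH_SIZE", "4"))
DEVICE = os.getenv("DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
```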

### Model Selection

You can switch between model variants by changing the checkpoint directories:

```python
# For base models (higher quality, slower)
en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"

# For distilled models (faster, good quality)
en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-dist-200M"
```

## 🐳 Docker Deployment

For a GPU deployment outside Hugging Face Spaces, create a `Dockerfile` along these lines:

```dockerfile
FROM nvidia/cuda:11.8.0-devel-ubuntu20.04

# Install Python and dependencies
RUN apt-get update && apt-get install -y python3 python3-pip git
WORKDIR /app

# Clone and set up IndicTrans2 (RUN uses /bin/sh, so invoke the script with bash)
RUN git clone https://github.com/AI4Bharat/IndicTrans2 .
RUN bash install.sh
RUN pip install fastapi uvicorn

# Install IndicTransToolkit
WORKDIR /app/huggingface_interface/IndicTransToolkit
RUN pip install -e .
WORKDIR /app

# Copy your server file
COPY app.py .

# Expose port
EXPOSE 9000

# Run the server
CMD ["python3", "app.py"]
```

Build and run:
```bash
docker build -t indictrans2-server .
docker run --gpus all -p 9000:9000 indictrans2-server
```

## 📊 Benchmarks

The IndicTrans2 models achieve state-of-the-art performance on several benchmarks:

- **FLORES-200**: Evaluation across the 22 scheduled Indian languages
- **IN22**: A new benchmark with 1024 sentences across multiple domains
- **chrF++**: Primary evaluation metric for translation quality

For detailed benchmark results, refer to the [IndicTrans2 paper](https://arxiv.org/abs/2305.16307).
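
If you want to score this server's output yourself, chrF++ is available through `sacrebleu`; a minimal sketch, assuming `pip install sacrebleu` (it is not in `requirements.txt`):

```python
from sacrebleu.metrics import CHRF

hypotheses = ["नमस्ते, आप कैसे हैं?"]    # system outputs
references = [["नमस्ते, आप कैसे हैं?"]]  # references[0] holds the first reference for every hypothesis

chrf = CHRF(word_order=2)  # word_order=2 selects chrF++
print(chrf.corpus_score(hypotheses, references))
```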

## 🛠️ Development

### Running in Development Mode

```bash
# Install development dependencies
pip install "fastapi[all]" "uvicorn[standard]"

# Run with auto-reload
uvicorn app:app --host 0.0.0.0 --port 9000 --reload
```

### Testing

```bash
# Test the translation endpoint
python -c "
import requests
response = requests.post('http://localhost:9000/api/v1/translate',
    json={'input_sentence': 'Hello', 'source_lan': 'eng_Latn', 'target_lang': 'hin_Deva'})
print(response.json())
"
```

## 🚦 Production Deployment

### Using Gunicorn

```bash
pip install gunicorn

# Run with multiple workers
gunicorn app:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:9000
```
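Note that each worker imports `app.py` and loads its own copy of both models, so memory use scales linearly with `-w`; start with a single worker on GPU machines unless you have the headroom.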

### Nginx Configuration

```nginx
server {
    listen 80;
    server_name your-domain.com;

    location / {
        proxy_pass http://127.0.0.1:9000;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
    }
}
```

## 🔍 Troubleshooting

### Common Issues

1. **CUDA Out of Memory**
   - Reduce `BATCH_SIZE` in the code
   - Enable quantization: `quantization = "4-bit"`
   - Use a smaller model variant

2. **Slow Performance**
   - Ensure a GPU is available and actually being used
   - Enable quantization for faster inference
   - Reduce the `max_length` and `num_beams` parameters

3. **Model Loading Issues**
   - Check the internet connection for model downloading
   - Verify sufficient disk space (each checkpoint is on the order of 1–2 GB)
   - Ensure a proper CUDA installation

### Monitoring

This extends the existing `/health` route in `app.py` with resource stats (`GPUtil` is not in `requirements.txt`; install it with `pip install gputil`):

```python
import psutil
import GPUtil

@app.get("/health")
def health_check():
    gpus = GPUtil.getGPUs()
    gpu = gpus[0] if gpus else None
    return {
        "status": "healthy",
        "gpu_memory": f"{gpu.memoryUsed}/{gpu.memoryTotal}MB" if gpu else "No GPU",
        "cpu_percent": psutil.cpu_percent(),
        "memory_percent": psutil.virtual_memory().percent
    }
```

## 📄 License

This project uses the IndicTrans2 models, which are released under the MIT License. See the [LICENSE](https://github.com/AI4Bharat/IndicTrans2/blob/main/LICENSE) file for details.

## 🤝 Contributing

1. Fork the repository
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
4. Push to the branch (`git push origin feature/amazing-feature`)
5. Open a Pull Request

## 🙏 Acknowledgments

- [AI4Bharat](https://ai4bharat.iitm.ac.in/) for the IndicTrans2 models
- [Hugging Face](https://huggingface.co/) for model hosting and the transformers library
- [FastAPI](https://fastapi.tiangolo.com/) for the excellent web framework

## 📚 Citation

If you use this server in your research, please cite the IndicTrans2 paper:

```bibtex
@article{gala2023indictrans,
  title={IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages},
  author={Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan},
  journal={Transactions on Machine Learning Research},
  issn={2835-8856},
  year={2023},
  url={https://openreview.net/forum?id=vfT4YuzAYA}
}
```

## 🔗 Links

- [IndicTrans2 GitHub](https://github.com/AI4Bharat/IndicTrans2)
- [IndicTrans2 Paper](https://arxiv.org/abs/2305.16307)
- [AI4Bharat Website](https://ai4bharat.iitm.ac.in/)
- [Demo](https://models.ai4bharat.org/#/nmt/v2)
- [Colab Notebook](https://colab.research.google.com/github/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/colab_inference.ipynb)
app.py
ADDED
@@ -0,0 +1,196 @@
import signal
import sys
import time

import torch
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
from IndicTransToolkit.processor import IndicProcessor

BATCH_SIZE = 4
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
quantization = None


def initialize_model_and_tokenizer(ckpt_dir, quantization):
    if quantization == "4-bit":
        qconfig = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
    elif quantization == "8-bit":
        qconfig = BitsAndBytesConfig(
            load_in_8bit=True,
            bnb_8bit_use_double_quant=True,
            bnb_8bit_compute_dtype=torch.bfloat16,
        )
    else:
        qconfig = None

    tokenizer = AutoTokenizer.from_pretrained(ckpt_dir, trust_remote_code=True)
    model = AutoModelForSeq2SeqLM.from_pretrained(
        ckpt_dir,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        quantization_config=qconfig,
    )

    if qconfig is None:
        model = model.to(DEVICE)
        # Only use half precision if CUDA is available
        if DEVICE == "cuda":
            model.half()

    model.eval()

    return tokenizer, model


def batch_translate(input_sentences, src_lang, tgt_lang, model, tokenizer, ip):
    translations = []
    for i in range(0, len(input_sentences), BATCH_SIZE):
        batch = input_sentences[i : i + BATCH_SIZE]

        # Preprocess the batch and extract entity mappings
        batch = ip.preprocess_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang)

        # Tokenize the batch and generate input encodings
        inputs = tokenizer(
            batch,
            truncation=True,
            padding="longest",
            return_tensors="pt",
            return_attention_mask=True,
        ).to(DEVICE)

        # Generate translations using the model
        with torch.no_grad():
            generated_tokens = model.generate(
                **inputs,
                use_cache=True,
                min_length=0,
                max_length=256,
                num_beams=4,
                num_return_sequences=1,
            )

        # Decode the generated tokens into text
        generated_tokens = tokenizer.batch_decode(
            generated_tokens,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )

        # Postprocess the translations, including entity replacement
        translations += ip.postprocess_batch(generated_tokens, lang=tgt_lang)

        del inputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return translations


# en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"  # larger base model
en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-dist-200M"
en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, quantization)

indic_en_ckpt_dir = "ai4bharat/indictrans2-indic-en-dist-200M"
indic_en_tokenizer, indic_en_model = initialize_model_and_tokenizer(indic_en_ckpt_dir, quantization)

ip = IndicProcessor(inference=True)


app = FastAPI()


class Translate(BaseModel):
    input_sentence: str
    source_lan: str
    target_lang: str


lang_list = [
    "eng_Latn",  # English (Latin script)
    "ben_Beng",  # Bengali
    "pan_Guru",  # Punjabi
    "asm_Beng",  # Assamese
    "gom_Deva",  # Konkani
    "guj_Gujr",  # Gujarati
    "hin_Deva",  # Hindi
    "kan_Knda",  # Kannada
    "mal_Mlym",  # Malayalam
    "ory_Orya",  # Odia
    "tam_Taml",  # Tamil
    "tel_Telu",  # Telugu
]


@app.post("/api/v1/translate")
def translate(input: Translate):
    start_time = time.time()
    if input.source_lan not in lang_list or input.target_lang not in lang_list:
        return {
            "message": "Not a valid dialect",
            "translation": None
        }

    # Pick the direction: Indic -> English, or English -> Indic
    if input.target_lang == "eng_Latn":
        model = indic_en_model
        tokenizer = indic_en_tokenizer
    else:
        model = en_indic_model
        tokenizer = en_indic_tokenizer

    translation = batch_translate(
        [input.input_sentence],  # batch_translate expects a list
        src_lang=input.source_lan,
        tgt_lang=input.target_lang,
        model=model,
        tokenizer=tokenizer,
        ip=ip,
    )

    # Calculate processing time
    processing_time = round(time.time() - start_time, 2)
    return {
        "message": f"translation processed successfully in {processing_time} seconds",
        "translation": translation[0]
    }


@app.get("/health")
def health_check():
    return {
        "status": "healthy",
        "gpu_available": torch.cuda.is_available(),
        "gpu_count": torch.cuda.device_count() if torch.cuda.is_available() else 0
    }


# Signal handler for graceful shutdown
def handle_sigterm(signum, frame):
    print("Received SIGTERM signal. Cleaning up models and exiting...")

    # Delete models to free GPU memory
    global en_indic_tokenizer, en_indic_model, indic_en_tokenizer, indic_en_model
    del en_indic_tokenizer, en_indic_model
    del indic_en_tokenizer, indic_en_model

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    sys.exit(0)


# Register the signal handler
signal.signal(signal.SIGTERM, handle_sigterm)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=9000)
requirements.txt
ADDED
@@ -0,0 +1,10 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
pydantic==2.5.0
psutil==5.9.6
transformers==4.35.0
accelerate==0.24.1
tokenizers==0.14.1
sentencepiece==0.1.99
sacremoses==0.0.53
numpy==1.24.3