Update README.md
Browse files
README.md
CHANGED
|
@@ -46,74 +46,5 @@ See other Ukrainian models: https://github.com/egorsmkv/speech-recognition-uk
|
|
| 46 |
|
| 47 |
## Overview
|
| 48 |
|
| 49 |
-
This is
|
| 50 |
|
| 51 |
-
|
| 52 |
-
## Metrics
|
| 53 |
-
|
| 54 |
-
- AM (F16):
|
| 55 |
-
- WER: 0.1734 metric, 17.34%
|
| 56 |
-
- CER: 0.0333 metric, 3.33%
|
| 57 |
-
- Accuracy on words: 82.66%
|
| 58 |
-
- Accuracy on chars: 96.67%
|
| 59 |
-
|
| 60 |
-
## Demo
|
| 61 |
-
|
| 62 |
-
Use the https://huggingface.co/spaces/Yehor/w2v-bert-uk-v2.1-demo space to see how the model works with your own audio files.
|
| 63 |
-
|
| 64 |
-
## Usage
|
| 65 |
-
|
| 66 |
-
```python
|
| 67 |
-
# pip install -U torch soundfile transformers
|
| 68 |
-
|
| 69 |
-
import torch
|
| 70 |
-
import soundfile as sf
|
| 71 |
-
from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
|
| 72 |
-
|
| 73 |
-
# Config
|
| 74 |
-
model_name = 'Yehor/w2v-bert-uk-v2.1'
|
| 75 |
-
device = 'cuda:0' # or cpu
|
| 76 |
-
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
| 77 |
-
sampling_rate = 16_000
|
| 78 |
-
|
| 79 |
-
# Load the model
|
| 80 |
-
asr_model = AutoModelForCTC.from_pretrained(model_name, torch_dtype=torch_dtype).to(device)
|
| 81 |
-
processor = Wav2Vec2BertProcessor.from_pretrained(model_name)
|
| 82 |
-
|
| 83 |
-
paths = [
|
| 84 |
-
'sample1.wav',
|
| 85 |
-
]
|
| 86 |
-
|
| 87 |
-
# Extract audio
|
| 88 |
-
audio_inputs = []
|
| 89 |
-
for path in paths:
|
| 90 |
-
audio_input, _ = sf.read(path)
|
| 91 |
-
audio_inputs.append(audio_input)
|
| 92 |
-
|
| 93 |
-
# Transcribe the audio
|
| 94 |
-
inputs = processor(audio_inputs, sampling_rate=sampling_rate).input_features
|
| 95 |
-
features = torch.tensor(inputs).to(device)
|
| 96 |
-
|
| 97 |
-
with torch.inference_mode():
|
| 98 |
-
logits = asr_model(features).logits
|
| 99 |
-
|
| 100 |
-
predicted_ids = torch.argmax(logits, dim=-1)
|
| 101 |
-
predictions = processor.batch_decode(predicted_ids)
|
| 102 |
-
|
| 103 |
-
# Log results
|
| 104 |
-
print('Predictions:')
|
| 105 |
-
print(predictions)
|
| 106 |
-
```
|
| 107 |
-
|
| 108 |
-
## Cite this work
|
| 109 |
-
|
| 110 |
-
```
|
| 111 |
-
@misc {smoliakov_2025,
|
| 112 |
-
author = { {Smoliakov} },
|
| 113 |
-
title = { w2v-bert-uk-v2.1 (Revision 094c59d) },
|
| 114 |
-
year = 2025,
|
| 115 |
-
url = { https://huggingface.co/Yehor/w2v-bert-uk-v2.1 },
|
| 116 |
-
doi = { 10.57967/hf/4554 },
|
| 117 |
-
publisher = { Hugging Face }
|
| 118 |
-
}
|
| 119 |
-
```
|
|
|
|
| 46 |
|
| 47 |
## Overview
|
| 48 |
|
| 49 |
+
This is the https://huggingface.co/Yehor/w2v-bert-uk-v2.1 model with its tensors saved in fp16 format.
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|