cahya
/

wav2vec2-base-turkish

Automatic Speech Recognition

Generated from Trainer

hf-asr-leaderboard

robust-speech-event

Model card Files Files and versions

cahya committed on Feb 3, 2022

Commit

ef9c04d

·

1 Parent(s): b878951

add run_evaluation.py

Files changed (1) hide show

run_evaluation.py +3 -1

run_evaluation.py CHANGED Viewed

@@ -22,7 +22,9 @@ class KenLM:
         vocab_dict = tokenizer.get_vocab()
         self.vocabulary = [x[0] for x in sorted(vocab_dict.items(), key=lambda x: x[1], reverse=False)]
         self.vocabulary = self.vocabulary[:-2]
-        self.decoder = build_ctcdecoder(self.vocabulary, model_name, unigrams=unigrams)
     @staticmethod
     def lm_postprocess(text):

         vocab_dict = tokenizer.get_vocab()
         self.vocabulary = [x[0] for x in sorted(vocab_dict.items(), key=lambda x: x[1], reverse=False)]
         self.vocabulary = self.vocabulary[:-2]
+        with open(unigrams, "r") as f:
+            unigrams_text = f.read()
+            self.decoder = build_ctcdecoder(self.vocabulary, model_name, unigrams=unigrams_text)
     @staticmethod
     def lm_postprocess(text):