add evaluation

Files changed (9)

README.md CHANGED Viewed

@@ -9,13 +9,38 @@ tags:
 - en
 - generated_from_trainer
 model-index:
-- name: ''
-  results: []
 ---
-<!-- This model card has been generated automatically according to the information the Trainer had access to. You
-should probably proofread and complete it, then remove this comment. -->
 #
 This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the librispeech_asr dataset.
@@ -23,6 +48,7 @@ It achieves the following results on the evaluation set:
 - Loss: 0.1444
 - Wer: 0.1167
 ## Model description
 More information needed

 - en
 - generated_from_trainer
 model-index:
+- name: XLS-R-300M - English
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: LibriSpeech ASR
+      type: librispeech_asr
+      args: clean
+    metrics:
+       - name: Test WER
+         type: wer
+         value: 12.29
+       - name: Test CER
+         type: cer
+         value: 3.34
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: Robust Speech Event - Dev Data
+      type: speech-recognition-community-v2/dev_data
+      args: en
+    metrics:
+       - name: Validation WER
+         type: wer
+         value: 36.75
+       - name: Validation CER
+         type: cer
+         value: 14.83
 ---
 #
 This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the librispeech_asr dataset.
 - Loss: 0.1444
 - Wer: 0.1167
 ## Model description
 More information needed

eval.py CHANGED Viewed

@@ -44,7 +44,7 @@ def log_results(result: Dataset, args: Dict[str, str]):
                 p.write(f"{i}" + "\n")
                 p.write(batch["prediction"] + "\n")
                 t.write(f"{i}" + "\n")
-                t.write(batch["target"] + "\n")
             result.map(write_to_file, with_indices=True)
@@ -62,11 +62,6 @@ def normalize_text(text: str) -> str:
     for t in token_sequences_to_ignore:
         text = " ".join(text.split(t))
-    kakasi = pykakasi.kakasi()
-    tagger = fugashi.Tagger()
-    text = "".join([item['hira'] for item in kakasi.convert(text)])
-    text = " ".join([word.surface for word in tagger(text)])
     return text
@@ -97,7 +92,7 @@ def main(args):
         )
         batch["prediction"] = prediction["text"]
-        batch["target"] = normalize_text(batch["text"])
         return batch
     # run inference on all examples
@@ -123,6 +118,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'`  for Common Voice"
     )
     parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
     parser.add_argument(
         "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Defaults to 5 seconds."

                 p.write(f"{i}" + "\n")
                 p.write(batch["prediction"] + "\n")
                 t.write(f"{i}" + "\n")
+                t.write(batch['target'] + "\n")
             result.map(write_to_file, with_indices=True)
     for t in token_sequences_to_ignore:
         text = " ".join(text.split(t))
     return text
         )
         batch["prediction"] = prediction["text"]
+        batch["target"] = normalize_text(batch[args.sentence_column])
         return batch
     # run inference on all examples
     parser.add_argument(
         "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'`  for Common Voice"
     )
+    parser.add_argument(
+        "--sentence_column", type=str, required=True, help="Name of column that holds text label"
+    )
     parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
     parser.add_argument(
         "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Defaults to 5 seconds."

eval.sh CHANGED Viewed

@@ -1,17 +1,17 @@
 ./eval.py \
 --model_id . \
---dataset "librispeech_asr" \
---config clean \
---split test \
 --chunk_length_s 5.0 \
 --stride_length_s 1.0 \
---log_outputs
-# ./eval.py \
-# --model_id vitouphy/xls-r-300m-ja \
-# --dataset "speech-recognition-community-v2/dev_data" \
-# --config ja \
-# --split validation \
-# --chunk_length_s 5.0 \
-# --stride_length_s 1.0 \
-# --log_outputs

+# ./eval.py \
+# --model_id . \
+# --dataset "librispeech_asr" \
+# --config clean \
+# --split test \
+# --sentence_column "text" \
+# --log_outputs
 ./eval.py \
 --model_id . \
+--dataset "speech-recognition-community-v2/dev_data" \
+--config en \
+--split validation \
+--sentence_column "sentence" \
 --chunk_length_s 5.0 \
 --stride_length_s 1.0 \
+--log_outputs

librispeech_asr_clean_test_eval_results.txt ADDED Viewed

log_librispeech_asr_clean_test_predictions.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

log_librispeech_asr_clean_test_targets.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

log_speech-recognition-community-v2_dev_data_en_validation_predictions.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

log_speech-recognition-community-v2_dev_data_en_validation_targets.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

speech-recognition-community-v2_dev_data_en_validation_eval_results.txt ADDED Viewed