jonatasgrosman committed on
Commit
512c442
·
1 Parent(s): 0d74fa6

adjust README

Browse files
Files changed (1) hide show
  1. README.md +22 -10
README.md CHANGED
@@ -24,10 +24,10 @@ model-index:
24
  metrics:
25
  - name: Test WER
26
  type: wer
27
- value: 12.18
28
  - name: Test CER
29
  type: cer
30
- value: 11.01
31
  ---
32
 
33
  # Wav2Vec2-Large-XLSR-53-portuguese
@@ -49,8 +49,9 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
49
 
50
  LANG_ID = "pt"
51
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-portuguese"
 
52
 
53
- test_dataset = load_dataset("common_voice", LANG_ID, split="test[:2%]")
54
 
55
  processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
56
  model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
@@ -64,17 +65,29 @@ def speech_file_to_array_fn(batch):
64
  return batch
65
 
66
  test_dataset = test_dataset.map(speech_file_to_array_fn)
67
- inputs = processor(test_dataset[:2]["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
68
 
69
  with torch.no_grad():
70
  logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
71
 
72
  predicted_ids = torch.argmax(logits, dim=-1)
 
73
 
74
- print("Prediction:", processor.batch_decode(predicted_ids))
75
- print("Reference:", test_dataset[:2]["sentence"])
 
 
76
  ```
77
 
 
 
 
 
 
 
 
 
 
78
  ## Evaluation
79
 
80
  The model can be evaluated as follows on the Portuguese test data of Common Voice.
@@ -90,7 +103,7 @@ LANG_ID = "pt"
90
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-portuguese"
91
  DEVICE = "cuda"
92
 
93
- CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", "-", ";", ":", '""', "%", "'", '"', "�", "ʿ", "·", "჻", "~", "՞",
94
  "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
95
  "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
96
 
@@ -134,6 +147,5 @@ print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"],
134
 
135
  **Test Result**:
136
 
137
- WER: 12.18%
138
-
139
- CER: 11.01%
 
24
  metrics:
25
  - name: Test WER
26
  type: wer
27
+ value: 12.51
28
  - name: Test CER
29
  type: cer
30
+ value: 13.59
31
  ---
32
 
33
  # Wav2Vec2-Large-XLSR-53-portuguese
 
49
 
50
  LANG_ID = "pt"
51
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-portuguese"
52
+ SAMPLES = 5
53
 
54
+ test_dataset = load_dataset("common_voice", LANG_ID, split=f"test[:{SAMPLES}]")
55
 
56
  processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
57
  model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
 
65
  return batch
66
 
67
  test_dataset = test_dataset.map(speech_file_to_array_fn)
68
+ inputs = processor(test_dataset["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
69
 
70
  with torch.no_grad():
71
  logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
72
 
73
  predicted_ids = torch.argmax(logits, dim=-1)
74
+ predicted_sentences = processor.batch_decode(predicted_ids)
75
 
76
+ for i, predicted_sentence in enumerate(predicted_sentences):
77
+ print("-" * 100)
78
+ print("Reference:", test_dataset[i]["sentence"])
79
+ print("Prediction:", predicted_sentence)
80
  ```
81
 
82
+ | Reference | Prediction |
83
+ | ------------- | ------------- |
84
+ | NEM O RADAR NEM OS OUTROS INSTRUMENTOS DETECTARAM O BOMBARDEIRO STEALTH. | NEM UM VADA ME OS OUTOS INSTRUMENTOS DE TETERAM UM BAMBEDER OSTAU |
85
+ | PEDIR DINHEIRO EMPRESTADO ÀS PESSOAS DA ALDEIA | PEDIAR DINHEIRO EMPRESTADO DÀS PESSOAS DA ALDEIA |
86
+ | PEDIR DINHEIRO EMPRESTADO ÀS PESSOAS DA ALDEIA | PEDIAR DINHEIRO EMPRESTADO DÀS PESSOAS DA ALDEIA |
87
+ | OITO | OITO |
88
+ | TRANCÁ-LOS | TRAM CALDOS |
89
+ | REALIZAR UMA INVESTIGAÇÃO PARA RESOLVER O PROBLEMA | REALIZARAMA INVESTIGAÇÃO PARA RESOLVER O PROBLEMA |
90
+
91
  ## Evaluation
92
 
93
  The model can be evaluated as follows on the Portuguese test data of Common Voice.
 
103
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-portuguese"
104
  DEVICE = "cuda"
105
 
106
+ CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
107
  "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
108
  "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
109
 
 
147
 
148
  **Test Result**:
149
 
150
+ - WER: 12.51%
151
+ - CER: 13.59%