sanchit-gandhi committed on
Commit
74e4942
·
1 Parent(s): 8a02493
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -55,6 +55,7 @@ max_range = np.iinfo(target_dtype).max
55
 
56
 
57
  def get_visualisation(idx):
 
58
  audio = dataset[idx]["audio"]
59
  array = (audio["array"] * max_range).astype(np.int16)
60
  sampling_rate = audio["sampling_rate"]
@@ -64,33 +65,34 @@ def get_visualisation(idx):
64
 
65
  wer_output = process_words(text1, text2, wer_default, wer_default)
66
  wer_percentage = 100 * wer_output.wer
67
- num_insertions = wer_output.insertions
68
 
69
  rel_length = len(text2.split()) / len(text1.split())
70
 
71
  diff = compare_string(text1, text2)
72
  full_text = style_text(diff)
73
 
74
- return (sampling_rate, array), wer_percentage, num_insertions, rel_length, full_text
75
 
76
 
77
  if __name__ == "__main__":
 
78
  with gr.Blocks() as demo:
79
  slider = gr.Slider(
80
- minimum=0, maximum=len(norm_target), step=1, label="Dataset sample"
81
  )
82
  btn = gr.Button("Analyse")
83
  audio_out = gr.Audio(label="Audio input")
84
  with gr.Row():
85
  wer = gr.Number(label="WER")
86
- insertions = gr.Number(label="Insertions")
87
- relative_length = gr.Number(label="Relative length of target / reference")
88
  text_out = gr.Markdown(label="Text difference")
89
 
90
  btn.click(
91
  fn=get_visualisation,
92
  inputs=slider,
93
- outputs=[audio_out, wer, insertions, relative_length, text_out],
94
  )
95
 
96
  demo.launch()
 
55
 
56
 
57
  def get_visualisation(idx):
58
+ idx -= 1
59
  audio = dataset[idx]["audio"]
60
  array = (audio["array"] * max_range).astype(np.int16)
61
  sampling_rate = audio["sampling_rate"]
 
65
 
66
  wer_output = process_words(text1, text2, wer_default, wer_default)
67
  wer_percentage = 100 * wer_output.wer
68
+ rel_insertions = wer_output.insertions / len(text1.split())
69
 
70
  rel_length = len(text2.split()) / len(text1.split())
71
 
72
  diff = compare_string(text1, text2)
73
  full_text = style_text(diff)
74
 
75
+ return (sampling_rate, array), wer_percentage, rel_insertions, rel_length, full_text
76
 
77
 
78
if __name__ == "__main__":
    # Build and launch the Gradio demo: pick a dataset sample with the slider,
    # press "Analyse", and get_visualisation fills in the audio, the metrics
    # and the styled text difference.
    with gr.Blocks() as demo:
        # Components only become part of the page when they are created inside
        # the Blocks context, so the description has to live here rather than
        # before the `with` statement (where it would never be rendered).
        gr.Markdown("Analyse the transcriptions generated by the Whisper large-v2 model on the TEDLIUM dev set.")

        # The slider is 1-based for display; get_visualisation subtracts 1
        # before indexing into the dataset.
        slider = gr.Slider(
            minimum=1, maximum=len(norm_target), step=1, label="Dataset sample"
        )
        btn = gr.Button("Analyse")
        audio_out = gr.Audio(label="Audio input")
        # NOTE(review): the three numeric outputs are assumed to share one row;
        # the original indentation was not recoverable - confirm the layout.
        with gr.Row():
            wer = gr.Number(label="WER")
            relative_insertions = gr.Number(label="Relative insertions (# insertions / target length)")
            relative_length = gr.Number(label="Relative length (reference length / target length)")
        text_out = gr.Markdown(label="Text difference")

        # The order of `outputs` must match the order of the tuple returned by
        # get_visualisation: audio, WER, relative insertions, relative length,
        # text difference.
        btn.click(
            fn=get_visualisation,
            inputs=slider,
            outputs=[audio_out, wer, relative_insertions, relative_length, text_out],
        )

    demo.launch()