sanchit-gandhi
committed on
Commit
·
74e4942
1
Parent(s):
8a02493
up
Browse files
app.py
CHANGED
@@ -55,6 +55,7 @@ max_range = np.iinfo(target_dtype).max
|
|
55 |
|
56 |
|
57 |
def get_visualisation(idx):
|
|
|
58 |
audio = dataset[idx]["audio"]
|
59 |
array = (audio["array"] * max_range).astype(np.int16)
|
60 |
sampling_rate = audio["sampling_rate"]
|
@@ -64,33 +65,34 @@ def get_visualisation(idx):
|
|
64 |
|
65 |
wer_output = process_words(text1, text2, wer_default, wer_default)
|
66 |
wer_percentage = 100 * wer_output.wer
|
67 |
-
|
68 |
|
69 |
rel_length = len(text2.split()) / len(text1.split())
|
70 |
|
71 |
diff = compare_string(text1, text2)
|
72 |
full_text = style_text(diff)
|
73 |
|
74 |
-
return (sampling_rate, array), wer_percentage,
|
75 |
|
76 |
|
77 |
if __name__ == "__main__":
|
|
|
78 |
with gr.Blocks() as demo:
|
79 |
slider = gr.Slider(
|
80 |
-
minimum=
|
81 |
)
|
82 |
btn = gr.Button("Analyse")
|
83 |
audio_out = gr.Audio(label="Audio input")
|
84 |
with gr.Row():
|
85 |
wer = gr.Number(label="WER")
|
86 |
-
|
87 |
-
relative_length = gr.Number(label="Relative length
|
88 |
text_out = gr.Markdown(label="Text difference")
|
89 |
|
90 |
btn.click(
|
91 |
fn=get_visualisation,
|
92 |
inputs=slider,
|
93 |
-
outputs=[audio_out, wer,
|
94 |
)
|
95 |
|
96 |
demo.launch()
|
|
|
55 |
|
56 |
|
57 |
def get_visualisation(idx):
|
58 |
+
idx -= 1
|
59 |
audio = dataset[idx]["audio"]
|
60 |
array = (audio["array"] * max_range).astype(np.int16)
|
61 |
sampling_rate = audio["sampling_rate"]
|
|
|
65 |
|
66 |
wer_output = process_words(text1, text2, wer_default, wer_default)
|
67 |
wer_percentage = 100 * wer_output.wer
|
68 |
+
rel_insertions = wer_output.insertions / len(text1.split())
|
69 |
|
70 |
rel_length = len(text2.split()) / len(text1.split())
|
71 |
|
72 |
diff = compare_string(text1, text2)
|
73 |
full_text = style_text(diff)
|
74 |
|
75 |
+
return (sampling_rate, array), wer_percentage, rel_insertions, rel_length, full_text
|
76 |
|
77 |
|
78 |
if __name__ == "__main__":
|
79 |
+
gr.Markdown("Analyse the transcriptions generated by the Whisper large-v2 model on the TEDLIUM dev set.")
|
80 |
with gr.Blocks() as demo:
|
81 |
slider = gr.Slider(
|
82 |
+
minimum=1, maximum=len(norm_target), step=1, label="Dataset sample"
|
83 |
)
|
84 |
btn = gr.Button("Analyse")
|
85 |
audio_out = gr.Audio(label="Audio input")
|
86 |
with gr.Row():
|
87 |
wer = gr.Number(label="WER")
|
88 |
+
relative_insertions = gr.Number(label="Relative insertions (# insertions / target length)")
|
89 |
+
relative_length = gr.Number(label="Relative length (reference length / target length)")
|
90 |
text_out = gr.Markdown(label="Text difference")
|
91 |
|
92 |
btn.click(
|
93 |
fn=get_visualisation,
|
94 |
inputs=slider,
|
95 |
+
outputs=[audio_out, wer, relative_insertions, relative_length, text_out],
|
96 |
)
|
97 |
|
98 |
demo.launch()
|