sanchit-gandhi committed on
Commit
9d85ee2
·
1 Parent(s): 74e4942

load faster

Browse files
Files changed (1) hide show
  1. app.py +14 -4
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import numpy as np
2
  import unicodedata
3
  import diff_match_patch as dmp_module
@@ -40,7 +42,9 @@ def style_text(diff):
40
  return fullText
41
 
42
 
43
- dataset = load_dataset("distil-whisper/tedlium-long-form", split="validation")
 
 
44
 
45
  csv = pd.read_csv("assets/large-v2.csv")
46
 
@@ -76,7 +80,9 @@ def get_visualisation(idx):
76
 
77
 
78
  if __name__ == "__main__":
79
- gr.Markdown("Analyse the transcriptions generated by the Whisper large-v2 model on the TEDLIUM dev set.")
 
 
80
  with gr.Blocks() as demo:
81
  slider = gr.Slider(
82
  minimum=1, maximum=len(norm_target), step=1, label="Dataset sample"
@@ -85,8 +91,12 @@ if __name__ == "__main__":
85
  audio_out = gr.Audio(label="Audio input")
86
  with gr.Row():
87
  wer = gr.Number(label="WER")
88
- relative_insertions = gr.Number(label="Relative insertions (# insertions / target length)")
89
- relative_length = gr.Number(label="Relative length (reference length / target length)")
 
 
 
 
90
  text_out = gr.Markdown(label="Text difference")
91
 
92
  btn.click(
 
1
+ import os
2
+
3
  import numpy as np
4
  import unicodedata
5
  import diff_match_patch as dmp_module
 
42
  return fullText
43
 
44
 
45
+ dataset = load_dataset(
46
+ "distil-whisper/tedlium-long-form", split="validation", num_proc=os.cpu_count()
47
+ )
48
 
49
  csv = pd.read_csv("assets/large-v2.csv")
50
 
 
80
 
81
 
82
  if __name__ == "__main__":
83
+ gr.Markdown(
84
+ "Analyse the transcriptions generated by the Whisper large-v2 model on the TEDLIUM dev set."
85
+ )
86
  with gr.Blocks() as demo:
87
  slider = gr.Slider(
88
  minimum=1, maximum=len(norm_target), step=1, label="Dataset sample"
 
91
  audio_out = gr.Audio(label="Audio input")
92
  with gr.Row():
93
  wer = gr.Number(label="WER")
94
+ relative_insertions = gr.Number(
95
+ label="Relative insertions (# insertions / target length)"
96
+ )
97
+ relative_length = gr.Number(
98
+ label="Relative length (reference length / target length)"
99
+ )
100
  text_out = gr.Markdown(label="Text difference")
101
 
102
  btn.click(