pierreguillou committed on
Commit
048e1ca
1 Parent(s): b129e61

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from faster_whisper import WhisperModel
4
+ import pandas as pd
5
+
6
# Whisper checkpoint to load.
model_size = "large-v2"

# Use the first CUDA device when torch reports one, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# GPU runs with FP16 weights, CPU with INT8.
# (On GPU, compute_type="int8_float16" is an INT8 alternative.)
model_whisper = WhisperModel(
    model_size,
    **(
        {"device": "cuda", "compute_type": "float16"}
        if device == "cuda:0"
        else {"device": "cpu", "compute_type": "int8"}
    ),
)
19
+
20
def get_filename(file_obj):
    """Return the final path component of an uploaded file.

    Args:
        file_obj: Object exposing the file's path through a ``name``
            attribute.

    Returns:
        The file name with its directory part removed.
    """
    # Function-scope import keeps this block self-contained.
    import os

    # os.path.basename follows the platform's path separators, whereas a
    # hard-coded split on "/" returns the whole path on Windows.
    return os.path.basename(file_obj.name)
22
+
23
def audio_to_transcript(file_obj):
    """Transcribe an uploaded audio file into a table of segments.

    Args:
        file_obj: Uploaded file object whose ``name`` attribute is the path
            handed to the Whisper model, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(filename, df)`` tuple: the file name of the audio, and a
        DataFrame with one row per transcribed segment and the columns
        ``start``, ``end`` and ``text``.

    Raises:
        gr.Error: If no file was provided.
    """
    # Without this guard a missing upload fails on ``file_obj.name`` with an
    # opaque AttributeError; gr.Error carries a readable message instead.
    if file_obj is None:
        raise gr.Error("Please upload an audio file first.")

    # get all audio segments
    # NOTE(review): faster-whisper documents ``segments`` as a generator, so
    # the decoding work presumably happens while iterating below - confirm.
    segments, _ = model_whisper.transcribe(file_obj.name, beam_size=5, vad_filter=True)
    print("start")

    # Collect one (start, end, text) row per segment in a single pass.
    rows = [(segment.start, segment.end, segment.text) for segment in segments]

    # Build the transcript table in one step; the column order matches the
    # headers shown in the interface.
    df = pd.DataFrame(rows, columns=["start", "end", "text"])

    print(df)

    return get_filename(file_obj), df
43
+
44
## Gradio interface
# Column titles shown above the transcript table.
headers = ["start", "end", "text"]

# One file input; two outputs: the file name and the transcript table.
iface = gr.Interface(
    fn=audio_to_transcript,
    inputs=gr.File(label="Audio file"),
    outputs=[
        gr.Textbox(label="Name of the audio file"),
        gr.DataFrame(label="Transcript", headers=headers),
    ],
    title="Audio to Transcript",
    description="Just paste any audio file and get its corresponding transcript with timeline.",
    allow_flagging="never",
)

# Start the web app.
iface.launch()