Spaces:
Sleeping
Sleeping
sotirios-slv
committed on
Commit
·
fe0ca9c
1
Parent(s):
f0698ec
Removed extra bits, added some print statements
Browse files
app.py
CHANGED
@@ -7,7 +7,6 @@ from transformers import (
|
|
7 |
AutoModelForSpeechSeq2Seq,
|
8 |
AutoProcessor,
|
9 |
pipeline,
|
10 |
-
WhisperProcessor,
|
11 |
)
|
12 |
|
13 |
|
@@ -17,17 +16,12 @@ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
|
17 |
|
18 |
model_id = "openai/whisper-large-v3"
|
19 |
|
20 |
-
# model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
21 |
-
# model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
|
22 |
-
# )
|
23 |
|
24 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
25 |
model_id, low_cpu_mem_usage=True, use_safetensors=True
|
26 |
)
|
27 |
model.to(device)
|
28 |
|
29 |
-
# processor = WhisperProcessor.from_pretrained("openai/whisper-base.en")
|
30 |
-
|
31 |
processor = AutoProcessor.from_pretrained(model_id)
|
32 |
|
33 |
pipe = pipeline(
|
@@ -46,14 +40,18 @@ pipe = pipeline(
|
|
46 |
|
47 |
def transcribe_audio(audio):
|
48 |
result = pipe(audio)
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
50 |
return result
|
51 |
|
52 |
|
53 |
input_audio = gr.Audio(
|
54 |
sources=["microphone"],
|
55 |
type="filepath",
|
56 |
-
# optional=True,
|
57 |
waveform_options=gr.WaveformOptions(
|
58 |
waveform_color="#01C6FF",
|
59 |
waveform_progress_color="#0066B4",
|
|
|
7 |
AutoModelForSpeechSeq2Seq,
|
8 |
AutoProcessor,
|
9 |
pipeline,
|
|
|
10 |
)
|
11 |
|
12 |
|
|
|
16 |
|
17 |
model_id = "openai/whisper-large-v3"
|
18 |
|
|
|
|
|
|
|
19 |
|
20 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
21 |
model_id, low_cpu_mem_usage=True, use_safetensors=True
|
22 |
)
|
23 |
model.to(device)
|
24 |
|
|
|
|
|
25 |
processor = AutoProcessor.from_pretrained(model_id)
|
26 |
|
27 |
pipe = pipeline(
|
|
|
40 |
|
41 |
def transcribe_audio(audio):
|
42 |
result = pipe(audio)
|
43 |
+
print(f'TRANSCRIPTION {result["text"]}')
|
44 |
+
try:
|
45 |
+
for r in result:
|
46 |
+
print(r)
|
47 |
+
except:
|
48 |
+
print("ERROR")
|
49 |
return result
|
50 |
|
51 |
|
52 |
input_audio = gr.Audio(
|
53 |
sources=["microphone"],
|
54 |
type="filepath",
|
|
|
55 |
waveform_options=gr.WaveformOptions(
|
56 |
waveform_color="#01C6FF",
|
57 |
waveform_progress_color="#0066B4",
|