Queimo committed on
Commit
05d16fd
·
1 Parent(s): e115169

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import Speech2Text2Processor, SpeechEncoderDecoderModel
4
+ import soundfile as sf
5
+
6
# Load the model and its matching processor from one shared checkpoint id so
# the two can never drift apart.
# NOTE(review): the checkpoint name ends in "en-de" — presumably an
# English-to-German speech *translation* model, in which case the app's
# "transcribe" wording would be misleading; confirm against the model card.
_CHECKPOINT = "facebook/s2t-wav2vec2-large-en-de"

model = SpeechEncoderDecoderModel.from_pretrained(_CHECKPOINT)
processor = Speech2Text2Processor.from_pretrained(_CHECKPOINT)
9
+
10
+ # Define the transcription function
11
def transcribe_speech(file_info):
    """Convert an audio file to text using the module-level model/processor.

    Args:
        file_info: Path of the audio file to read (the Gradio audio input is
            configured to pass a file path). ``None`` or an empty path, i.e.
            nothing was uploaded, is tolerated.

    Returns:
        The first string decoded from the model output, or ``""`` when no
        file was supplied or the file holds no samples.
    """
    # Rate that is declared to the processor below; the waveform handed over
    # must really be at this rate or the model hears sped-up/slowed audio.
    target_rate = 16_000

    # Submitting the form without a file yields no path; do not crash.
    if not file_info:
        return ""

    # Keep the file's real sample rate instead of discarding it.
    speech, sample_rate = sf.read(file_info, dtype="float32")
    if speech.size == 0:
        return ""

    # Multi-channel files are returned as (frames, channels); average the
    # channels so the processor receives a single 1-D waveform rather than
    # misreading the array as a batch.
    if speech.ndim > 1:
        speech = speech.mean(axis=1)

    # Resample when the file is not already at the declared rate. Linear
    # interpolation keeps this dependency-free (torch is already imported by
    # the file); a dedicated resampler would give higher fidelity.
    if sample_rate != target_rate:
        target_len = max(1, round(len(speech) * target_rate / sample_rate))
        waveform = torch.from_numpy(speech).reshape(1, 1, -1)
        resampled = torch.nn.functional.interpolate(
            waveform, size=target_len, mode="linear", align_corners=False
        )
        speech = resampled.reshape(-1).numpy()

    # Process the speech
    inputs = processor(speech, sampling_rate=target_rate, return_tensors="pt")
    # Generate the output token ids
    generated_ids = model.generate(
        inputs=inputs["input_values"],
        attention_mask=inputs["attention_mask"],
    )
    # Decode the generated ids to text; one input was given, so take item 0.
    transcription = processor.batch_decode(generated_ids)

    return transcription[0]
22
+
23
# Create the Gradio interface.
# `gr.Audio` is used instead of the legacy `gr.inputs.Audio` namespace, which
# was deprecated in Gradio 3.x and removed in 4.x. The `source="upload"`
# keyword is omitted because it was renamed between those major versions;
# upload is the 3.x default (4.x additionally offers microphone recording).
iface = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(type="filepath", label="Upload your MP3 file"),
    outputs="text",
    title="Speech to Text Conversion",
    description="Upload an MP3 file to transcribe it to text using a state-of-the-art speech-to-text model.",
)

# Run the Gradio app
iface.launch()