bektim committed on
Commit
48cf8b9
·
verified ·
1 Parent(s): 1c64252

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import AutoProcessor, SeamlessM4TModel, SeamlessM4Tv2Model
from transformers.pipelines.audio_utils import ffmpeg_read
4
+
5
class SeamlessM4TApp:
    """Speech-to-text helper built on the SeamlessM4T v2 checkpoint.

    The processor and model are loaded once in ``__init__`` and moved to the
    GPU when one is available. ``transcribe_audio`` takes an audio file path
    and returns text, which makes it usable directly as a Gradio callback.
    """

    # Hugging Face Hub checkpoint used for both the processor and the model.
    MODEL_ID = "facebook/seamless-m4t-v2-large"
    # Rate (Hz) the audio is resampled to and declared to the processor.
    SAMPLE_RATE = 16000
    # Language code of the generated text.
    TARGET_LANG = "eng"

    def __init__(self):
        """Select the compute device and load the processor and model."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Load model and processor.
        self.processor = AutoProcessor.from_pretrained(self.MODEL_ID)
        # The v2 checkpoint has to be loaded with the v2 architecture class;
        # the v1 ``SeamlessM4TModel`` does not match its weights.
        self.model = SeamlessM4Tv2Model.from_pretrained(self.MODEL_ID)
        self.model.to(self.device)

    def transcribe_audio(self, audio_path):
        """Transcribe the audio file at ``audio_path`` to text.

        Args:
            audio_path: Path of the audio file to transcribe. ``None`` or an
                empty string (nothing recorded/uploaded) is reported as an
                error message instead of being sent to the model.

        Returns:
            The transcription, or a string starting with
            ``"Error during transcription: "`` when anything fails. Errors are
            returned rather than raised so the UI can display them.
        """
        if not audio_path:
            return "Error during transcription: no audio provided"

        try:
            # The processor expects waveform samples, not a file path, so the
            # file is decoded (mono, resampled to SAMPLE_RATE) first.
            with open(audio_path, "rb") as audio_file:
                waveform = ffmpeg_read(audio_file.read(), self.SAMPLE_RATE)

            audio_inputs = self.processor(
                audios=waveform,
                return_tensors="pt",
                sampling_rate=self.SAMPLE_RATE,
            ).to(self.device)

            # ``generate`` synthesizes speech by default; text tokens are only
            # returned when ``generate_speech`` is disabled.
            with torch.no_grad():
                generated_tokens = self.model.generate(
                    **audio_inputs,
                    tgt_lang=self.TARGET_LANG,
                    generate_speech=False,
                )

            # First element holds the token sequences for the batch; decode
            # the single sequence that corresponds to the one input clip.
            transcription = self.processor.decode(
                generated_tokens[0].tolist()[0],
                skip_special_tokens=True,
            )
            return transcription

        except Exception as e:
            # UI boundary: surface the failure as text instead of crashing.
            return f"Error during transcription: {str(e)}"
42
+
43
+ # Initialize the Gradio interface
44
def create_interface():
    """Build the Gradio interface for speech-to-text transcription.

    Instantiates ``SeamlessM4TApp`` (which loads the model) and wires its
    ``transcribe_audio`` method to an audio input and a text output.

    Returns:
        The configured ``gr.Interface``; the caller is responsible for
        launching it.
    """
    app = SeamlessM4TApp()

    audio_input = gr.Audio(
        type="filepath",
        label="Upload Audio",
        # The description promises both upload and microphone input; the
        # previous single ``source="microphone"`` setting offered only one.
        # NOTE(review): ``sources`` is the Gradio 4+ keyword — confirm the
        # deployed Gradio version.
        sources=["upload", "microphone"],
    )

    return gr.Interface(
        fn=app.transcribe_audio,
        inputs=audio_input,
        outputs=gr.Textbox(label="Transcription"),
        title="SeamlessM4T Speech-to-Text",
        description="Upload audio or use microphone to transcribe speech to text using SeamlessM4T model.",
        examples=[],
        cache_examples=False,
    )
62
+
63
if __name__ == "__main__":
    # Script entry point: build the UI (this loads the model) and serve it.
    demo = create_interface()
    demo.launch()