Spaces:

bektim
/

kzs2t

Runtime error

App Files Files Community

bektim committed on Jan 16

Commit

48cf8b9

verified ·

1 Parent(s): 1c64252

Create app.py

Browse files

Files changed (1) hide show

app.py +65 -0

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import gradio as gr
+import torch
+from transformers import AutoProcessor, SeamlessM4TModel
+class SeamlessM4TApp:
+    def __init__(self):
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"Using device: {self.device}")
+        # Load model and processor
+        self.processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
+        self.model = SeamlessM4TModel.from_pretrained("facebook/seamless-m4t-v2-large")
+        self.model.to(self.device)
+    def transcribe_audio(self, audio_path):
+        try:
+            # Load and process the audio
+            audio_inputs = self.processor(
+                audios=audio_path,
+                return_tensors="pt",
+                sampling_rate=16000
+            ).to(self.device)
+            # Generate transcription
+            with torch.no_grad():
+                generated_tokens = self.model.generate(
+                    **audio_inputs,
+                    tgt_lang="eng",
+                    task="transcribe"
+                )
+            # Decode the generated tokens
+            transcription = self.processor.decode(
+                generated_tokens[0].tolist(),
+                skip_special_tokens=True
+            )
+            return transcription
+        except Exception as e:
+            return f"Error during transcription: {str(e)}"
+# Initialize the Gradio interface
+def create_interface():
+    app = SeamlessM4TApp()
+    interface = gr.Interface(
+        fn=app.transcribe_audio,
+        inputs=gr.Audio(
+            type="filepath",
+            label="Upload Audio",
+            source="microphone"
+        ),
+        outputs=gr.Textbox(label="Transcription"),
+        title="SeamlessM4T Speech-to-Text",
+        description="Upload audio or use microphone to transcribe speech to text using SeamlessM4T model.",
+        examples=[],
+        cache_examples=False
+    )
+    return interface
+if __name__ == "__main__":
+    interface = create_interface()
+    interface.launch()