Spaces:
Running
on
Zero
Running
on
Zero
Upload 3 files
Browse files- README.md +6 -5
- app.py +43 -0
- requirements.txt +5 -0
README.md
CHANGED
@@ -1,13 +1,14 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.23.3
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
10 |
license: apache-2.0
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Quantum_Speech_Recognizer
|
3 |
+
emoji: π
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: pink
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.23.3
|
8 |
app_file: app.py
|
9 |
+
pinned: true
|
10 |
license: apache-2.0
|
11 |
+
short_description: Speech To Text
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
import os
|
4 |
+
|
5 |
+
# Model ID from Hugging Face
|
6 |
+
# Hugging Face Hub repository that provides both the model and its tokenizer.
model_id = "sbapan41/Quantum_STT"

# Build the speech-recognition pipeline once, at import time, so every
# request handled by the app reuses the same loaded model.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model_id,
    tokenizer=model_id,
    # Forwarded to generation: English output, transcription task.
    generate_kwargs={"language": "en", "task": "transcribe"},
)
|
15 |
+
|
16 |
+
# Transcription function with format check
|
17 |
+
# Accepted upload extensions, in the order they are shown to the user.
# Kept in one place so the check and the error message cannot drift apart.
_SUPPORTED_EXTENSIONS = (
    ".caf", ".au", ".opus", ".amr", ".alac", ".aiff", ".wma",
    ".m4a", ".ogg", ".aac", ".flac", ".wav", ".mp3",
)


def transcribe(audio):
    """Transcribe an uploaded audio file to text.

    Args:
        audio: Path of the uploaded audio file, or ``None`` (or an empty
            string) when nothing was uploaded.

    Returns:
        The ``"text"`` field of the pipeline result, or a user-facing
        message when no file was provided or its extension is not accepted.
    """
    if not audio:
        return "Please upload an audio file."

    # Compare case-insensitively so "CLIP.MP3" is accepted like "clip.mp3".
    ext = os.path.splitext(audio)[1].lower()
    if ext not in _SUPPORTED_EXTENSIONS:
        *leading, last = _SUPPORTED_EXTENSIONS
        accepted = f"{', '.join(leading)} or {last}"
        # A path without any suffix would otherwise render as "format: .".
        shown = ext or "(none)"
        # NOTE(review): the leading marker was mis-encoded in the source;
        # restored here as a cross mark -- confirm against the original.
        return f"❌ Unsupported file format: {shown}. Please upload {accepted} files."

    result = pipe(audio)
    return result["text"]
|
28 |
+
|
29 |
+
# Gradio interface
|
30 |
+
# Gradio UI: a single audio-upload input wired to `transcribe`, with the
# transcription shown in a text box.
# NOTE(review): the emoji in the labels and title were mis-encoded in the
# source and have been restored from their byte patterns -- confirm.
interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(
        type="filepath",  # hand the callback a file path
        sources=["upload"],  # restrict to file upload (not mic)
        label="🎵 Upload Audio File",
    ),
    outputs=gr.Textbox(label="📝 Transcription"),
    title="🎙️ Quantum Speech Recognizer",
    description=(
        "Upload an audio file (.caf, .au, .opus, .amr, .alac, .aiff, .wma, "
        ".m4a, .ogg, .aac, .flac, .wav, .mp3)"
        "<br>***to transcribe it using the Quantum_STT model***."
    ),
)

# Launch the interface
interface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio>=4.0.0
|
2 |
+
transformers==4.37.2
|
3 |
+
torch>=2.0.0
|
4 |
+
torchaudio
|
5 |
+
accelerate>=0.21.0
|