Create app.py
app.py ADDED
@@ -0,0 +1,68 @@
import gradio as gr
from transformers import pipeline

# Load Hugging Face models for speech-to-text and grammar correction.
# For different languages, you can modify the pipelines to use language-specific models.
s2t_en = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")
s2t_fr = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")  # For French
s2t_ur = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")  # For Urdu (if available)

grammar_en = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")
grammar_fr = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")  # For French grammar correction
grammar_ur = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")  # For Urdu grammar correction
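# NOTE (sketch, not part of the original file): all three ASR pipelines above point to the same
# English LibriSpeech checkpoint, so French and Urdu speech will not transcribe correctly.
# One possible swap is a multilingual Whisper checkpoint with the language passed at call time,
# e.g. (model choice and generate_kwargs usage are assumptions; verify against the model card):
#   s2t_fr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
#   s2t_fr("sample.wav", generate_kwargs={"language": "french"})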
def out(audio1, audio2, input_lang, output_lang):
    if input_lang == "English":
        s2t_model = s2t_en
        grammar_model = grammar_en
    elif input_lang == "French":
        s2t_model = s2t_fr
        grammar_model = grammar_fr
    else:
        s2t_model = s2t_ur
        grammar_model = grammar_ur

    # Check if audio is provided
    if audio1 is None and audio2 is None:
        return "No audio uploaded", "No audio uploaded"
    elif audio1 is None:
        # Use the second audio input (microphone or file)
        x = s2t_model(audio2)["text"]
        corrected = grammar_model(x)[0]["generated_text"]
    else:
        # Use the first audio input (microphone or file)
        x = s2t_model(audio1)["text"]
        corrected = grammar_model(x)[0]["generated_text"]

    # If the output language is different, translate (you can use Hugging Face models for translation)
    if output_lang == "English":
        # Placeholder translation model; you should replace this with a suitable translation pipeline.
        # Currently, we are assuming output will be in the same language.
        translated = corrected
    elif output_lang == "French":
        # Placeholder translation model for French
        translated = corrected
    else:
        # Placeholder translation model for Urdu
        translated = corrected

    return corrected, translated

# Define the Gradio interface. The legacy gr.inputs API (and its source/optional/default
# arguments) and launch(enable_queue=...) are gone in current Gradio, so the components
# are declared directly.
iface = gr.Interface(
    fn=out,
    title="Speech-to-Text with Grammar Correction and Translation",
    description="Select input and output language. Upload an audio file or use the microphone to convert speech to text, correct the grammar, and optionally translate it.",
    inputs=[
        gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File (Optional)"),
        gr.Audio(sources=["microphone"], type="filepath", label="Record Using Microphone (Optional)"),
        gr.Dropdown(["English", "French", "Urdu"], label="Input Language", value="English"),
        gr.Dropdown(["English", "French", "Urdu"], label="Output Language", value="English"),
    ],
    outputs=["text", "text"],
    # Each example row must provide a value for every input component.
    examples=[
        ["Grammar-Correct-Sample.mp3", None, "English", "English"],
        ["Grammar-Wrong-Sample.mp3", None, "English", "English"],
    ],
)

# Launch the Gradio interface (queueing is on by default in current Gradio)
iface.launch(show_error=True)
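The translation step in out() is currently a no-op placeholder. Below is a minimal sketch of how real translation could be plugged in, assuming Helsinki-NLP MarianMT checkpoints from the Hub (the model names and the translate() helper are illustrative, not part of the file above, and should be verified before use):

from transformers import pipeline

# Translation pipelines keyed by (input, output) language; pairs without a model fall back to the input text.
translators = {
    ("English", "French"): pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr"),
    ("French", "English"): pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en"),
    ("English", "Urdu"): pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur"),
}

def translate(text, input_lang, output_lang):
    translator = translators.get((input_lang, output_lang))
    if translator is None or input_lang == output_lang:
        return text
    return translator(text)[0]["translation_text"]

With such a helper in place, the three placeholder branches in out() could collapse to a single translated = translate(corrected, input_lang, output_lang).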