Sib83 committed on
Commit 43da485 · verified · 1 Parent(s): 077f931

Create app.py

Files changed (1)
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
+ import gradio as gr
+ from transformers import pipeline
+
+ # Load Hugging Face pipelines for speech-to-text and grammar correction.
+ # For other languages, swap in language-specific checkpoints in the pipelines below.
+ s2t_en = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")
+ s2t_fr = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")  # Placeholder: replace with a French ASR model
+ s2t_ur = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")  # Placeholder: replace with an Urdu ASR model (if available)
+
+ grammar_en = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")
+ grammar_fr = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")  # Placeholder: replace with a French grammar-correction model
+ grammar_ur = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")  # Placeholder: replace with an Urdu grammar-correction model
+
+ def out(audio1, audio2, input_lang, output_lang):
+     if input_lang == "English":
+         s2t_model = s2t_en
+         grammar_model = grammar_en
+     elif input_lang == "French":
+         s2t_model = s2t_fr
+         grammar_model = grammar_fr
+     else:
+         s2t_model = s2t_ur
+         grammar_model = grammar_ur
+
+     # Check that at least one audio input was provided
+     if audio1 is None and audio2 is None:
+         return "No audio uploaded", "No audio uploaded"
+     elif audio1 is None:
+         # Use the second audio input (microphone recording)
+         x = s2t_model(audio2)["text"]
+         corrected = grammar_model(x)[0]["generated_text"]
+     else:
+         # Use the first audio input (uploaded file)
+         x = s2t_model(audio1)["text"]
+         corrected = grammar_model(x)[0]["generated_text"]
+
+     # If a different output language is selected, translate (a Hugging Face translation pipeline can be plugged in here)
+     if output_lang == "English":
+         # Placeholder: replace with a suitable translation pipeline;
+         # for now the corrected text is returned unchanged.
+         translated = corrected
+     elif output_lang == "French":
+         # Placeholder for an English-to-French translation model
+         translated = corrected
+     else:
+         # Placeholder for an English-to-Urdu translation model
+         translated = corrected
+
+     return corrected, translated
+
+
+ # Define the Gradio interface
+ iface = gr.Interface(
+     fn=out,
+     title="Speech-to-Text with Grammar Correction and Translation",
+     description="Select the input and output languages. Upload an audio file or use the microphone to convert speech to text, correct the grammar, and optionally translate it.",
+     inputs=[
+         gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File (Optional)"),
+         gr.Audio(sources=["microphone"], type="filepath", label="Record Using Microphone (Optional)"),
+         gr.Dropdown(["English", "French", "Urdu"], value="English", label="Input Language"),
+         gr.Dropdown(["English", "French", "Urdu"], value="English", label="Output Language"),
+     ],
+     outputs=["text", "text"],
+     examples=[["Grammar-Correct-Sample.mp3", None, "English", "English"], ["Grammar-Wrong-Sample.mp3", None, "English", "English"]],
+ )
+
+ # Launch the Gradio interface
+ iface.queue().launch(show_error=True)
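
Note: the translation branches in out() are placeholders that return the corrected text unchanged, as the in-code comments say. Below is a minimal sketch of how they could be wired up with translation pipelines from the Hub. The helper name translate_text and the Helsinki-NLP checkpoint names are assumptions (the en-ur checkpoint in particular should be verified on the Hub), and the sketch assumes the corrected text is English.

# Sketch only: a possible replacement for the placeholder translation branches in out().
# The OPUS-MT checkpoint names are assumptions; confirm they exist and match your language pair.
from transformers import pipeline

translate_en_fr = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")
translate_en_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")  # verify availability on the Hub

def translate_text(text, output_lang):
    # Assumes the input text is English; English output is returned unchanged.
    if output_lang == "French":
        return translate_en_fr(text)[0]["translation_text"]
    if output_lang == "Urdu":
        return translate_en_ur(text)[0]["translation_text"]
    return text

With something like this in place, the three placeholder branches could collapse to translated = translate_text(corrected, output_lang). The French and Urdu ASR placeholders could be swapped out in the same way, for example for a multilingual checkpoint such as openai/whisper-small.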