SakshiRathi77 committed
Commit a48dac6
1 Parent(s): 3915d32

Update app.py

Files changed (1):
  1. app.py +88 -0
app.py CHANGED
@@ -1,3 +1,91 @@
+import torch
 import gradio as gr
+import pytube as pt
+from transformers import pipeline
+from huggingface_hub import model_info
+import time
+import unicodedata
+
+MODEL_NAME = "SakshiRathi77/wav2vec2-large-xlsr-300m-hi-kagglex"
+lang = "hi"
+
+device = 0 if torch.cuda.is_available() else "cpu"
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model=MODEL_NAME,
+    device=device,
+)
+
+def transcribe(microphone, file_upload):
+    warn_output = ""
+    if (microphone is not None) and (file_upload is not None):
+        warn_output = (
+            "WARNING: You've uploaded an audio file and used the microphone. "
+            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
+        )
+
+    elif (microphone is None) and (file_upload is None):
+        return "ERROR: You have to either use the microphone or upload an audio file"
+    file = microphone if microphone is not None else file_upload
+    text = pipe(file)["text"]
+
+    return warn_output + text
+
+
+# def _return_yt_html_embed(yt_url):
+#     video_id = yt_url.split("?v=")[-1]
+#     HTML_str = (
+#         f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
+#         " </center>"
+#     )
+#     return HTML_str
+
+
+# def yt_transcribe(yt_url):
+#     yt = pt.YouTube(yt_url)
+#     html_embed_str = _return_yt_html_embed(yt_url)
+#     stream = yt.streams.filter(only_audio=True)[0]
+#     stream.download(filename="audio.mp3")
+
+#     text = pipe("audio.mp3")["text"]
+
+#     return html_embed_str, text
+
+def rt_transcribe(audio, state=""):
+    time.sleep(2)
+    text = pipe(audio)["text"]
+    state += unicodedata.normalize("NFC", text) + " "
+    return state, state
+
+
+demo = gr.Blocks()
+
+examples = [["examples/example1.mp3"], ["examples/example2.mp3"]]
+
+description = """
+<p>
+<center>
+Welcome to HindiSpeechPro, a cutting-edge interface powered by a fine-tuned version of facebook/wav2vec2-xls-r-300m on the common_voice dataset. Easily convert your spoken words to accurate text with just a few clicks.
+</center>
+</p>
+<center>
+<img src="https://huggingface.co/spaces/kingabzpro/real-time-Urdu-ASR/resolve/main/Images/cover.jpg" alt="logo" width="550"/>
+</center>
+"""
+
+mf_transcribe = gr.Interface(
+    fn=transcribe,
+    inputs=[
+        gr.inputs.Audio(source="microphone", type="filepath"),
+        gr.inputs.Audio(source="upload", type="filepath"),
+    ],
+    outputs="text",
+    theme="huggingface",
+    title="HindiSpeechPro: WAV2VEC-Powered ASR Interface",
+    description=description,
+    allow_flagging="never",
+    examples=examples,
+).launch(share=True)
+
 
 gr.Interface.load("models/SakshiRathi77/wav2vec2-large-xlsr-300m-hi-kagglex").launch()
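
For quick verification, here is a minimal sketch of how the ASR pipeline added in this commit can be exercised outside the Gradio UI; "sample_hi.wav" is a hypothetical local audio file, not part of the repo:

# sanity_check.py -- illustrative sketch only, not part of the commit
import torch
from transformers import pipeline

pipe = pipeline(
    task="automatic-speech-recognition",
    model="SakshiRathi77/wav2vec2-large-xlsr-300m-hi-kagglex",
    device=0 if torch.cuda.is_available() else "cpu",
)

# Transcribe a local recording; the pipeline decodes the file and
# resamples it to the model's expected sampling rate via ffmpeg.
print(pipe("sample_hi.wav")["text"])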