Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -42,6 +42,7 @@ from fairseq import checkpoint_utils, options, tasks, utils
|
|
42 |
from fairseq.dataclass.configs import GenerationConfig
|
43 |
from huggingface_hub import hf_hub_download
|
44 |
import gradio as gr
|
|
|
45 |
|
46 |
# os.chdir('/home/user/app/av_hubert/avhubert')
|
47 |
|
@@ -131,18 +132,44 @@ def predict(process_video):
|
|
131 |
|
132 |
|
133 |
# ---- Gradio Layout -----
|
|
|
134 |
video_in = gr.Video(label="Input Video", mirror_webcam=False, interactive=True)
|
135 |
-
video_out = gr.Video(label="Audio Visual Video", mirror_webcam=False, interactive=True)
|
136 |
-
text_output = gr.Textbox()
|
137 |
demo = gr.Blocks()
|
138 |
demo.encrypt = False
|
|
|
|
|
139 |
with demo:
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
145 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
video_in.render()
|
147 |
video_out.render()
|
148 |
with gr.Row():
|
|
|
42 |
from fairseq.dataclass.configs import GenerationConfig
|
43 |
from huggingface_hub import hf_hub_download
|
44 |
import gradio as gr
|
45 |
+
from pytube import YouTube
|
46 |
|
47 |
# os.chdir('/home/user/app/av_hubert/avhubert')
|
48 |
|
|
|
132 |
|
133 |
|
134 |
# ---- Gradio Layout -----
|
135 |
+
youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
|
136 |
video_in = gr.Video(label="Input Video", mirror_webcam=False, interactive=True)
|
137 |
+
video_out = gr.Video(label="Audio Visual Video", mirror_webcam=False, interactive=True)
|
|
|
138 |
demo = gr.Blocks()
|
139 |
demo.encrypt = False
|
140 |
+
text_output = gr.Textbox()
|
141 |
+
|
142 |
with demo:
|
143 |
+
gr.Markdown('''
|
144 |
+
<div>
|
145 |
+
<h1 style='text-align: center'>Speech Recognition from Visual Lip Movement by Audio-Visual Hidden Unit BERT Model (Avhubert)</h1>
|
146 |
+
This space uses Avhubert models from <a href='https://github.com/facebookresearch' target='_blank'><b>Meta Research</b></a> to recognize the speech from Lip Movement 🤗
|
147 |
+
</div>
|
148 |
+
''')
|
149 |
with gr.Row():
|
150 |
+
gr.Markdown('''
|
151 |
+
### Reading Lip movement with youtube link using Avhubert
|
152 |
+
##### Step 1a. Download a video from YouTube (Note: the video should be shorter than 10 seconds, otherwise it will be cut; the face should be stable for better results)
|
153 |
+
##### Step 1b. You can also upload a video directly
|
154 |
+
##### Step 2. Generating landmarks surrounding mouth area
|
155 |
+
##### Step 3. Reading lip movement.
|
156 |
+
''')
|
157 |
+
with gr.Row():
|
158 |
+
gr.Markdown('''
|
159 |
+
### You can test by following examples:
|
160 |
+
''')
|
161 |
+
examples = gr.Examples(examples=
|
162 |
+
[ "https://www.youtube.com/watch?v=ZXVDnuepW2s",
|
163 |
+
"https://www.youtube.com/watch?v=X8_glJn1B8o",
|
164 |
+
"https://www.youtube.com/watch?v=80yqL2KzBVw"],
|
165 |
+
label="Examples", inputs=[youtube_url_in])
|
166 |
+
with gr.Column():
|
167 |
+
youtube_url_in.render()
|
168 |
+
download_youtube_btn = gr.Button("Download Youtube video")
|
169 |
+
download_youtube_btn.click(get_youtube, [youtube_url_in], [
|
170 |
+
video_in])
|
171 |
+
print(video_in)
|
172 |
+
with gr.Row():
|
173 |
video_in.render()
|
174 |
video_out.render()
|
175 |
with gr.Row():
|