fffiloni committed · verified
Commit d506cd1 · 1 Parent(s): c9a6087

Update app.py

Files changed (1): app.py (+53 -14)
app.py CHANGED

@@ -19,6 +19,28 @@ from datetime import datetime
 from torchao.quantization import quantize_, int8_weight_only
 import gc
 
+import tempfile
+from pydub import AudioSegment
+
+def cut_audio_to_5_seconds(audio_path):
+    try:
+        # Load the audio file
+        audio = AudioSegment.from_file(audio_path)
+
+        # Trim to a maximum of 5 seconds (5000 milliseconds)
+        trimmed_audio = audio[:5000]
+
+        # Create a temporary directory
+        temp_dir = tempfile.mkdtemp()
+        output_path = os.path.join(temp_dir, "trimmed_audio.wav")
+
+        # Export the trimmed audio
+        trimmed_audio.export(output_path, format="wav")
+
+        return output_path
+    except Exception as e:
+        return f"An error occurred while trying to trim audio: {str(e)}"
+
 import requests
 import tarfile
 
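A review note on this hunk: on failure, `cut_audio_to_5_seconds` returns an error *message string* rather than a path, and the call site added further down passes that value straight into the pipeline as the audio file. A more defensive variant (a sketch under the same imports and pydub/ffmpeg assumptions, not part of this commit) would fall back to the original file:

```python
def cut_audio_to_5_seconds(audio_path):
    """Trim audio to at most 5 s; fall back to the original path on failure."""
    try:
        audio = AudioSegment.from_file(audio_path)  # needs ffmpeg for most formats
        trimmed_audio = audio[:5000]                # pydub slices in milliseconds
        output_path = os.path.join(tempfile.mkdtemp(), "trimmed_audio.wav")
        trimmed_audio.export(output_path, format="wav")
        return output_path
    except Exception as e:
        print(f"Audio trim failed, keeping original input: {e}")
        return audio_path                           # still a usable file path
```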
@@ -78,6 +100,8 @@ snapshot_download(
     local_dir="./pretrained_weights/sd-image-variations-diffusers"
 )
 
+is_shared_ui = True if "fffiloni/echomimic-v2" in os.environ['SPACE_ID'] else False
+
 # Download and place the Whisper model in the "audio_processor" folder
 def download_whisper_model():
     url = "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt"
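Two small observations on the new `is_shared_ui` flag: `os.environ['SPACE_ID']` raises a `KeyError` when the app runs outside a Hugging Face Space (e.g., locally), and the `True if ... else False` ternary is redundant since `in` already yields a bool. A terser, local-run-safe equivalent (a suggestion, not part of this commit):

```python
# Sketch: .get() avoids a KeyError when SPACE_ID is unset (local runs)
is_shared_ui = "fffiloni/echomimic-v2" in os.environ.get("SPACE_ID", "")
```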
@@ -118,7 +142,7 @@ elif ffmpeg_path not in os.getenv('PATH'):
     os.environ["PATH"] = f"{ffmpeg_path}:{os.environ['PATH']}"
 
 
-def generate(image_input, audio_input, pose_input, width, height, length, steps, sample_rate, cfg, fps, context_frames, context_overlap, quantization_input, seed):
+def generate(image_input, audio_input, pose_input, width, height, length, steps, sample_rate, cfg, fps, context_frames, context_overlap, quantization_input, seed, progress=gr.Progress(track_tqdm=True)):
     gc.collect()
     torch.cuda.empty_cache()
     torch.cuda.ipc_collect()
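The new `progress=gr.Progress(track_tqdm=True)` parameter asks Gradio to inject a progress tracker when `generate` runs as an event handler and, with `track_tqdm=True`, to mirror any internal `tqdm` bars (such as the diffusion sampling loop) in the UI. A minimal self-contained illustration of the pattern (hypothetical `slow_task`, not from this app):

```python
import time

import gradio as gr
from tqdm import tqdm

def slow_task(n, progress=gr.Progress(track_tqdm=True)):
    # Each tqdm update is mirrored as a progress bar in the Gradio UI
    for _ in tqdm(range(int(n)), desc="sampling"):
        time.sleep(0.1)
    return "done"

demo = gr.Interface(slow_task, gr.Number(value=20), "text")
demo.launch()
```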
@@ -216,6 +240,10 @@ def generate(image_input, audio_input, pose_input, width, height, length, steps,
         seed = random.randint(100, 1000000)
     generator = torch.manual_seed(seed)
 
+    if is_shared_ui:
+        audio_input = cut_audio_to_5_seconds(audio_input)
+        print(f"Trimmed audio saved at: {audio_input}")
+
     inputs_dict = {
         "refimg": image_input,
         "audio": audio_input,
@@ -289,25 +317,36 @@ def generate(image_input, audio_input, pose_input, width, height, length, steps,
 
 with gr.Blocks() as demo:
     gr.Markdown("""
-    <div>
-        <h2 style="font-size: 30px;text-align: center;">EchoMimicV2</h2>
-    </div>
-    <div style="text-align: center;">
-        <a href="https://github.com/antgroup/echomimic_v2">🌐 Github</a> |
-        <a href="https://arxiv.org/abs/2411.10061">📜 arXiv </a>
-    </div>
-    <div style="text-align: center; font-weight: bold; color: red;">
-        ⚠️ This demonstration is for academic research and experiential use only.
-    </div>
+    # EchoMimicV2
 
+    ⚠️ This demonstration is for academic research and experiential use only.
     """)
+    gr.HTML("""
+    <div style="display:flex;column-gap:4px;">
+        <a href="https://github.com/antgroup/echomimic_v2">
+            <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
+        </a>
+        <a href="https://antgroup.github.io/ai/echomimic_v2/">
+            <img src='https://img.shields.io/badge/Project-Page-green'>
+        </a>
+        <a href="https://arxiv.org/abs/2411.10061">
+            <img src='https://img.shields.io/badge/ArXiv-Paper-red'>
+        </a>
+        <a href="https://huggingface.co/spaces/fffiloni/echomimic-v2?duplicate=true">
+            <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-sm.svg" alt="Duplicate this Space">
+        </a>
+        <a href="https://huggingface.co/fffiloni">
+            <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/follow-me-on-HF-sm-dark.svg" alt="Follow me on HF">
+        </a>
+    </div>
+    """)
     with gr.Column():
         with gr.Row():
             with gr.Column():
                 with gr.Group():
                     image_input = gr.Image(label="Image Input (Auto Scaling)", type="filepath")
-                    audio_input = gr.Audio(label="Audio Input", type="filepath")
-                    pose_input = gr.Textbox(label="Pose Input (Directory Path)", placeholder="Please enter the directory path for pose data.", value="assets/halfbody_demo/pose/01")
+                    audio_input = gr.Audio(label="Audio Input - max 5 seconds on shared UI", type="filepath")
+                    # pose_input = gr.Textbox(label="Pose Input (Directory Path)", placeholder="Please enter the directory path for pose data.", value="assets/halfbody_demo/pose/01")
                 with gr.Accordion("Advanced Settings", open=False):
                     with gr.Row():
                         width = gr.Number(label="Width (multiple of 16, recommended: 768)", value=768)
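Two follow-ups on the hunks above. First, on the shared-UI trimming call added inside `generate`: if `cut_audio_to_5_seconds` fails, its error string flows into the inputs dict as though it were a file path. A guard at the call site (a sketch, not in this commit) would keep the original audio instead:

```python
if is_shared_ui:
    trimmed = cut_audio_to_5_seconds(audio_input)
    if os.path.exists(trimmed):  # an error message is not an existing path
        audio_input = trimmed
        print(f"Trimmed audio saved at: {audio_input}")
```

Second, `pose_input` remains a parameter of `generate` even though its Textbox is now commented out, so the pose directory presumably reaches the handler through a default or fixed value wired elsewhere (not visible in this diff).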
@@ -352,4 +391,4 @@ with gr.Blocks() as demo:
 
 if __name__ == "__main__":
     demo.queue()
-    demo.launch(inbrowser=True)
+    demo.launch(show_api=False, show_error=True, ssr_mode=False)
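On the new launch flags: `show_api=False` hides the auto-generated API docs, `show_error=True` surfaces handler exceptions in the UI, and `ssr_mode=False` opts out of Gradio 5's server-side rendering. Dropping `inbrowser=True` is harmless here, since that flag only auto-opens a browser tab for local runs.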