Update app.py
app.py CHANGED
@@ -266,6 +266,14 @@ def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio
         node["motion_low"] = motion_low_all[i]
 
     graph = graph_pruning(graph)
+    # for gradio, use a subgraph
+    if len(graph.vs) > 1800:
+        gap = len(graph.vs) - 1800
+        start_d = random.randint(0, 1800)
+        graph.delete_vertices(range(start_d, start_d + gap))
+        ascc_2 = graph.clusters(mode="STRONG")
+        graph = ascc_2.giant()
+
     # drop the id of gt
     idx = 0
     audio_waveform, sr = librosa.load(audio_path)
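This new block keeps the hosted demo responsive by capping the motion graph at 1800 nodes: it deletes a random contiguous vertex range, then keeps only the giant strongly connected component so the remaining graph stays fully traversable. A minimal runnable sketch of the same pattern with python-igraph (the complete digraph below is a hypothetical stand-in for the real motion graph):

```python
import random

import igraph as ig

def cap_graph(graph: ig.Graph, max_nodes: int = 1800) -> ig.Graph:
    """Mirror of the cap-and-prune step above (a sketch, not the repo's code)."""
    if len(graph.vs) > max_nodes:
        gap = len(graph.vs) - max_nodes
        start_d = random.randint(0, max_nodes)
        # Delete a random contiguous block of vertices to hit the cap...
        graph.delete_vertices(range(start_d, start_d + gap))
        # ...then keep the giant strongly connected component, so any
        # surviving node can still reach any other along graph edges.
        graph = graph.clusters(mode="STRONG").giant()
    return graph

if __name__ == "__main__":
    toy = ig.Graph.Full(2000, directed=True)  # hypothetical stand-in
    print(len(cap_graph(toy).vs))  # 1800: still one strongly connected component
```

Note that pruning to the giant strongly connected component can shrink the graph below 1800 nodes if the random cut disconnects it; the commit apparently accepts that trade-off for the demo.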
@@ -438,7 +446,7 @@ def prepare_all(yaml_name):
     return config
 
 
-def save_first_20_seconds(video_path, output_path="./save_video.mp4"):
+def save_first_10_seconds(video_path, output_path="./save_video.mp4"):
     import cv2
     cap = cv2.VideoCapture(video_path)
 
@@ -452,7 +460,7 @@ def save_first_20_seconds(video_path, output_path="./save_video.mp4"):
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
 
-    frames_to_save = fps * 20
+    frames_to_save = fps * 10
     frame_count = 0
 
     while cap.isOpened() and frame_count < frames_to_save:
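Only fragments of the renamed helper are visible across these two hunks. Assuming the standard OpenCV capture/writer pattern for the elided middle (the fps/size probing and the read-loop body are not shown in the diff), the full function plausibly reads:

```python
import cv2

def save_first_10_seconds(video_path, output_path="./save_video.mp4"):
    cap = cv2.VideoCapture(video_path)
    # Probe stream properties (these lines are elided in the diff; assumed).
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    frames_to_save = fps * 10  # was fps * 20 before this commit
    frame_count = 0

    while cap.isOpened() and frame_count < frames_to_save:
        ret, frame = cap.read()
        if not ret:  # stop early if the clip is shorter than 10 s
            break
        out.write(frame)
        frame_count += 1

    cap.release()
    out.release()
```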
@@ -475,7 +483,6 @@ character_name_to_yaml = {
 }
 
 cfg = prepare_all("./configs/gradio.yaml")
-seed_everything(cfg.seed)
 
 smplx_model = smplx.create(
     "./emage/smplx_models/",
@@ -499,9 +506,10 @@ state_dict = checkpoint['model_state_dict']
 # new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
 model.load_state_dict(state_dict, strict=False)
 
-@spaces.GPU(duration=
-def tango(audio_path, character_name, create_graph=False, video_folder_path=None, smplx_model=smplx_model, model=model, cfg=cfg):
-
+@spaces.GPU(duration=299)
+def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None, smplx_model=smplx_model, model=model, cfg=cfg):
+    cfg.seed = seed
+    seed_everything(cfg.seed)
     experiment_ckpt_dir = experiment_log_dir = os.path.join(cfg.output_dir, cfg.exp_name)
     saved_audio_path = "./saved_audio.wav"
     sample_rate, audio_waveform = audio_path
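Seeding moves from import time (the deleted `seed_everything(cfg.seed)` two hunks up) into the request handler, so each call re-seeds with the user-supplied value and results are reproducible per request. On ZeroGPU Spaces, `@spaces.GPU(duration=...)` declares how many seconds of GPU time a call may reserve; 299 presumably keeps each run just under five minutes. `seed_everything` itself is not shown in this diff; a typical implementation looks like this sketch (an assumption, not the repo's exact code):

```python
import os
import random

import numpy as np
import torch

def seed_everything(seed: int) -> None:
    # Seed every RNG the pipeline might touch, so one integer
    # reproduces the whole generation (assumed implementation).
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
```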
@@ -523,7 +531,7 @@ def tango(audio_path, character_name, create_graph=False, video_folder_path=None
         create_graph=True
     # load video, and save it to "./save_video.mp4 for the first 20s of the video."
     os.makedirs("./outputs/tmpvideo/", exist_ok=True)
-    save_first_20_seconds(character_name, "./outputs/tmpvideo/save_video.mp4")
+    save_first_10_seconds(character_name, "./outputs/tmpvideo/save_video.mp4")
 
     if create_graph:
         video_folder_path = "./outputs/tmpvideo/"
@@ -564,7 +572,7 @@ examples_video = [
 ]
 
 combined_examples = [
-    [audio[0], video[0]] for audio in examples_audio for video in examples_video
+    [audio[0], video[0], 2024] for audio in examples_audio for video in examples_video
 ]
 
 def make_demo():
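The comprehension pairs every cached audio with every cached video and now appends the fixed default seed, 2024, as the third example field. The same list via `itertools.product`, with hypothetical placeholder entries:

```python
from itertools import product

examples_audio = [["audio_a.wav"], ["audio_b.wav"]]  # hypothetical entries
examples_video = [["video_x.mp4"], ["video_y.mp4"]]

# Every audio/video pair, each carrying the fixed seed 2024.
combined_examples = [
    [audio[0], video[0], 2024] for audio, video in product(examples_audio, examples_video)
]
assert len(combined_examples) == len(examples_audio) * len(examples_video)
```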
@@ -589,21 +597,20 @@ def make_demo():
         """
     )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        """)
+        gr.Markdown("""
+        <h4 style="text-align: left;">
+        This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
+
+        Details of the low-quality mode:
+        1. Lower resolution.
+        2. More discontinuous frames (causing noticeable "frame jumps").
+        3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
+        4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
+        5. You can provide a custom background video for your character, but it is limited to 20 seconds.
+
+        Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
+        </h4>
+        """)
 
         # Create a gallery with 5 videos
         with gr.Row():
@@ -652,6 +659,8 @@ def make_demo():
                 label="Character Examples",
                 cache_examples=False
             )
+        with gr.Row():
+            seed_input = gr.Number(label="Seed", value=2024, interactive=True)
 
         # Fourth row: Generate video button
         with gr.Row():
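The new `gr.Number` has to be threaded through every place that invokes `tango` — the button click below and the cached examples — otherwise Gradio would call the function with a mismatched argument list. A self-contained sketch of the wiring (the stub is a hypothetical stand-in for the real `tango`):

```python
import gradio as gr

def tango_stub(audio, video, seed):
    # Hypothetical stand-in for tango(audio_path, character_name, seed, ...).
    return f"would generate with seed {int(seed)}"

with gr.Blocks() as demo:
    audio_input = gr.Audio(label="Audio")
    video_input = gr.Video(label="Character")
    with gr.Row():
        seed_input = gr.Number(label="Seed", value=2024, interactive=True)
    run_button = gr.Button("Generate")
    status = gr.Textbox(label="Status")
    run_button.click(
        fn=tango_stub,
        inputs=[audio_input, video_input, seed_input],  # seed rides along
        outputs=[status],
    )

# demo.launch()  # uncomment to try the wiring locally
```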
@@ -660,7 +669,7 @@ def make_demo():
         # Define button click behavior
         run_button.click(
             fn=tango,
-            inputs=[audio_input, video_input],
+            inputs=[audio_input, video_input, seed_input],
             outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
         )
 
@@ -669,7 +678,7 @@ def make_demo():
         print(combined_examples)
         gr.Examples(
             examples=combined_examples,
-            inputs=[audio_input, video_input], # Both audio and video as inputs
+            inputs=[audio_input, video_input, seed_input], # audio, video, and seed as inputs
             outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
             fn=tango, # Function that processes both audio and video inputs
             label="Select Combined Audio and Video Examples (Cached)",