File size: 6,155 Bytes
0a9bdfb
a874577
 
 
0a9bdfb
 
 
a874577
0a9bdfb
 
7b495d7
 
0a9bdfb
a874577
 
 
 
 
 
 
 
 
 
 
 
 
a0d251a
a874577
 
 
 
 
 
0a9bdfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a874577
0a9bdfb
 
 
 
 
c1acf76
0a9bdfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86760f1
0a9bdfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1190e23
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import gradio as gr
import os
from huggingface_hub import hf_hub_download

from musepose_inference import MusePoseInference
from pose_align import PoseAlignmentInference


class App:
    """Gradio web UI that wires pose alignment and MusePose inference together.

    The UI has two tabs: "Step1" feeds a reference image and a dance video to
    ``PoseAlignmentInference.align_pose``; "Step2" feeds a reference image and
    an aligned pose video to ``MusePoseInference.infer_musepose``.
    """

    def __init__(self):
        """Create the two inference backends used by the UI callbacks."""
        self.pose_alignment_infer = PoseAlignmentInference()
        self.musepose_infer = MusePoseInference()

    @staticmethod
    def _hub_local_root(file_path, remote_filepath):
        """Return the directory to pass as ``local_dir`` to ``hf_hub_download``.

        ``hf_hub_download`` recreates the repo-relative path of the file below
        ``local_dir``. To make the download land exactly at ``file_path`` we
        therefore strip one path component from ``file_path`` for every
        component of ``remote_filepath``.

        Args:
            file_path: Desired final location of the file on disk.
            remote_filepath: "/"-separated path of the file inside the repo.

        Returns:
            The directory under which ``remote_filepath`` resolves to
            ``file_path`` (``os.curdir`` if nothing is left after stripping).
        """
        root = file_path
        for _ in remote_filepath.split("/"):
            root = os.path.dirname(root)
        return root or os.curdir

    @staticmethod
    def download_models():
        """Download the DWPose detector/pose checkpoints if they are missing.

        Files are fetched from the ``jhj0517/MusePose`` Hugging Face repo into
        ``pretrained_weights/dwpose``. Checkpoints that already exist locally
        are skipped.
        """
        repo_id = 'jhj0517/MusePose'
        model_paths = {
            "det_ckpt": os.path.join("pretrained_weights", "dwpose", "yolox_l_8x8_300e_coco.pth"),
            "pose_ckpt": os.path.join("pretrained_weights", "dwpose", "dw-ll_ucoco_384.pth")
        }
        for file_path in model_paths.values():
            local_dir, filename = os.path.split(file_path)
            os.makedirs(local_dir, exist_ok=True)

            if os.path.exists(file_path):
                continue

            remote_filepath = f"dwpose/{filename}"
            print(file_path)
            # The remote path already contains the "dwpose/" folder, so the
            # download root must be its parent; passing `local_dir` itself
            # would place the file in ".../dwpose/dwpose/<filename>".
            hf_hub_download(repo_id=repo_id, filename=remote_filepath,
                            local_dir=App._hub_local_root(file_path, remote_filepath),
                            local_dir_use_symlinks=False)

    def musepose_demo(self):
        """Build and return the two-tab Gradio ``Blocks`` interface."""
        with gr.Blocks() as demo:
            with gr.Tabs():
                with gr.TabItem('Step1: Pose Alignment'):
                    with gr.Row():
                        with gr.Column(scale=3):
                            img_input = gr.Image(label="Input Image here", type="filepath", scale=5)
                            vid_dance_input = gr.Video(label="Input Dance Video", scale=5)
                        with gr.Column(scale=3):
                            vid_dance_output = gr.Video(label="Aligned pose output will be displayed here", scale=5)
                            vid_dance_output_demo = gr.Video(label="Output demo video will be displayed here", scale=5)
                        with gr.Column(scale=3):
                            with gr.Column():
                                nb_detect_resolution = gr.Number(label="Detect Resolution", value=512, precision=0)
                                nb_image_resolution = gr.Number(label="Image Resolution.", value=720, precision=0)
                                nb_align_frame = gr.Number(label="Align Frame", value=0, precision=0)
                                nb_max_frame = gr.Number(label="Max Frame", value=300, precision=0)

                            with gr.Row():
                                btn_align_pose = gr.Button("ALIGN POSE", variant="primary")
                                btn_down = gr.Button("download", variant="primary")

                # Note the input order: the dance video comes before the image.
                btn_align_pose.click(fn=self.pose_alignment_infer.align_pose,
                                     inputs=[vid_dance_input, img_input, nb_detect_resolution, nb_image_resolution,
                                             nb_align_frame, nb_max_frame],
                                     outputs=[vid_dance_output, vid_dance_output_demo])
                btn_down.click(fn=self.download_models, inputs=None, outputs=None)

                with gr.TabItem('Step2: MusePose Inference'):
                    with gr.Row():
                        with gr.Column(scale=3):
                            # Separate name so the Step1 image component is not shadowed.
                            img_musepose_input = gr.Image(label="Input Image here", type="filepath", scale=5)
                            vid_pose_input = gr.Video(label="Input Aligned Pose Video here", scale=5)
                        with gr.Column(scale=3):
                            vid_output = gr.Video(label="Output Video will be displayed here", scale=5)
                            vid_output_demo = gr.Video(label="Output demo video will be displayed here", scale=5)

                        with gr.Column(scale=3):
                            with gr.Column():
                                weight_dtype = gr.Dropdown(label="Compute Type", choices=["fp16", "fp32"],
                                                           value="fp16")
                                nb_width = gr.Number(label="Width.", value=512, precision=0)
                                nb_height = gr.Number(label="Height.", value=512, precision=0)
                                nb_video_frame_length = gr.Number(label="Video Frame Length", value=300, precision=0)
                                nb_video_slice_frame_length = gr.Number(label="Video Slice Frame Number ", value=48,
                                                                        precision=0)
                                nb_video_slice_overlap_frame_number = gr.Number(
                                    label="Video Slice Overlap Frame Number", value=4, precision=0)
                                # precision=0 would round the guidance scale to an
                                # integer, so the fractional 3.5 default (and any
                                # fractional user input) could not be passed through.
                                nb_cfg = gr.Number(label="CFG (Classifier Free Guidance)", value=3.5, precision=1)
                                nb_seed = gr.Number(label="Seed", value=99, precision=0)
                                nb_steps = gr.Number(label="DDIM Sampling Steps", value=20, precision=0)
                                nb_fps = gr.Number(label="FPS (Frames Per Second) ", value=-1, precision=0,
                                                   info="Set to '-1' to use same FPS with pose's")
                                nb_skip = gr.Number(label="SKIP (Frame Sample Rate = SKIP+1)", value=1, precision=0)

                            with gr.Row():
                                btn_generate = gr.Button("GENERATE", variant="primary")

                btn_generate.click(fn=self.musepose_infer.infer_musepose,
                                   inputs=[img_musepose_input, vid_pose_input, weight_dtype, nb_width, nb_height,
                                           nb_video_frame_length,
                                           nb_video_slice_frame_length, nb_video_slice_overlap_frame_number, nb_cfg,
                                           nb_seed,
                                           nb_steps, nb_fps, nb_skip],
                                   outputs=[vid_output, vid_output_demo])
        return demo

    def launch(self):
        """Build the UI, enable request queuing and start the Gradio server."""
        demo = self.musepose_demo()
        demo.queue().launch()


if __name__ == "__main__":
    # Script entry point: construct the application and start the web UI.
    App().launch()