Update app.py

app.py CHANGED

@@ -91,16 +91,6 @@ def get_args():
     return args


-args = get_args()
-ensure_dirname(args.output_dir)
-
-
-color_list = []
-for i in range(20):
-    color = np.concatenate([np.random.random(4)*255], axis=0)
-    color_list.append(color)
-
-
 def interpolate_trajectory(points, n_points):
     x = [point[0] for point in points]
     y = [point[1] for point in points]

@@ -536,264 +526,275 @@ class Drag:
         return val_save_dir


-
-gr.Markdown("""<h1 align="center">Framer: Interactive Frame Interpolation</h1><br>""")
-
-
-
-
-
-gr.Image(label="Framer: Interactive Frame Interpolation", value="assets/demos.gif", height=432, width=768)
-
-gr.Markdown("""## Usage: <br>
-1. Upload images<br>
-  1.1 Upload the start image via the "Upload Start Image" button.<br>
-  1.2. Upload the end image via the "Upload End Image" button.<br>
-2. (Optional) Draw some drags.<br>
-  2.1. Click "Add Drag Trajectory" to add the motion trajectory.<br>
-  2.2. You can click several points on either start or end image to forms a path.<br>
-  2.3. Click "Delete last drag" to delete the whole lastest path.<br>
-  2.4. Click "Delete last step" to delete the lastest clicked control point.<br>
-3. Interpolate the images (according the path) with a click on "Run" button. <br>""")
-
-
-
-
-
-tracking_points = gr.State([])
-
-
+if __name__ == "__main__":
+
+    args = get_args()
+    ensure_dirname(args.output_dir)
+
+
+    color_list = []
+    for i in range(20):
+        color = np.concatenate([np.random.random(4)*255], axis=0)
+        color_list.append(color)
+
+    with gr.Blocks() as demo:
+        gr.Markdown("""<h1 align="center">Framer: Interactive Frame Interpolation</h1><br>""")
+
+        gr.Markdown("""Gradio Demo for <a href='https://arxiv.org/abs/2410.18978'><b>Framer: Interactive Frame Interpolation</b></a>.<br>
+Github Repo can be found at https://github.com/aim-uofa/Framer<br>
+The template is inspired by DragAnything.""")
+
+        gr.Image(label="Framer: Interactive Frame Interpolation", value="assets/demos.gif", height=432, width=768)
+
+        gr.Markdown("""## Usage: <br>
+1. Upload images<br>
+  1.1. Upload the start image via the "Upload Start Image" button.<br>
+  1.2. Upload the end image via the "Upload End Image" button.<br>
+2. (Optional) Draw some drags.<br>
+  2.1. Click "Add Drag Trajectory" to add the motion trajectory.<br>
+  2.2. You can click several points on either start or end image to form a path.<br>
+  2.3. Click "Delete last drag" to delete the whole latest path.<br>
+  2.4. Click "Delete last step" to delete the latest clicked control point.<br>
+3. Interpolate the images (according to the path) with a click on the "Run" button. <br>""")
+
+        # device, args, height, width, model_length
+        Framer = Drag("cuda", args, 320, 512, 14)
         first_frame_path = gr.State()
         last_frame_path = gr.State()
         tracking_points = gr.State([])
[231 lines removed (old lines 569-799): the rest of the previous module-level demo UI construction and event wiring; their content is not rendered in this diff view.]
+
+        def reset_states(first_frame_path, last_frame_path, tracking_points):
+            first_frame_path = gr.State()
+            last_frame_path = gr.State()
+            tracking_points = gr.State([])
+
+            return first_frame_path, last_frame_path, tracking_points
+
+
+        def preprocess_image(image):
+
+            image_pil = image2pil(image.name)
+
+            raw_w, raw_h = image_pil.size
+            # resize_ratio = max(512 / raw_w, 320 / raw_h)
+            # image_pil = image_pil.resize((int(raw_w * resize_ratio), int(raw_h * resize_ratio)), Image.BILINEAR)
+            # image_pil = transforms.CenterCrop((320, 512))(image_pil.convert('RGB'))
+            image_pil = image_pil.resize((512, 320), Image.BILINEAR)
+
+            first_frame_path = os.path.join(args.output_dir, f"first_frame_{str(uuid.uuid4())[:4]}.png")
+
+            image_pil.save(first_frame_path)
+
+            return first_frame_path, first_frame_path, gr.State([])
+
+
+        def preprocess_image_end(image_end):
+
+            image_end_pil = image2pil(image_end.name)
+
+            raw_w, raw_h = image_end_pil.size
+            # resize_ratio = max(512 / raw_w, 320 / raw_h)
+            # image_end_pil = image_end_pil.resize((int(raw_w * resize_ratio), int(raw_h * resize_ratio)), Image.BILINEAR)
+            # image_end_pil = transforms.CenterCrop((320, 512))(image_end_pil.convert('RGB'))
+            image_end_pil = image_end_pil.resize((512, 320), Image.BILINEAR)
+
+            last_frame_path = os.path.join(args.output_dir, f"last_frame_{str(uuid.uuid4())[:4]}.png")
+
+            image_end_pil.save(last_frame_path)
+
+            return last_frame_path, last_frame_path, gr.State([])
+
+
+        def add_drag(tracking_points):
+            tracking_points.constructor_args['value'].append([])
+            return tracking_points
+
+
+        def delete_last_drag(tracking_points, first_frame_path, last_frame_path):
+            tracking_points.constructor_args['value'].pop()
+            transparent_background = Image.open(first_frame_path).convert('RGBA')
+            transparent_background_end = Image.open(last_frame_path).convert('RGBA')
+            w, h = transparent_background.size
+            transparent_layer = np.zeros((h, w, 4))
+
+            for track in tracking_points.constructor_args['value']:
+                if len(track) > 1:
+                    for i in range(len(track)-1):
+                        start_point = track[i]
+                        end_point = track[i+1]
+                        vx = end_point[0] - start_point[0]
+                        vy = end_point[1] - start_point[1]
+                        arrow_length = np.sqrt(vx**2 + vy**2)
+                        if i == len(track)-2:
+                            cv2.arrowedLine(transparent_layer, tuple(start_point), tuple(end_point), (255, 0, 0, 255), 2, tipLength=8 / arrow_length)
+                        else:
+                            cv2.line(transparent_layer, tuple(start_point), tuple(end_point), (255, 0, 0, 255), 2,)
+                else:
+                    cv2.circle(transparent_layer, tuple(track[0]), 5, (255, 0, 0, 255), -1)
+
+            transparent_layer = Image.fromarray(transparent_layer.astype(np.uint8))
+            trajectory_map = Image.alpha_composite(transparent_background, transparent_layer)
+            trajectory_map_end = Image.alpha_composite(transparent_background_end, transparent_layer)
+
+            return tracking_points, trajectory_map, trajectory_map_end
+
+
+        def delete_last_step(tracking_points, first_frame_path, last_frame_path):
+            tracking_points.constructor_args['value'][-1].pop()
+            transparent_background = Image.open(first_frame_path).convert('RGBA')
+            transparent_background_end = Image.open(last_frame_path).convert('RGBA')
+            w, h = transparent_background.size
+            transparent_layer = np.zeros((h, w, 4))
+
+            for track in tracking_points.constructor_args['value']:
+                if len(track) > 1:
+                    for i in range(len(track)-1):
+                        start_point = track[i]
+                        end_point = track[i+1]
+                        vx = end_point[0] - start_point[0]
+                        vy = end_point[1] - start_point[1]
+                        arrow_length = np.sqrt(vx**2 + vy**2)
+                        if i == len(track)-2:
+                            cv2.arrowedLine(transparent_layer, tuple(start_point), tuple(end_point), (255, 0, 0, 255), 2, tipLength=8 / arrow_length)
+                        else:
+                            cv2.line(transparent_layer, tuple(start_point), tuple(end_point), (255, 0, 0, 255), 2,)
+                else:
+                    cv2.circle(transparent_layer, tuple(track[0]), 5, (255, 0, 0, 255), -1)
+
+            transparent_layer = Image.fromarray(transparent_layer.astype(np.uint8))
+            trajectory_map = Image.alpha_composite(transparent_background, transparent_layer)
+            trajectory_map_end = Image.alpha_composite(transparent_background_end, transparent_layer)
+
+            return tracking_points, trajectory_map, trajectory_map_end
+
+
+        def add_tracking_points(tracking_points, first_frame_path, last_frame_path, evt: gr.SelectData):  # SelectData is a subclass of EventData
+            print(f"You selected {evt.value} at {evt.index} from {evt.target}")
+            tracking_points.constructor_args['value'][-1].append(evt.index)
+
+            transparent_background = Image.open(first_frame_path).convert('RGBA')
+            transparent_background_end = Image.open(last_frame_path).convert('RGBA')
+
+            w, h = transparent_background.size
+            transparent_layer = 0
+            for idx, track in enumerate(tracking_points.constructor_args['value']):
+                # mask = cv2.imread(
+                #     os.path.join(args.output_dir, f"mask_{idx+1}.jpg")
+                # )
+                mask = np.zeros((320, 512, 3))
+                color = color_list[idx+1]
+                transparent_layer = mask[:, :, 0].reshape(h, w, 1) * color.reshape(1, 1, -1) + transparent_layer
+
+                if len(track) > 1:
+                    for i in range(len(track)-1):
+                        start_point = track[i]
+                        end_point = track[i+1]
+                        vx = end_point[0] - start_point[0]
+                        vy = end_point[1] - start_point[1]
+                        arrow_length = np.sqrt(vx**2 + vy**2)
+                        if i == len(track)-2:
+                            cv2.arrowedLine(transparent_layer, tuple(start_point), tuple(end_point), (255, 0, 0, 255), 2, tipLength=8 / arrow_length)
+                        else:
+                            cv2.line(transparent_layer, tuple(start_point), tuple(end_point), (255, 0, 0, 255), 2,)
+                else:
+                    cv2.circle(transparent_layer, tuple(track[0]), 5, (255, 0, 0, 255), -1)
+
+            transparent_layer = Image.fromarray(transparent_layer.astype(np.uint8))
+            alpha_coef = 0.99
+            im2_data = transparent_layer.getdata()
+            new_im2_data = [(r, g, b, int(a * alpha_coef)) for r, g, b, a in im2_data]
+            transparent_layer.putdata(new_im2_data)
+
+            trajectory_map = Image.alpha_composite(transparent_background, transparent_layer)
+            trajectory_map_end = Image.alpha_composite(transparent_background_end, transparent_layer)
+
+            return tracking_points, trajectory_map, trajectory_map_end
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                image_upload_button = gr.UploadButton(label="Upload Start Image", file_types=["image"])
+                image_end_upload_button = gr.UploadButton(label="Upload End Image", file_types=["image"])
+                # select_area_button = gr.Button(value="Select Area with SAM")
+                add_drag_button = gr.Button(value="Add New Drag Trajectory")
+                reset_button = gr.Button(value="Reset")
+                run_button = gr.Button(value="Run")
+                delete_last_drag_button = gr.Button(value="Delete last drag")
+                delete_last_step_button = gr.Button(value="Delete last step")
+
+            with gr.Column(scale=7):
+                with gr.Row():
+                    with gr.Column(scale=6):
+                        input_image = gr.Image(
+                            label="start frame",
+                            interactive=True,
+                            height=320,
+                            width=512,
+                        )
+
+                    with gr.Column(scale=6):
+                        input_image_end = gr.Image(
+                            label="end frame",
+                            interactive=True,
+                            height=320,
+                            width=512,
+                        )
+
+        with gr.Row():
+            with gr.Column(scale=1):
+
+                controlnet_cond_scale = gr.Slider(
+                    label='Control Scale',
+                    minimum=0.0,
+                    maximum=10,
+                    step=0.1,
+                    value=1.0,
+                )
+
+                motion_bucket_id = gr.Slider(
+                    label='Motion Bucket',
+                    minimum=1,
+                    maximum=180,
+                    step=1,
+                    value=100,
+                )
+
+            with gr.Column(scale=5):
+                output_video = gr.Image(
+                    label="Output Video",
+                    height=320,
+                    width=1152,
+                )
+
+
+        with gr.Row():
+            gr.Markdown("""
+## Citation
+```bibtex
+@article{wang2024framer,
+    title={Framer: Interactive Frame Interpolation},
+    author={Wang, Wen and Wang, Qiuyu and Zheng, Kecheng and Ouyang, Hao and Chen, Zhekai and Gong, Biao and Chen, Hao and Shen, Yujun and Shen, Chunhua},
+    journal={arXiv preprint https://arxiv.org/abs/2410.18978},
+    year={2024}
+}
+```
+""")
+
+        image_upload_button.upload(preprocess_image, image_upload_button, [input_image, first_frame_path, tracking_points])
+
+        image_end_upload_button.upload(preprocess_image_end, image_end_upload_button, [input_image_end, last_frame_path, tracking_points])
+
+        add_drag_button.click(add_drag, tracking_points, [tracking_points, ])
+
+        delete_last_drag_button.click(delete_last_drag, [tracking_points, first_frame_path, last_frame_path], [tracking_points, input_image, input_image_end])
+
+        delete_last_step_button.click(delete_last_step, [tracking_points, first_frame_path, last_frame_path], [tracking_points, input_image, input_image_end])
+
+        reset_button.click(reset_states, [first_frame_path, last_frame_path, tracking_points], [first_frame_path, last_frame_path, tracking_points])
+
+        input_image.select(add_tracking_points, [tracking_points, first_frame_path, last_frame_path], [tracking_points, input_image, input_image_end])
+
+        input_image_end.select(add_tracking_points, [tracking_points, first_frame_path, last_frame_path], [tracking_points, input_image, input_image_end])
+
+        run_button.click(Framer.run, [first_frame_path, last_frame_path, tracking_points, controlnet_cond_scale, motion_bucket_id], output_video)
+
+    demo.launch()
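
For reference, the value held by the `tracking_points` state that these callbacks manipulate is a nested Python list: one entry per drag trajectory, each entry a list of clicked [x, y] points. A minimal sketch of how the handlers above mutate it, using hypothetical coordinates and a plain local list standing in for the Gradio state value:

```python
# Hypothetical illustration of the structure stored in the tracking_points state.
trajectories = []                     # empty after reset_states / a fresh image upload
trajectories.append([])               # add_drag: "Add New Drag Trajectory" starts a new path
trajectories[-1].append([100, 120])   # add_tracking_points: first click on the start/end frame
trajectories[-1].append([180, 150])   # a second click extends the same trajectory
trajectories[-1].pop()                # delete_last_step: drop the latest clicked control point
trajectories.pop()                    # delete_last_drag: drop the whole latest trajectory
```

Multi-point trajectories are drawn as line segments with an arrow head on the final segment, single-point trajectories as dots, and the drawing is alpha-composited onto the start and end frames before being shown back in the image components.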