Yw22 committed
Commit f9cca8d
1 Parent(s): 7825b1f
Files changed (1)
  1. app.py +128 -130
app.py CHANGED
@@ -322,9 +322,8 @@ class ImageConductor:
             transforms.ToTensor(),
         ])
 
-        image_norm = lambda x: x
         image_paths = [first_frame_path]
-        controlnet_images = [image_norm(image_transforms(Image.open(path).convert("RGB"))) for path in image_paths]
+        controlnet_images = [(image_transforms(Image.open(path).convert("RGB"))) for path in image_paths]
         controlnet_images = torch.stack(controlnet_images).unsqueeze(0).to(device)
         controlnet_images = rearrange(controlnet_images, "b f c h w -> b c f h w")
         num_controlnet_images = controlnet_images.shape[2]
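The first hunk drops the no-op `image_norm = lambda x: x` wrapper, so each conditioning frame now passes straight through `image_transforms` before being stacked and rearranged. A minimal sketch (not part of the commit) of the resulting tensor shapes, assuming a single 256x384 RGB frame as configured elsewhere in this file; the variable names below are illustrative only:

    import torch
    from einops import rearrange

    frames = [torch.rand(3, 256, 384)]                # one (c, h, w) tensor per image path
    imgs = torch.stack(frames).unsqueeze(0)           # -> (b=1, f=1, c=3, h=256, w=384)
    imgs = rearrange(imgs, "b f c h w -> b c f h w")  # -> (1, 3, 1, 256, 384)
    num_controlnet_images = imgs.shape[2]             # number of conditioning frames, here 1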
@@ -502,145 +501,144 @@ def delete_last_step(tracking_points, first_frame_path, drag_mode):
     return {tracking_points_var: tracking_points, input_image: trajectory_map}
 
 
-if __name__=="__main__":
-    block = gr.Blocks(
-        theme=gr.themes.Soft(
-            radius_size=gr.themes.sizes.radius_none,
-            text_size=gr.themes.sizes.text_md
-        )
-    ).queue()
-    with block:
-        with gr.Row():
-            with gr.Column():
-                gr.HTML(head)
-
-                gr.Markdown(descriptions)
-
-                with gr.Accordion(label="🛠️ Instructions:", open=True, elem_id="accordion"):
-                    with gr.Row(equal_height=True):
-                        gr.Markdown(instructions)
-
-
-        # device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-        device = torch.device("cuda")
-        unet_path = 'models/unet.ckpt'
-        image_controlnet_path = 'models/image_controlnet.ckpt'
-        flow_controlnet_path = 'models/flow_controlnet.ckpt'
-        ImageConductor_net = ImageConductor(device=device,
-                                            unet_path=unet_path,
-                                            image_controlnet_path=image_controlnet_path,
-                                            flow_controlnet_path=flow_controlnet_path,
-                                            height=256,
-                                            width=384,
-                                            model_length=16
-                                            )
-        first_frame_path_var = gr.State(value=None)
-        tracking_points_var = gr.State([])
-
-        with gr.Row():
-            with gr.Column(scale=1):
-                image_upload_button = gr.UploadButton(label="Upload Image",file_types=["image"])
-                add_drag_button = gr.Button(value="Add Drag")
-                reset_button = gr.Button(value="Reset")
-                delete_last_drag_button = gr.Button(value="Delete last drag")
-                delete_last_step_button = gr.Button(value="Delete last step")
-
-
 
-            with gr.Column(scale=7):
-                with gr.Row():
-                    with gr.Column(scale=6):
-                        input_image = gr.Image(label="Input Image",
-                                               interactive=True,
-                                               height=300,
                                                width=384,)
-                    with gr.Column(scale=6):
-                        output_image = gr.Image(label="Motion Path",
-                                                interactive=False,
-                                                height=256,
-                                                width=384,)
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        prompt = gr.Textbox(value="a wonderful elf.", label="Prompt (highly-recommended)", interactive=True, visible=True)
-                        negative_prompt = gr.Text(
-                            label="Negative Prompt",
-                            max_lines=5,
-                            placeholder="Please input your negative prompt",
-                            value='worst quality, low quality, letterboxed',lines=1
                         )
-                        drag_mode = gr.Radio(['camera', 'object'], label='Drag mode: ', value='object', scale=2)
-                        run_button = gr.Button(value="Run")
-
-                        with gr.Accordion("More input params", open=False, elem_id="accordion1"):
-                            with gr.Group():
-                                seed = gr.Textbox(
-                                    label="Seed: ", value=561793204,
                                 )
-                                randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
-
-                            with gr.Group():
-                                with gr.Row():
-                                    guidance_scale = gr.Slider(
-                                        label="Guidance scale",
-                                        minimum=1,
-                                        maximum=12,
-                                        step=0.1,
-                                        value=8.5,
-                                    )
-                                    num_inference_steps = gr.Slider(
-                                        label="Number of inference steps",
-                                        minimum=1,
-                                        maximum=50,
-                                        step=1,
-                                        value=25,
-                                    )
-
-                            with gr.Group():
-                                personalized = gr.Dropdown(label="Personalized", choices=['HelloObject', 'TUSUN', ""], value="")
-                                examples_type = gr.Textbox(label="Examples Type (Ignore) ", value="", visible=False)
-
-                    with gr.Column(scale=7):
-                        output_video = gr.Video(
-                            label="Output Video",
-                            width=384,
-                            height=256)
-
-
-        with gr.Row():
-            def process_example(input_image, prompt, drag_mode, seed, personalized, examples_type):
-
-                return input_image, prompt, drag_mode, seed, personalized, examples_type
-
-            example = gr.Examples(
-                label="Input Example",
-                examples=image_examples,
-                inputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
-                outputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
-                fn=process_example,
-                run_on_click=True,
-                examples_per_page=10,
-                cache_examples=False,
-            )
 
 
-        with gr.Row():
-            gr.Markdown(citation)
-
 
-        image_upload_button.upload(preprocess_image, image_upload_button, [input_image, first_frame_path_var, tracking_points_var])
 
-        add_drag_button.click(add_drag, [tracking_points_var], tracking_points_var)
 
-        delete_last_drag_button.click(delete_last_drag, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
-        delete_last_step_button.click(delete_last_step, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
-        reset_button.click(reset_states, [first_frame_path_var, tracking_points_var], [input_image, first_frame_path_var, tracking_points_var])
 
-        input_image.select(add_tracking_points, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
-        run_button.click(ImageConductor_net.run, [first_frame_path_var, tracking_points_var, prompt, drag_mode,
-                                                  negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, personalized, examples_type],
-                         [output_image, output_video])
 
-    block.launch()
 
+block = gr.Blocks(
+    theme=gr.themes.Soft(
+        radius_size=gr.themes.sizes.radius_none,
+        text_size=gr.themes.sizes.text_md
+    )
+)
+with block:
+    with gr.Row():
+        with gr.Column():
+            gr.HTML(head)
+
+            gr.Markdown(descriptions)
+
+            with gr.Accordion(label="🛠️ Instructions:", open=True, elem_id="accordion"):
+                with gr.Row(equal_height=True):
+                    gr.Markdown(instructions)
+
+
+    # device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    device = torch.device("cuda")
+    unet_path = 'models/unet.ckpt'
+    image_controlnet_path = 'models/image_controlnet.ckpt'
+    flow_controlnet_path = 'models/flow_controlnet.ckpt'
+    ImageConductor_net = ImageConductor(device=device,
+                                        unet_path=unet_path,
+                                        image_controlnet_path=image_controlnet_path,
+                                        flow_controlnet_path=flow_controlnet_path,
+                                        height=256,
+                                        width=384,
+                                        model_length=16
+                                        )
+    first_frame_path_var = gr.State(value=None)
+    tracking_points_var = gr.State([])
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            image_upload_button = gr.UploadButton(label="Upload Image",file_types=["image"])
+            add_drag_button = gr.Button(value="Add Drag")
+            reset_button = gr.Button(value="Reset")
+            delete_last_drag_button = gr.Button(value="Delete last drag")
+            delete_last_step_button = gr.Button(value="Delete last step")
+
+
 
+        with gr.Column(scale=7):
+            with gr.Row():
+                with gr.Column(scale=6):
+                    input_image = gr.Image(label="Input Image",
+                                           interactive=True,
+                                           height=300,
+                                           width=384,)
+                with gr.Column(scale=6):
+                    output_image = gr.Image(label="Motion Path",
+                                            interactive=False,
+                                            height=256,
                                            width=384,)
+            with gr.Row():
+                with gr.Column(scale=1):
+                    prompt = gr.Textbox(value="a wonderful elf.", label="Prompt (highly-recommended)", interactive=True, visible=True)
+                    negative_prompt = gr.Text(
+                        label="Negative Prompt",
+                        max_lines=5,
+                        placeholder="Please input your negative prompt",
+                        value='worst quality, low quality, letterboxed',lines=1
+                    )
+                    drag_mode = gr.Radio(['camera', 'object'], label='Drag mode: ', value='object', scale=2)
+                    run_button = gr.Button(value="Run")
+
+                    with gr.Accordion("More input params", open=False, elem_id="accordion1"):
+                        with gr.Group():
+                            seed = gr.Textbox(
+                                label="Seed: ", value=561793204,
+                            )
+                            randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
+
+                        with gr.Group():
+                            with gr.Row():
+                                guidance_scale = gr.Slider(
+                                    label="Guidance scale",
+                                    minimum=1,
+                                    maximum=12,
+                                    step=0.1,
+                                    value=8.5,
                                 )
+                                num_inference_steps = gr.Slider(
+                                    label="Number of inference steps",
+                                    minimum=1,
+                                    maximum=50,
+                                    step=1,
+                                    value=25,
                                 )
+
+                        with gr.Group():
+                            personalized = gr.Dropdown(label="Personalized", choices=['HelloObject', 'TUSUN', ""], value="")
+                            examples_type = gr.Textbox(label="Examples Type (Ignore) ", value="", visible=False)
+
+                with gr.Column(scale=7):
+                    output_video = gr.Video(
+                        label="Output Video",
+                        width=384,
+                        height=256)
 
 
+    with gr.Row():
+        def process_example(input_image, prompt, drag_mode, seed, personalized, examples_type):
+
+            return input_image, prompt, drag_mode, seed, personalized, examples_type
+
+        example = gr.Examples(
+            label="Input Example",
+            examples=image_examples,
+            inputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
+            outputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
+            fn=process_example,
+            run_on_click=True,
+            examples_per_page=10,
+            cache_examples=False,
+        )
+
 
+    with gr.Row():
+        gr.Markdown(citation)
+
+
+    image_upload_button.upload(preprocess_image, image_upload_button, [input_image, first_frame_path_var, tracking_points_var])
 
+    add_drag_button.click(add_drag, [tracking_points_var], tracking_points_var)
 
+    delete_last_drag_button.click(delete_last_drag, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
+    delete_last_step_button.click(delete_last_step, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
+    reset_button.click(reset_states, [first_frame_path_var, tracking_points_var], [input_image, first_frame_path_var, tracking_points_var])
 
+    input_image.select(add_tracking_points, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])
 
+    run_button.click(ImageConductor_net.run, [first_frame_path_var, tracking_points_var, prompt, drag_mode,
+                                              negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, personalized, examples_type],
+                     [output_image, output_video])
 
+block.queue().launch()
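The second hunk moves the Gradio UI out of the `if __name__=="__main__":` guard to module scope and shifts `.queue()` from the `gr.Blocks(...)` constructor onto the launch call. A minimal sketch (not the app's actual UI) of that launch pattern; `Blocks.queue()` returns the same Blocks instance, which is why `.launch()` can be chained:

    import gradio as gr

    block = gr.Blocks(theme=gr.themes.Soft())
    with block:
        gr.Markdown("placeholder UI")  # the real app.py builds the full ImageConductor interface here

    block.queue().launch()             # enable the request queue, then start the server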