awacke1 committed
Commit 8095fa0
Parent: cc75d28

Update app.py

Files changed (1):
app.py (+8, -16)
app.py CHANGED

@@ -3,7 +3,6 @@ import torch
 import os
 import uuid
 import random
-
 from glob import glob
 from pathlib import Path
 from typing import Optional
@@ -12,6 +11,7 @@ from diffusers.utils import load_image, export_to_video
 from PIL import Image
 from huggingface_hub import hf_hub_download
 
+
 pipe = StableVideoDiffusionPipeline.from_pretrained(
     "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
 )
@@ -19,6 +19,7 @@ pipe.to("cuda")
 pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
 max_64_bit_int = 2**63 - 1
 
+
 def sample(
     image: Image,
     seed: Optional[int] = 42,
@@ -31,60 +32,48 @@ def sample(
     device: str = "cuda",
     output_folder: str = "outputs",
 ):
+
     if image.mode == "RGBA":
         image = image.convert("RGB")
     if(randomize_seed):
         seed = random.randint(0, max_64_bit_int)
+
     generator = torch.manual_seed(seed)
 
-    # Count completed mp4 videos and set the path
     os.makedirs(output_folder, exist_ok=True)
     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
-
     frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
-
-    # Export frames to video
     export_to_video(frames, video_path, fps=fps_id)
     torch.manual_seed(seed)
-
-    # Return the video and seed
     return video_path, seed
 
 def resize_image(image, output_size=(1024, 576)):
-    # Calculate aspect ratios
     target_aspect = output_size[0] / output_size[1]  # Aspect ratio of the desired size
     image_aspect = image.width / image.height  # Aspect ratio of the original image
 
-    # Resize then crop if the original image is larger
     if image_aspect > target_aspect:
-        # Resize the image to match the target height, maintaining aspect ratio
         new_height = output_size[1]
         new_width = int(new_height * image_aspect)
         resized_image = image.resize((new_width, new_height), Image.LANCZOS)
-
-        # Calculate coordinates for cropping
         left = (new_width - output_size[0]) / 2
         top = 0
         right = (new_width + output_size[0]) / 2
         bottom = output_size[1]
     else:
-        # Resize the image to match the target width, maintaining aspect ratio
         new_width = output_size[0]
         new_height = int(new_width / image_aspect)
         resized_image = image.resize((new_width, new_height), Image.LANCZOS)
-
-        # Calculate coordinates for cropping
         left = 0
         top = (new_height - output_size[1]) / 2
         right = output_size[0]
         bottom = (new_height + output_size[1]) / 2
 
-    # Crop the image
     cropped_image = resized_image.crop((left, top, right, bottom))
     return cropped_image
 
 with gr.Blocks() as demo:
+
     gr.Markdown('''# Stable Video Diffusion using Image 2 Video XT ([model](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt),
     [paper](https://stability.ai/research/stable-video-diffusion-scaling-latent-video-diffusion-models-to-large-datasets),
     [stability's ui waitlist](https://stability.ai/contact))
@@ -92,12 +81,15 @@ with gr.Blocks() as demo:
     ''')
 
     with gr.Row():
+
         with gr.Column():
             image = gr.Image(label="Upload your image", type="pil")
             generate_btn = gr.Button("Generate")
+
         video = gr.Video()
 
     with gr.Accordion("Advanced options", open=False):
+
         seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
         randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
         motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
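
In sum, this commit is a cleanup: it strips redundant inline comments from sample() and resize_image() and normalizes blank-line spacing (+8, -16 lines), leaving behavior unchanged. For orientation, a minimal sketch of driving the two functions outside the Gradio UI; the input path "still.jpg" is hypothetical, and randomize_seed is assumed to be one of the sample() parameters elided by the hunk context (it is referenced in the function body):

# Hypothetical driver, not part of the commit; assumes app.py's globals
# (pipe, sample, resize_image) are already defined in the session.
from PIL import Image

still = Image.open("still.jpg")                        # hypothetical input file
frame = resize_image(still, output_size=(1024, 576))   # scale to cover, then center-crop
video_path, used_seed = sample(frame, seed=42, randomize_seed=False)
print(f"Video written to {video_path} (seed={used_seed})")

resize_image scales the input so it covers the 1024x576 target and center-crops the overflow, which lets the fixed-resolution pipeline accept stills of any aspect ratio.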
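
As a sanity check on that crop arithmetic, take a hypothetical 4000x3000 input: image_aspect = 4/3 ≈ 1.33 is below target_aspect = 1024/576 ≈ 1.78, so the else branch runs, resizing to 1024x768 and cropping rows 96 through 672:

# Worked example of resize_image's else branch for a 4000x3000 input.
new_width = 1024
new_height = int(1024 / (4000 / 3000))  # 768: width fixed, height follows aspect
top = (new_height - 576) / 2            # 96.0
bottom = (new_height + 576) / 2         # 672.0
assert bottom - top == 576              # crop box is exactly the target height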