Fabrice-TIERCELIN committed on
Commit
b4d526c
·
verified ·
1 Parent(s): 6dfe7c7

Handle any size

Browse files

This PR handles input images of any size, even those larger than 1 million pixels, and restores the original size afterwards:
1. It saves the original size
2. It computes the best processing size for SDXL, whether the image is landscape or portrait
3. It ensures that the width and height are multiples of 8
4. After the computation, it restores the original size

This code is already used here: https://huggingface.co/spaces/Fabrice-TIERCELIN/Uncrop

Files changed (1) hide show
  1. app.py +28 -10
app.py CHANGED
@@ -11,22 +11,40 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
11
  pipe = AutoPipelineForImage2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16) if torch.cuda.is_available() else AutoPipelineForImage2Image.from_pretrained("stabilityai/sdxl-turbo")
12
  pipe = pipe.to(device)
13
 
14
- def resize(value,img):
15
- img = Image.open(img)
16
- img = img.resize((value,value))
17
- return img
18
-
19
  def infer(source_img, prompt, steps, seed, Strength):
20
  generator = torch.Generator(device).manual_seed(seed)
21
  if int(steps * Strength) < 1:
22
  steps = math.ceil(1 / max(0.10, Strength))
23
- source_image = resize(512, source_img)
24
- source_image.save('source.png')
25
- image = pipe(prompt, image=source_image, strength=Strength, guidance_scale=0.0, num_inference_steps=steps).images[0]
26
- return image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  gr.Interface(fn=infer, inputs=[
29
- gr.Image(sources=["upload", "webcam", "clipboard"], type="filepath", label="Raw Image."),
30
  gr.Textbox(label = 'Prompt Input Text. 77 Token (Keyword or Symbol) Maximum'),
31
  gr.Slider(1, 5, value = 2, step = 1, label = 'Number of Iterations'),
32
  gr.Slider(label = "Seed", minimum = 0, maximum = 987654321987654321, step = 1, randomize = True),
 
11
  pipe = AutoPipelineForImage2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16) if torch.cuda.is_available() else AutoPipelineForImage2Image.from_pretrained("stabilityai/sdxl-turbo")
12
  pipe = pipe.to(device)
13
 
 
 
 
 
 
14
  def infer(source_img, prompt, steps, seed, Strength):
15
  generator = torch.Generator(device).manual_seed(seed)
16
  if int(steps * Strength) < 1:
17
  steps = math.ceil(1 / max(0.10, Strength))
18
+
19
+ original_height, original_width, original_channel = np.array(source_img).shape
20
+
21
+ # Limited to 1 million pixels
22
+ if 1024 * 1024 < original_width * original_height:
23
+ factor = ((1024 * 1024) / (original_width * original_height))**0.5
24
+ process_width = math.floor(original_width * factor)
25
+ process_height = math.floor(original_height * factor)
26
+ else:
27
+ process_width = original_width
28
+ process_height = original_height
29
+
30
+ # Width and height must be multiple of 8
31
+ if (process_width % 8) != 0 or (process_height % 8) != 0:
32
+ process_width = process_width - (process_width % 8)
33
+ process_height = process_height - (process_height % 8)
34
+
35
+ if ((process_width + 8) * (process_height + 8)) <= (1024 * 1024):
36
+ process_width = process_width + 8
37
+ process_height = process_height + 8
38
+
39
+ source_image = source_img.resize((process_width, process_height))
40
+
41
+ image = pipe(prompt, image=source_image, strength=Strength, guidance_scale=0.0, num_inference_steps=steps, width = process_width, height = process_height).images[0]
42
+
43
+ output_image = image.resize((original_width, original_height))
44
+ return output_image
45
 
46
  gr.Interface(fn=infer, inputs=[
47
+ gr.Image(sources=["upload", "webcam", "clipboard"], type = "pil", label="Raw Image."),
48
  gr.Textbox(label = 'Prompt Input Text. 77 Token (Keyword or Symbol) Maximum'),
49
  gr.Slider(1, 5, value = 2, step = 1, label = 'Number of Iterations'),
50
  gr.Slider(label = "Seed", minimum = 0, maximum = 987654321987654321, step = 1, randomize = True),