devingulliver committed on
Commit
11a0843
·
1 Parent(s): b66c7cf

Fix warmup steps after JIT to actually work

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -15,10 +15,6 @@ pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1
15
  pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
16
  pipe = pipe.to("cuda")
17
 
18
- # optimize for speed
19
- pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=True) # hopefully this works on Ampere series GPU
20
- pipe(prompt="an astronaut riding a green horse", num_inference_steps=25) # force lengthy JIT compilation to happen ahead of time
21
-
22
  # watermarking helper functions. paraphrased from the reference impl of arXiv:2305.20030
23
 
24
  def circle_mask(size=128, r=16, x_offset=0, y_offset=0):
@@ -114,6 +110,10 @@ def detect(image):
114
  def generate(prompt):
115
  return pipe(prompt=prompt, num_inference_steps=25, latents=get_noise()).images[0]
116
 
 
 
 
 
117
  # actual gradio demo
118
 
119
  def manager(input, progress=gr.Progress(track_tqdm=True)): # to prevent the queue from overloading
 
15
  pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
16
  pipe = pipe.to("cuda")
17
 
 
 
 
 
18
  # watermarking helper functions. paraphrased from the reference impl of arXiv:2305.20030
19
 
20
  def circle_mask(size=128, r=16, x_offset=0, y_offset=0):
 
110
  def generate(prompt):
111
  return pipe(prompt=prompt, num_inference_steps=25, latents=get_noise()).images[0]
112
 
113
+ # optimize for speed
114
+ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
115
+ print(detect(generate("an astronaut riding a green horse"))) # warmup after jit
116
+
117
  # actual gradio demo
118
 
119
  def manager(input, progress=gr.Progress(track_tqdm=True)): # to prevent the queue from overloading