muneebable committed on
Commit
0f3dbed
·
verified ·
1 Parent(s): a70d8d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -123
app.py CHANGED
@@ -1,142 +1,102 @@
1
  import gradio as gr
2
  import numpy as np
3
  import random
4
- #import spaces #[uncomment to use ZeroGPU]
5
- from diffusers import DiffusionPipeline
6
  import torch
 
 
 
 
 
 
7
 
 
8
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_repo_id = "stabilityai/sdxl-turbo"  # Swap in the model you would like to use

# Half precision when CUDA is available, full precision otherwise.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load the pipeline once at import time and move it to the chosen device.
pipe = DiffusionPipeline.from_pretrained(
    model_repo_id,
    torch_dtype=torch_dtype,
).to(device)

# Upper bounds shared by the seed and image-size sliders.
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024
 
 
 
 
 
21
 
22
- #@spaces.GPU #[uncomment to use ZeroGPU]
23
def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
    """Run the module-level pipeline once and return ``(image, seed)``.

    When ``randomize_seed`` is truthy the incoming ``seed`` is replaced by a
    random integer in ``[0, MAX_SEED]``. The seed that was actually used is
    returned next to the first generated image so the UI can display it.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # A dedicated generator keeps the run reproducible for a given seed.
    rng = torch.Generator().manual_seed(seed)

    pipe_kwargs = dict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=rng,
    )
    output = pipe(**pipe_kwargs)

    return output.images[0], seed
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
# Prompts offered as one-click examples underneath the controls.
examples = [
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    "An astronaut riding a green horse",
    "A delicious ceviche cheesecake slice",
]

# Centre the main column and cap its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
        # Text-to-Image Gradio Template
        """)

        # Prompt box and run button share a single row.
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            # Created but hidden; its value is still forwarded to infer().
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=False,
            )

            seed = gr.Slider(0, MAX_SEED, value=0, step=1, label="Seed")
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            # Output resolution; adjust the defaults to suit the model.
            with gr.Row():
                width = gr.Slider(256, MAX_IMAGE_SIZE, value=1024, step=32, label="Width")
                height = gr.Slider(256, MAX_IMAGE_SIZE, value=1024, step=32, label="Height")

            # Sampler settings; adjust the defaults to suit the model.
            with gr.Row():
                guidance_scale = gr.Slider(0.0, 10.0, value=0.0, step=0.1, label="Guidance scale")
                num_inference_steps = gr.Slider(1, 50, value=2, step=1, label="Number of inference steps")

        gr.Examples(examples=examples, inputs=[prompt])

    # Clicking "Run" and pressing Enter in the prompt box both start inference.
    infer_inputs = [
        prompt,
        negative_prompt,
        seed,
        randomize_seed,
        width,
        height,
        guidance_scale,
        num_inference_steps,
    ]
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=infer_inputs,
        outputs=[result, seed],
    )

demo.queue().launch()
 
1
  import gradio as gr
2
  import numpy as np
3
  import random
 
 
4
  import torch
5
+ from diffusers import DDPMPipeline, DDIMScheduler
6
+ import open_clip
7
+ import torchvision
8
+ from PIL import Image
9
+ from tqdm import tqdm
10
+ import torch.nn.functional as F
11
 
12
# Initialize device: use the GPU when torch can see one, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load CLIP model. `preprocess` is the PIL-image transform returned alongside
# the model; the unnamed middle value is not used in this file.
clip_model, _, preprocess = open_clip.create_model_and_transforms("ViT-B-32", pretrained="openai")
clip_model.to(device)
# open_clip hands the model back in training mode; it is only used for
# inference here, so switch to eval mode explicitly.
clip_model.eval()
# The weights are never optimised (only gradients w.r.t. the generated image
# are requested), so freeze them to avoid tracking parameter gradients.
clip_model.requires_grad_(False)
 
18
 
19
# Per-channel normalisation statistics applied before the CLIP image encoder.
_CLIP_MEAN = (0.48145466, 0.4578275, 0.40821073)
_CLIP_STD = (0.26862954, 0.26130258, 0.27577711)

# Transform to preprocess PIL images (unchanged behaviour: resize, convert to
# a [0, 1] tensor, normalise).
tfms = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=_CLIP_MEAN, std=_CLIP_STD),
    ]
)

# Tensor counterpart of `tfms`. `ToTensor` only accepts PIL images / ndarrays
# and raises TypeError on a torch.Tensor, so tensors coming out of the sampler
# need a pipeline without it. Resize and Normalize both operate on tensors and
# keep the result differentiable.
_tensor_tfms = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.Normalize(mean=_CLIP_MEAN, std=_CLIP_STD),
    ]
)


# CLIP Loss function
def clip_loss(image, text_features):
    """Return the mean ``1 - cosine_similarity`` between image and text features.

    Args:
        image: Either a ``torch.Tensor`` of shape ``(C, H, W)`` or
            ``(B, C, H, W)``, or a PIL image. Tensors are assumed to be in
            the sampler's ``[-1, 1]`` range (the same range ``infer`` maps to
            ``[0, 1]`` when it builds the output picture) and are rescaled to
            ``[0, 1]`` before normalisation.
        text_features: CLIP text embedding(s) to compare against; broadcast
            against the image batch by ``torch.cosine_similarity``.

    Returns:
        A scalar tensor. It is differentiable with respect to ``image`` when
        ``image`` is a tensor that requires grad.
    """
    if isinstance(image, torch.Tensor):
        # Already a tensor: add a batch axis only when one is missing, then
        # bring the values into the [0, 1] range the statistics expect.
        batch = image if image.dim() == 4 else image.unsqueeze(0)
        batch = _tensor_tfms((batch + 1) / 2)
    else:
        # PIL image: original path, single image -> batch of one.
        batch = tfms(image).unsqueeze(0)

    image_features = clip_model.encode_image(batch.to(device))
    image_features = F.normalize(image_features, dim=-1)
    text_features = F.normalize(text_features, dim=-1)
    similarity = torch.cosine_similarity(image_features, text_features)
    return (1 - similarity).mean()
38
 
39
# Diffusion model: the fine-tuned DDPM pipeline supplies the UNet used during
# sampling. Replace the repo id to use a different model.
model_repo_id = "muneebable/ddpm-celebahq-finetuned-anime-art"
image_pipe = DDPMPipeline.from_pretrained(model_repo_id)
image_pipe.to(device)

# Scheduler: a DDIM scheduler loaded from the same repo, prepared with a
# 40-step schedule at import time.
scheduler = DDIMScheduler.from_pretrained(model_repo_id)
scheduler.set_timesteps(num_inference_steps=40)
47
 
48
+ # Gradio Inference Function
49
def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
    """Sample a row of four images steered towards ``prompt`` by CLIP guidance.

    Args:
        prompt: Text embedded with CLIP and used as the guidance target.
        negative_prompt: Accepted for interface compatibility; not used.
        seed: Seed for torch's global RNG (ignored when ``randomize_seed``).
        randomize_seed: When truthy, draw a fresh seed in ``[0, int32 max]``.
        width: Accepted for interface compatibility; not used (samples are
            always drawn at 256x256).
        height: Accepted for interface compatibility; not used.
        guidance_scale: Multiplier applied to the CLIP loss before its
            gradient is taken.
        num_inference_steps: Number of scheduler steps for this request.
        progress: Gradio progress tracker; mirrors the tqdm loop below.

    Returns:
        ``(result_image, seed)``: a PIL image containing the four samples
        side by side, and the integer seed that was actually used.
    """
    n_samples = 4  # images generated per request (one grid row)
    n_cuts = 4     # CLIP-loss evaluations averaged per step

    if randomize_seed:
        seed = random.randint(0, np.iinfo(np.int32).max)
    seed = int(seed)  # sliders may deliver floats
    torch.manual_seed(seed)

    # Honour the requested step count. Previously the schedule stayed at the
    # value set at import time and this argument had no effect.
    # NOTE(review): this mutates the shared module-level scheduler; safe as
    # long as requests are processed one at a time -- confirm queue settings.
    scheduler.set_timesteps(num_inference_steps=max(1, int(num_inference_steps)))

    # Embed prompt with CLIP
    text = open_clip.tokenize([prompt or ""]).to(device)
    with torch.no_grad():
        text_features = clip_model.encode_text(text)

    x = torch.randn(n_samples, 3, 256, 256).to(device)

    # Wrap the timesteps themselves (not the enumerate object) so tqdm knows
    # the total and the Gradio progress bar can show real progress.
    for i, t in enumerate(tqdm(scheduler.timesteps)):
        model_input = scheduler.scale_model_input(x, t)
        with torch.no_grad():
            noise_pred = image_pipe.unet(model_input, t)["sample"]

        # Gradient of the (scaled) CLIP loss w.r.t. the current sample, taken
        # through the scheduler's prediction of the fully denoised image.
        cond_grad = 0
        for _ in range(n_cuts):
            x = x.detach().requires_grad_()
            x0 = scheduler.step(noise_pred, t, x).pred_original_sample
            loss = clip_loss(x0, text_features) * guidance_scale
            cond_grad -= torch.autograd.grad(loss, x)[0] / n_cuts

        # NOTE(review): alphas_cumprod is indexed by loop position `i`, not
        # by the timestep `t`; kept unchanged -- confirm this is intended.
        alpha_bar = scheduler.alphas_cumprod[i]
        x = x.detach() + cond_grad * alpha_bar.sqrt()
        x = scheduler.step(noise_pred, t, x).prev_sample

    # Convert output to an image: tile the batch, map [-1, 1] -> [0, 1],
    # then scale to 8-bit RGB.
    grid = torchvision.utils.make_grid(x.detach(), nrow=n_samples)
    im = grid.permute(1, 2, 0).cpu().clip(-1, 1) * 0.5 + 0.5
    result_image = Image.fromarray((im.numpy() * 255).astype(np.uint8))

    return result_image, seed
81
 
82
# Gradio App
with gr.Blocks() as demo:
    prompt = gr.Textbox(placeholder="Enter your prompt", label="Prompt")
    run_button = gr.Button("Generate")

    result = gr.Image(label="Generated Image")

    with gr.Accordion("Advanced Settings"):
        # Forwarded to infer(), which currently does not use it.
        negative_prompt = gr.Textbox(label="Negative Prompt")
        # step=1 keeps the seed an integer and lets the slider represent any
        # seed infer() returns; without it Gradio derives a step from the
        # (very large) range.
        seed = gr.Slider(0, np.iinfo(np.int32).max, value=0, step=1, label="Seed")
        randomize_seed = gr.Checkbox(True, label="Randomize Seed")
        # Forwarded to infer(), which currently does not use them.
        width = gr.Slider(256, 1024, value=512, label="Width")
        height = gr.Slider(256, 1024, value=512, label="Height")
        guidance_scale = gr.Slider(0.0, 10.0, value=7.5, label="Guidance Scale")
        # step=1: a step count must be a whole number; the range-derived
        # default step would allow fractional values.
        num_inference_steps = gr.Slider(1, 50, value=50, step=1, label="Steps")

    # Run inference on click; the seed slider is also an output so it shows
    # the seed that was actually used.
    run_button.click(
        infer,
        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs=[result, seed],
    )

demo.queue().launch()