kadirnar committed on
Commit
353dd90
•
1 Parent(s): c87f2ab

Update code

Browse files
Files changed (3) hide show
  1. app.py +26 -0
  2. requirements.txt +2 -0
  3. stable_cascade.py +136 -0
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from stable_cascade import web_demo
2
+ import gradio as gr
3
+
4
# Build the top-level Gradio page: a title banner, the author's links, and
# the demo widgets that ``web_demo`` (imported from stable_cascade) creates.
gradio_app = gr.Blocks()
with gradio_app:
    # The heading names the demo this app actually serves (the Stable Cascade
    # text-to-image pipelines wired up in stable_cascade.py).
    gr.HTML(
        """
        <h1 style='text-align: center'>
        Stable Cascade: Text-to-Image Generation 🚀
        </h1>
        """)
    gr.HTML(
        """
        <h3 style='text-align: center'>
        Follow me for more!
        <a href='https://twitter.com/kadirnar_ai' target='_blank'>Twitter</a> | <a href='https://github.com/kadirnar' target='_blank'>Github</a> | <a href='https://www.linkedin.com/in/kadir-nar/' target='_blank'>Linkedin</a> | <a href='https://www.huggingface.co/kadirnar/' target='_blank'>HuggingFace</a>
        </h3>
        """)
    with gr.Row():
        with gr.Column():
            # web_demo() instantiates its components while this Blocks
            # context is active, so they are rendered inside this column.
            web_demo()

# Turn on the request queue before starting the server.
gradio_app.queue()
gradio_app.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ git+https://github.com/kashif/diffusers.git@wuerstchen-v3
2
+ peft
stable_cascade.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from diffusers import StableCascadeDecoderPipeline, StableCascadePriorPipeline
3
+ import gradio as gr
4
+
5
+
6
# Load both Stable Cascade stages once, at import time, and place them on the
# CUDA device. The prior is loaded in bfloat16 and the decoder in float16.
prior = StableCascadePriorPipeline.from_pretrained(
    "stabilityai/stable-cascade-prior",
    torch_dtype=torch.bfloat16,
)
prior = prior.to("cuda")

decoder = StableCascadeDecoderPipeline.from_pretrained(
    "stabilityai/stable-cascade",
    torch_dtype=torch.float16,
)
decoder = decoder.to("cuda")
9
+
10
def generate_images(
    prompt="a photo of a girl",
    negative_prompt="bad,ugly,deformed",
    height=1024,
    width=1024,
    guidance_scale=4.0,
    prior_inference_steps=20,
    decoder_inference_steps=10
):
    """
    Generate images for a text prompt with the two-stage Stable Cascade pipelines.

    The module-level ``prior`` pipeline turns the prompt into image embeddings,
    and the module-level ``decoder`` pipeline turns those embeddings into images.

    Parameters:
    - prompt (str): The prompt to generate images for.
    - negative_prompt (str): The negative prompt to guide image generation away from.
    - height (int): The height of the generated images.
    - width (int): The width of the generated images.
    - guidance_scale (float): The guidance scale passed to the prior pipeline.
    - prior_inference_steps (int): The number of inference steps for the prior model.
    - decoder_inference_steps (int): The number of inference steps for the decoder model.

    Returns:
    - List[PIL.Image]: The generated PIL images (one image, because
      ``num_images_per_prompt`` is fixed to 1). A list is returned so the
      result can be passed straight to a gallery-style output component.
    """
    # Values that originate from UI widgets may arrive as floats; the
    # pipelines expect whole numbers for sizes and step counts.
    height = int(height)
    width = int(width)
    prior_inference_steps = int(prior_inference_steps)
    decoder_inference_steps = int(decoder_inference_steps)

    # Stage 1: generate image embeddings using the prior model.
    prior_output = prior(
        prompt=prompt,
        height=height,
        width=width,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
        num_inference_steps=prior_inference_steps
    )

    # Stage 2: decode the embeddings into images. The embeddings are cast to
    # half precision because the decoder pipeline is loaded in float16 while
    # the prior pipeline is loaded in bfloat16.
    images = decoder(
        image_embeddings=prior_output.image_embeddings.half(),
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=0.0,  # Guidance is applied in the prior, so it is disabled for the decoder
        output_type="pil",
        num_inference_steps=decoder_inference_steps
    ).images

    # Return the whole list rather than only its first element: this matches
    # the documented return type and the gallery output this function feeds.
    return list(images)
57
+
58
+
59
def web_demo():
    """
    Build the text-to-image demo widgets and wire them to ``generate_images``.

    The left column holds the prompt boxes, the size / guidance / step sliders
    and the "Generate Image" button; the right column holds the output gallery.
    Clicking the button calls ``generate_images`` with the current widget
    values and shows the result in the gallery. Nothing is returned.
    """
    with gr.Blocks():
        with gr.Row():
            # Left column: all user inputs plus the trigger button.
            with gr.Column():
                prompt_box = gr.Textbox(
                    lines=1,
                    placeholder="Prompt",
                    show_label=False,
                )
                negative_prompt_box = gr.Textbox(
                    lines=1,
                    placeholder="Negative Prompt",
                    show_label=False,
                )

                # Output image dimensions.
                with gr.Row():
                    with gr.Column():
                        height_slider = gr.Slider(
                            label="Image Height",
                            minimum=128,
                            maximum=1280,
                            step=32,
                            value=512,
                        )
                        width_slider = gr.Slider(
                            label="Image Width",
                            minimum=128,
                            maximum=1280,
                            step=32,
                            value=512,
                        )

                # Sampling controls for the prior and decoder stages.
                with gr.Row():
                    with gr.Column():
                        guidance_slider = gr.Slider(
                            label="Guidance Scale",
                            minimum=0.1,
                            maximum=15,
                            step=0.1,
                            value=4.0,
                        )
                        prior_steps_slider = gr.Slider(
                            label="Prior Inference Step",
                            minimum=1,
                            maximum=50,
                            step=1,
                            value=20,
                        )
                        decoder_steps_slider = gr.Slider(
                            label="Decoder Inference Step",
                            minimum=1,
                            maximum=50,
                            step=1,
                            value=10,
                        )

                generate_button = gr.Button(value="Generate Image")

            # Right column: where the generated output is displayed.
            with gr.Column():
                gallery = gr.Gallery(
                    label="Generated images",
                    show_label=False,
                    elem_id="gallery",
                )

            # The order of this list must match the positional parameter
            # order of generate_images.
            handler_inputs = [
                prompt_box,
                negative_prompt_box,
                height_slider,
                width_slider,
                guidance_slider,
                prior_steps_slider,
                decoder_steps_slider,
            ]
            generate_button.click(
                fn=generate_images,
                inputs=handler_inputs,
                outputs=gallery,
            )