Spaces:

multimodalart
/

latentdiffusion

Running on L4

App Files Files Community

multimodalart HF staff committed on Dec 5, 2024

Commit

ef32f64

verified ·

1 Parent(s): b369adb

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -9

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ sys.path.append('./latent-diffusion')
 from taming.models import vqgan
 from ldm.util import instantiate_from_config
 from huggingface_hub import hf_hub_download
 model_path_e = hf_hub_download(repo_id="multimodalart/compvis-latent-diffusion-text2img-large", filename="txt2img-f8-large.ckpt")
@@ -100,6 +101,7 @@ model = model.to(device)
 safety_model = load_safety_model("ViT-B/32")
 clip_model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
 def run(prompt, steps, width, height, images, scale):
     opt = argparse.Namespace(
         prompt = prompt,
@@ -179,20 +181,23 @@ def run(prompt, steps, width, height, images, scale):
     grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
     Image.fromarray(grid.astype(np.uint8)).save(os.path.join(outpath, f'{prompt.replace(" ", "-")}.png'))
-    return(Image.fromarray(grid.astype(np.uint8)),all_samples_images,None)
-image = gr.outputs.Image(type="pil", label="Your result")
 css = ".output-image{height: 528px !important} .output-carousel .output-image{height:272px !important} a{text-decoration: underline}"
 iface = gr.Interface(fn=run, inputs=[
-    gr.inputs.Textbox(label="Prompt - try adding increments to your prompt such as 'oil on canvas', 'a painting', 'a book cover'",default="chalk pastel drawing of a dog wearing a funny hat"),
-    gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=45,maximum=50,minimum=1,step=1),
-    gr.inputs.Radio(label="Width", choices=[32,64,128,256],default=256),
-    gr.inputs.Radio(label="Height", choices=[32,64,128,256],default=256),
-    gr.inputs.Slider(label="Images - How many images you wish to generate", default=2, step=1, minimum=1, maximum=4),
-    gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=5.0, minimum=1.0, maximum=15.0),
     #gr.inputs.Slider(label="ETA - between 0 and 1. Lower values can provide better quality, higher values can be more diverse",default=0.0,minimum=0.0, maximum=1.0,step=0.1),
     ],
-    outputs=[image,gr.outputs.Carousel(label="Individual images",components=["image"]),gr.outputs.Textbox(label="Error")],
     css=css,
     title="Generate images from text with Latent Diffusion LAION-400M",
     description="<div>By typing a prompt and pressing submit you can generate images based on this prompt. <a href='https://github.com/CompVis/latent-diffusion' target='_blank'>Latent Diffusion</a> is a text-to-image model created by <a href='https://github.com/CompVis' target='_blank'>CompVis</a>, trained on the <a href='https://laion.ai/laion-400-open-dataset/'>LAION-400M dataset.</a><br>This UI to the model was assembled by <a style='color: rgb(245, 158, 11);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a></div>",

 from taming.models import vqgan
 from ldm.util import instantiate_from_config
 from huggingface_hub import hf_hub_download
+import spaces
 model_path_e = hf_hub_download(repo_id="multimodalart/compvis-latent-diffusion-text2img-large", filename="txt2img-f8-large.ckpt")
 safety_model = load_safety_model("ViT-B/32")
 clip_model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
+@spaces.GPU
 def run(prompt, steps, width, height, images, scale):
     opt = argparse.Namespace(
         prompt = prompt,
     grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
     Image.fromarray(grid.astype(np.uint8)).save(os.path.join(outpath, f'{prompt.replace(" ", "-")}.png'))
+    return(all_samples_images,Image.fromarray(grid.astype(np.uint8)),None)
+image = gr.Image(type="pil", label="Image Grid")
 css = ".output-image{height: 528px !important} .output-carousel .output-image{height:272px !important} a{text-decoration: underline}"
 iface = gr.Interface(fn=run, inputs=[
+    gr.Textbox(label="Prompt - try adding increments to your prompt such as 'oil on canvas', 'a painting', 'a book cover'",default="chalk pastel drawing of a dog wearing a funny hat"),
+    gr.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=45,maximum=50,minimum=1,step=1),
+    gr.Radio(label="Width", choices=[32,64,128,256],default=256),
+    gr.Radio(label="Height", choices=[32,64,128,256],default=256),
+    gr.Slider(label="Images - How many images you wish to generate", default=2, step=1, minimum=1, maximum=4),
+    gr.Slider(label="Diversity scale - How different from one another you wish the images to be",default=5.0, minimum=1.0, maximum=15.0),
     #gr.inputs.Slider(label="ETA - between 0 and 1. Lower values can provide better quality, higher values can be more diverse",default=0.0,minimum=0.0, maximum=1.0,step=0.1),
     ],
+    outputs=[gr.Gallery(label="Individual images"),
+            image,
+            gr.Textbox(label="Error")
+            ],
     css=css,
     title="Generate images from text with Latent Diffusion LAION-400M",
     description="<div>By typing a prompt and pressing submit you can generate images based on this prompt. <a href='https://github.com/CompVis/latent-diffusion' target='_blank'>Latent Diffusion</a> is a text-to-image model created by <a href='https://github.com/CompVis' target='_blank'>CompVis</a>, trained on the <a href='https://laion.ai/laion-400-open-dataset/'>LAION-400M dataset.</a><br>This UI to the model was assembled by <a style='color: rgb(245, 158, 11);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a></div>",