"""Gradio demo: generate a Bond movie poster starring a chosen celebrity.

The poster is produced with Stable Diffusion and can optionally be
post-processed through a remote GFPGAN endpoint.
"""
import base64
import os
from io import BytesIO

import gradio as gr
import requests
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image, PngImagePlugin

# Required for Stable Diffusion; read from the "auth_token" environment
# variable (stored as a secret of the Space).
auth_token = os.environ.get("auth_token")
# Callback that stores manually flagged samples (see `flagging_callback` below).
hf_writer = gr.HuggingFaceDatasetSaver(auth_token, "flagged-movie-poster-celebrity-swap")

pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", use_auth_token=auth_token
)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pipe = pipe.to(device)

# Remote GFPGAN endpoint used by `improve_image`.
GFPGAN_API_URL = "https://hf.space/embed/NotFungibleIO/GFPGAN/+/api/predict"
# Upper bound on the GFPGAN call so a stalled request cannot block the app forever.
GFPGAN_TIMEOUT_SECONDS = 120


def encode_pil_to_base64(pil_image):
    """Encode a PIL image as a base64 PNG data URI.

    From: https://github.com/gradio-app/gradio/blob/main/gradio/processing_utils.py

    Args:
        pil_image: PIL image to encode.

    Returns:
        A string of the form ``"data:image/png;base64,<payload>"``.
    """
    with BytesIO() as output_bytes:
        # Copy any text-only metadata
        use_metadata = False
        metadata = PngImagePlugin.PngInfo()
        for key, value in pil_image.info.items():
            if isinstance(key, str) and isinstance(value, str):
                metadata.add_text(key, value)
                use_metadata = True
        pil_image.save(
            output_bytes, "PNG", pnginfo=(metadata if use_metadata else None)
        )
        bytes_data = output_bytes.getvalue()
    base64_str = str(base64.b64encode(bytes_data), "utf-8")
    return "data:image/png;base64," + base64_str


def decode_base64_to_image(encoding):
    """Decode a base64 data URI into a PIL image.

    From: https://github.com/gradio-app/gradio/blob/main/gradio/processing_utils.py

    Args:
        encoding: string of the form ``"<header>;base64,<payload>"``.

    Returns:
        The image opened from the decoded payload.
    """
    content = encoding.split(";")[1]
    image_encoded = content.split(",")[1]
    return Image.open(BytesIO(base64.b64decode(image_encoded)))


def improve_image(img, scale=2):
    """Improve an input image using GFP-GAN.

    Args:
        img (PIL): image to improve.
        scale (int): scale factor for the new image.

    Returns:
        The improved image. If the request to GFPGAN is unsuccessful (network
        error, timeout, HTTP error status, or a response that cannot be
        decoded into an image), a black 512x512 image is returned instead.
    """
    request_objt = {"data": [encode_pil_to_base64(img), 'v1.3', scale]}
    try:
        response = requests.post(
            GFPGAN_API_URL, json=request_objt, timeout=GFPGAN_TIMEOUT_SECONDS
        )
        # Treat HTTP error statuses as failures rather than trying to parse them.
        response.raise_for_status()
        return decode_base64_to_image(response.json()['data'][0])
    except (
        requests.RequestException,  # connection problems, timeouts, HTTP errors
        OSError,                    # payload that PIL cannot open as an image
        ValueError,                 # invalid JSON or invalid base64
        LookupError,                # missing 'data' key / empty list / bad data URI
        AttributeError,             # 'data'[0] is not a string (e.g. None)
        TypeError,                  # response JSON has an unexpected shape
    ):
        return Image.new('RGB', size=(512, 512))


def generate(celebrity, movie, guidance, improve_flag, scale):
    """Generate a movie poster of ``celebrity`` in ``movie``.

    Args:
        celebrity: name of the celebrity to put on the poster.
        movie: movie title inserted into the prompt.
        guidance: value forwarded to the pipeline as ``guidance_scale``.
        improve_flag: when truthy, the result is passed through
            ``improve_image``.
        scale: scale factor forwarded to ``improve_image``.

    Returns:
        The first image produced by the pipeline, optionally improved.
    """
    prompt = f"A movie poster of {celebrity} in {movie}."
    # The pipeline's keyword is `guidance_scale`; passing `guidance` either has
    # no effect or raises a TypeError depending on the diffusers version.
    image = pipe(prompt, guidance_scale=guidance).images[0]
    if improve_flag:
        image = improve_image(image, scale=scale)
    return image


movie_options = [
    "Casino Royale (2006)",
    "Quantum of Solace (2008)",
    "Skyfall (2012)",
    "Spectre (2015)",
    "No Time To Die (2021)",
]

title = "Find the next James Bond"
description = (
    "Write the name of a celebrity, and pick a Bond movie from the dropdown menu."
    " This will generate a new movie poster (inspired by the chosen movie)"
    " with the new celebrity in it. See below for explanation of the"
    " input variables."
)
article = (
    "Inputs explained: \n Guidance: the lower, the more random the output"
    " image. Improve and scale: if selected, the image resolution will be increased"
    " using GFP-GAN (google it), and it's size increased (if scale is >1)."
)

demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(value="Emma Watson"),
        gr.Dropdown(movie_options, value="Skyfall (2012)"),
        gr.Slider(1, 20, value=7.5, step=0.5),
        gr.Checkbox(label="Improve and scale? (takes longer to process)"),
        gr.Slider(1, 3, value=1, step=0.5),
    ],
    outputs='image',
    title=title,
    description=description,
    article=article,
    allow_flagging="manual",
    flagging_options=["wrong celebrity", "wrong movie", "face not shown", "nonsensical output"],
    flagging_callback=hf_writer,
)

demo.launch()