import base64
import os

import gradio as gr
import numpy as np
import openai
import spaces
import torch
from diffusers import DiffusionPipeline

# Setup logging
# logging.basicConfig(level=logging.DEBUG)
# logger = logging.getLogger(__name__)

# Retrieve the OpenAI API key from the environment
API_KEY = os.getenv('OPEN_AI_API_KEYS')

DESCRIPTION = '''

Chimera Image Generation

This uses the Stable Diffusion XL model from stabilityai/stable-diffusion-xl-base-1.0.

For instructions on how to use the models, view this.

'''

# Load both the base and refiner pipelines
base = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
).to("cuda:0")
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
).to("cuda:0")

chat_mode = {}


def encode_image(image_path):
    """Reads an image from disk and returns it as a base64-encoded string."""
    chat_mode["the_mode"] = "diffusing"
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def generation(message, history):
    """
    Generates a response based on the input message and, optionally, an image.
    """
    global chat_mode
    image_path = None
    if "files" in message and message["files"]:
        if isinstance(message["files"][-1], dict):
            image_path = message["files"][-1]["path"]
        else:
            image_path = message["files"][-1]
    else:
        # Fall back to the most recent image found in the chat history
        for hist in history:
            if isinstance(hist[0], tuple):
                image_path = hist[0][0]

    input_prompt = message if isinstance(message, str) else message.get("text", "")

    if image_path is None:
        # Text-only request: stream a completion from GPT-3.5 Turbo
        chat_mode["mode"] = "text"
        client = openai.OpenAI(api_key=API_KEY)
        stream = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                {"role": "user", "content": input_prompt},
            ],
            stream=True,
        )
        return stream
    else:
        # Image request: send the image as a base64 data URL to GPT-4o
        chat_mode["mode"] = "image"
        base64_image = encode_image(image_path=image_path)
        client = openai.OpenAI(api_key=API_KEY)
        stream = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                {"role": "user", "content": [
                    {"type": "text", "text": input_prompt},
                    {"type": "image_url", "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }},
                ]},
            ],
            stream=True,
        )
        return stream


# Function to take a text prompt and generate an image
@spaces.GPU(duration=120)
def diffusing(prompt: str, n_steps: int, denoising: float):
    """
    Runs the prompt through the base pipeline, then hands the latents to the
    refiner and returns the final image.
    """
    # Generate latents from the text prompt with the base model
    image_base = base(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_end=denoising,
        output_type="latent",
    ).images
    # Refine the latents into the final image
    image = refiner(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_start=denoising,
        image=image_base,
    ).images[0]
    return image


def check_cuda_availability():
    if torch.cuda.is_available():
        return f"GPU: {torch.cuda.get_device_name(0)}"
    else:
        return "No CUDA device found."


# Image created from diffusing
image_created = {}


@spaces.GPU(duration=120)
def bot_comms(message, history):
    """
    Handles communication between Gradio and the models.
    """
    # Ensure message is a dictionary
    if not isinstance(message, dict):
        message = {"text": message}

    if message["text"] == "check cuda":
        yield check_cuda_availability()
        return

    buffer = ""
    gpt_outputs = []
    stream = generation(message, history)
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            text = chunk.choices[0].delta.content
            if text:
                gpt_outputs.append(text)
                buffer += text
                yield "".join(gpt_outputs)


chat_input = gr.MultimodalTextbox(
    interactive=True,
    file_types=["images"],
    placeholder="Enter your question or upload an image.",
    show_label=False,
)

with gr.Blocks(fill_height=True) as demo:
    with gr.Row():
        # Diffusing
        with gr.Column():
            gr.Markdown(DESCRIPTION)
            image_prompt = gr.Textbox(label="Image Prompt")
            output_image = gr.Image(label="Generated Image")
            generate_image_button = gr.Button("Generate Image")
            # generate_image_button.click(fn=diffusing, inputs=image_prompt, outputs=output_image)

            with gr.Accordion(label="⚙️ Parameters", open=False):
                steps_slider = gr.Slider(
                    minimum=20,
                    maximum=100,
                    step=1,
                    value=40,
                    label="Number of Inference Steps",
                )
                denoising_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.8,
                    label="High Noise Fraction",
                )

            generate_image_button.click(
                fn=diffusing,
                inputs=[image_prompt, steps_slider, denoising_slider],
                outputs=output_image,
            )

        with gr.Column():
            # GPT-3.5
            gr.Markdown('''

Chimera Text Generation

This uses generative LLMs from OpenAI: GPT-3.5 Turbo for text and GPT-4o for vision.

For instructions on how to use the models, view this.

''')
            chat = gr.ChatInterface(fn=bot_comms, multimodal=True, textbox=chat_input)

demo.launch()
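
# A minimal sketch (an assumption, not part of the app above) of how `diffusing`
# could be exercised directly without the Gradio UI, e.g. for a quick local check.
# Left commented out so it never runs alongside `demo.launch()`.
#
# sample = diffusing(
#     prompt="a photograph of a chimera in a misty forest",
#     n_steps=40,
#     denoising=0.8,
# )
# sample.save("sample.png")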