import torch
from diffusers import DiffusionPipeline
import gradio as gr
import numpy as np
import openai
import os
import spaces
import base64

# Setup logging
# logging.basicConfig(level=logging.DEBUG)
# logger = logging.getLogger(__name__)

# Retrieve the OpenAI API key from the environment
API_KEY = os.getenv('OPEN_AI_API_KEYS')

DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Chimera Image Generation</h1>
<p style="text-align: center;">This contains a Stable Diffusor from <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><b>stabilityai/stable-diffusion-xl-base-1.0</b></a></p>
<p style="text-align: center;">For Instructions on how to use the models <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
</div>
'''

# load both base and refiner
base = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16").to("cuda:0")
refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0",
                                            text_encoder_2=base.text_encoder_2,
                                            vae=base.vae,
                                            torch_dtype=torch.float16,
                                            use_safetensors=True,
                                            variant="fp16").to("cuda:0")

chat_mode = {}

def encode_image(image_path):
    chat_mode["the_mode"] = "diffusing"
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
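# The base64 string is embedded into a "data:image/jpeg;base64,..." URL in
# generation() below, which is the format the OpenAI vision API expects for
# inline images.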

def generation(message, history):
    """
    Generates a response based on the input message and optionally an image.
    """
    global chat_mode
    image_path = None
    if "files" in message and message["files"]:
        if isinstance(message["files"][-1], dict):
            image_path = message["files"][-1]["path"]
        else:
            image_path = message["files"][-1]
    else:
        for hist in history:
            if isinstance(hist[0], tuple):
                image_path = hist[0][0]

    input_prompt = message if isinstance(message, str) else message.get("text", "")

    if image_path is None:
        chat_mode["mode"] = "text"
        client = openai.OpenAI(api_key=API_KEY)
        stream = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                        {"role": "user", "content": input_prompt}],
            stream=True,
        )
        return stream
    else:
        chat_mode["mode"] = "image"
        base64_image = encode_image(image_path=image_path)
        client = openai.OpenAI(api_key=API_KEY)
        stream = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                        {"role": "user", "content": [
                            {"type": "text", "text": input_prompt},
                            {"type": "image_url", "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }}
                        ]}],
            stream=True,
        )
        return stream
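# With stream=True the OpenAI client returns an iterator of completion chunks;
# bot_comms() below walks that iterator and re-yields the accumulated text to Gradio.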

# Takes a text prompt and generates an image with the SDXL base + refiner pipeline
@spaces.GPU(duration=120)
def diffusing(prompt: str,
              n_steps: int,
              denoising: float):
    """
    Takes input, passes it into the pipeline, 
    get the top 5 scores, and ouput those scores into images
    """

    # Generate image based on text
    image_base = base(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_end=denoising,
        output_type="latent"
    ).images

    image = refiner(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_start=denoising,
        image=image_base
    ).images[0]
    
    return image
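# Example call (hypothetical values; at runtime the Gradio sliders below supply them):
#   image = diffusing("a watercolor fox in a forest", n_steps=40, denoising=0.8)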
    
def check_cuda_availability():
    if torch.cuda.is_available():
        return f"GPU: {torch.cuda.get_device_name(0)}"
    else:
        return "No CUDA device found."
    
# Image created from diffusing
image_created = {}

@spaces.GPU(duration=120)
def bot_comms(message, history):
    """
    Handles communication between Gradio and the models.
    """

    # Ensure the message is a dictionary (plain strings arrive from text-only calls)
    if not isinstance(message, dict):
        message = {"text": message}

    if message["text"] == "check cuda":
        yield check_cuda_availability()
        return
        
    buffer = ""
    gpt_outputs = []
    stream = generation(message, history)

    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            text = chunk.choices[0].delta.content
            if text:
                gpt_outputs.append(text)
                buffer += text
            yield "".join(gpt_outputs)

chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter your question or upload an image.", show_label=False)

with gr.Blocks(fill_height=True) as demo:
    with gr.Row():
        # Diffusing
        with gr.Column():
            gr.Markdown(DESCRIPTION)
            image_prompt = gr.Textbox(label="Image Prompt")
            output_image = gr.Image(label="Generated Image")
            generate_image_button = gr.Button("Generate Image")
            with gr.Accordion(label="βš™οΈ Parameters", open=False):
                steps_slider = gr.Slider(
                    minimum=20,
                    maximum=100,
                    step=1,
                    value=40,
                    label="Number of Inference Steps"
                )
                denoising_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.8,
                    label="High Noise Fraction"
                )
            generate_image_button.click(
                fn=diffusing,
                inputs=[image_prompt, steps_slider, denoising_slider],
                outputs=output_image
            )
        with gr.Column():
            # Text / vision chat (GPT-3.5 Turbo and GPT-4o)
            gr.Markdown('''
<div>
<h1 style="text-align: center;">Chimera Text Generation</h1>
<p style="text-align: center;">This contains a Generative LLM from <a href="https://openai.com/"><b>Open AI</b></a> called GPT-3.5-Turbo and Vision.</p>
<p style="text-align: center;">For Instructions on how to use the models <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
</div>
''')
            chat = gr.ChatInterface(fn=bot_comms,
                                    multimodal=True,
                                    textbox=chat_input)

demo.launch()