|
import torch |
|
from diffusers import DiffusionPipeline |
|
import gradio as gr |
|
import numpy as np |
|
import openai |
|
import os |
|
import spaces |
|
import base64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
# OpenAI API key, read once at import time from the OPEN_AI_API_KEYS
# environment variable. The value is None when the variable is not set.
API_KEY = os.environ.get('OPEN_AI_API_KEYS')
|
|
|
# HTML header rendered (through gr.Markdown) at the top of the image
# generation column. The markup lines are kept at column 0 on purpose so a
# Markdown renderer does not treat them as an indented code block.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Chimera Image Generation</h1>
<p style="text-align: center;">This contains a Stable Diffusor from <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><b>stabilityai/stable-diffusion-xl-base-1.0</b></a></p>
<p style="text-align: center;">For Instructions on how to use the models <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
</div>
'''
|
|
|
|
|
# SDXL base pipeline: loaded in half precision from the fp16 safetensors
# weights and moved to the first CUDA device at import time.
base = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
).to("cuda:0")

# SDXL refiner pipeline. It reuses the base pipeline's second text encoder
# and VAE objects instead of loading its own copies.
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    torch_dtype=torch.float16,
    # Fixed: this keyword was misspelled `use_safetensor`, so the option was
    # never applied to the refiner the way it is to the base pipeline.
    use_safetensors=True,
    variant="fp16",
).to("cuda:0")
|
|
|
# Shared, mutable record of what the app is currently doing. The functions in
# this file only ever write to it.
chat_mode: dict = {}


def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The file's bytes, base64-encoded and decoded to a UTF-8 ``str``.

    Side effects:
        Sets ``chat_mode["the_mode"]`` to ``"diffusing"`` before the file is
        opened.
    """
    # NOTE(review): the key "the_mode" differs from the "mode" key written by
    # generation(), and "diffusing" is unrelated to encoding -- confirm
    # whether this write is intentional.
    chat_mode["the_mode"] = "diffusing"

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
|
|
|
# System prompt shared by the text-only and the image request.
_SYSTEM_MESSAGE = {
    "role": "system",
    "content": "You are a helpful assistant called 'chimera'.",
}


def _latest_image_path(message, history):
    """Return the image path to use for this turn, or None if there is none.

    The last file attached to ``message`` wins. Without attachments, the
    history is scanned and the last entry whose first element is a tuple
    supplies the path (its first item).
    """
    # Only a dict message can carry attachments. For a plain string,
    # `"files" in message` would be a substring test and indexing the string
    # with "files" would raise TypeError.
    if isinstance(message, dict) and message.get("files"):
        newest = message["files"][-1]
        return newest["path"] if isinstance(newest, dict) else newest

    image_path = None
    for turn in history or ():
        if isinstance(turn[0], tuple):
            image_path = turn[0][0]
    return image_path


def generation(message, history):
    """Start a streamed OpenAI chat completion for the user's message.

    Without an image, the prompt is sent to ``gpt-3.5-turbo``. With an image
    (attached to this message or found in the history), the prompt and the
    base64-encoded image are sent to ``gpt-4o``.

    Args:
        message: Either the prompt as a ``str`` or a dict with an optional
            ``"text"`` entry and an optional ``"files"`` list. Each file is a
            path or a dict with a ``"path"`` entry.
        history: Iterable of previous turns. Only each turn's first element is
            inspected, to look for an earlier image.

    Returns:
        The object returned by ``client.chat.completions.create`` with
        ``stream=True``.

    Side effects:
        Sets ``chat_mode["mode"]`` to ``"text"`` or ``"image"``.
    """
    image_path = _latest_image_path(message, history)
    input_prompt = message if isinstance(message, str) else message.get("text", "")

    if image_path is None:
        chat_mode["mode"] = "text"
        model = "gpt-3.5-turbo"
        user_content = input_prompt
    else:
        chat_mode["mode"] = "image"
        base64_image = encode_image(image_path=image_path)
        model = "gpt-4o"
        user_content = [
            {"type": "text", "text": input_prompt},
            {"type": "image_url", "image_url": {
                "url": f"data:image/jpeg;base64,{base64_image}"
            }},
        ]

    client = openai.OpenAI(api_key=API_KEY)
    return client.chat.completions.create(
        model=model,
        messages=[_SYSTEM_MESSAGE, {"role": "user", "content": user_content}],
        stream=True,
    )
|
|
|
|
|
@spaces.GPU(duration=120)
def diffusing(prompt: str,
              n_steps: int,
              denoising: float):
    """Generate one image for ``prompt`` with the base and refiner pipelines.

    The base pipeline is called with ``denoising`` as ``denoising_end`` and
    asked for latent output. Those latents are then handed to the refiner,
    which is called with the same value as ``denoising_start``.

    Args:
        prompt: Text prompt passed to both pipelines.
        n_steps: Number of inference steps passed to both pipelines.
        denoising: Fraction passed as ``denoising_end`` to the base pipeline
            and as ``denoising_start`` to the refiner.

    Returns:
        The first image produced by the refiner.
    """
    # The value comes from a UI slider; make sure the pipelines get an integer
    # step count even if it arrives as a float such as 40.0.
    n_steps = int(n_steps)

    latents = base(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_end=denoising,
        output_type="latent",
    ).images

    refined_images = refiner(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_start=denoising,
        image=latents,
    ).images

    return refined_images[0]
|
|
|
def check_cuda_availability():
    """Return a short status string about CUDA.

    Returns:
        ``"GPU: <name of device 0>"`` when ``torch.cuda.is_available()`` is
        true, otherwise ``"No CUDA device found."``.
    """
    if not torch.cuda.is_available():
        return "No CUDA device found."

    device_name = torch.cuda.get_device_name(0)
    return f"GPU: {device_name}"
|
|
|
|
|
# Module-level dict, empty at import time. Nothing in the visible code reads
# or writes it -- NOTE(review): confirm whether it is still needed.
image_created: dict = {}
|
|
|
@spaces.GPU(duration=120)
def bot_comms(message, history):
    """Stream the assistant's reply for one chat turn.

    This is the callback handed to ``gr.ChatInterface``. It is a generator:
    each yielded value is the complete reply text received so far.

    Args:
        message: The user's input, either a plain value (wrapped into
            ``{"text": message}``) or a dict with a ``"text"`` entry and
            optional ``"files"``.
        history: Previous turns, forwarded unchanged to ``generation``.

    Yields:
        For the exact text ``"check cuda"``, a single CUDA status string.
        Otherwise the reply accumulated so far, once per non-empty chunk.
    """
    if not isinstance(message, dict):
        message = {"text": message}

    # Diagnostic shortcut: report the GPU instead of calling the model.
    if message.get("text") == "check cuda":
        yield check_cuda_availability()
        return

    reply_so_far = ""
    for chunk in generation(message, history):
        # Defensive: skip chunks with an empty `choices` list rather than
        # failing on `choices[0]`.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        # `content` may be None or an empty string; neither adds text.
        if piece:
            # Each yield needs the whole reply, so keep one running string
            # instead of re-joining a list of pieces on every chunk.
            reply_so_far += piece
            yield reply_so_far
|
|
|
# Input box for the chat column: accepts text plus image uploads. It is
# created here and passed to gr.ChatInterface as its `textbox`.
chat_input = gr.MultimodalTextbox(
    placeholder="Enter your question or upload an image.",
    file_types=["images"],
    show_label=False,
    interactive=True,
)
|
|
|
# HTML header for the chat column. It lives in a column-0 constant (like
# DESCRIPTION) so that source indentation inside the layout below cannot turn
# the markup into an indented Markdown code block.
CHAT_DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Chimera Text Generation</h1>
<p style="text-align: center;">This contains a Generative LLM from <a href="https://openai.com/"><b>Open AI</b></a> called GPT-3.5-Turbo and Vision.</p>
<p style="text-align: center;">For Instructions on how to use the models <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
</div>
'''

# Two-column layout: image generation on the left, chat on the right.
with gr.Blocks(fill_height=True) as demo:
    with gr.Row():

        # Left column: prompt -> diffusing() -> generated image.
        with gr.Column():
            gr.Markdown(DESCRIPTION)
            image_prompt = gr.Textbox(label="Image Prompt")
            output_image = gr.Image(label="Generated Image")
            generate_image_button = gr.Button("Generate Image")

            # Fixed: the label's gear emoji had been corrupted into
            # mis-decoded characters.
            with gr.Accordion(label="⚙️ Parameters", open=False):
                steps_slider = gr.Slider(
                    minimum=20,
                    maximum=100,
                    step=1,
                    value=40,
                    label="Number of Inference Steps",
                )
                denoising_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.8,
                    label="High Noise Fraction",
                )

            # Slider values are passed to diffusing() as n_steps / denoising.
            generate_image_button.click(
                fn=diffusing,
                inputs=[image_prompt, steps_slider, denoising_slider],
                outputs=output_image,
            )

        # Right column: multimodal chat served by bot_comms().
        with gr.Column():
            gr.Markdown(CHAT_DESCRIPTION)
            chat = gr.ChatInterface(
                fn=bot_comms,
                multimodal=True,
                textbox=chat_input,
            )

# Start the Gradio server when the module is executed/imported, as before.
demo.launch()
|
|