import torch
from diffusers import DiffusionPipeline
import gradio as gr
import numpy as np
import openai
import os
import spaces
import base64
# Setup logging
# logging.basicConfig(level=logging.DEBUG)
# logger = logging.getLogger(__name__)

# Retrieve the OpenAI API key from the environment
API_KEY = os.getenv('OPEN_AI_API_KEY')
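# Optional fail-fast guard (not in the original): surface a clear error at
# startup instead of an opaque OpenAI auth failure on the first request.
# The env var name matches the lookup above.
if API_KEY is None:
    raise RuntimeError("OPEN_AI_API_KEY is not set; add it as a Space secret.")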
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Book-Reader</h1>
<p style="text-align: center;">This Space runs the Stable Diffusion XL pipeline from <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><b>stabilityai/stable-diffusion-xl-base-1.0</b></a></p>
<p style="text-align: center;">For instructions on how to use the models, <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
</div>
'''
# Load both the base and refiner pipelines
base = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0",
                                         torch_dtype=torch.float16,
                                         use_safetensors=True,
                                         variant="fp16").to("cuda:0")
refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0",
                                            text_encoder_2=base.text_encoder_2,
                                            vae=base.vae,
                                            torch_dtype=torch.float16,
                                            use_safetensors=True,
                                            variant="fp16").to("cuda:0")
chat_mode = {}

def encode_image(image_path):
    """Read an image file and return its contents as a base64 string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
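# Note: the string returned by encode_image() is embedded in a data: URL
# (data:image/jpeg;base64,...) when it is sent to GPT-4o in generation() below.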
def generation(message, history):
    """
    Generates a response based on the input message and, optionally, an image.
    """
    global chat_mode
    image_path = None
    # Prefer an image attached to the current message; otherwise fall back to
    # the most recent image found in the chat history.
    if "files" in message and message["files"]:
        if isinstance(message["files"][-1], dict):
            image_path = message["files"][-1]["path"]
        else:
            image_path = message["files"][-1]
    else:
        for hist in history:
            if isinstance(hist[0], tuple):
                image_path = hist[0][0]

    input_prompt = message if isinstance(message, str) else message.get("text", "")

    if image_path is None:
        # Text-only request: stream a reply from GPT-3.5 Turbo
        chat_mode["mode"] = "text"
        client = openai.OpenAI(api_key=API_KEY)
        stream = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                      {"role": "user", "content": input_prompt}],
            stream=True,
        )
        return stream
    else:
        # An image is present: stream a reply from GPT-4o with the image attached
        chat_mode["mode"] = "image"
        base64_image = encode_image(image_path=image_path)
        client = openai.OpenAI(api_key=API_KEY)
        stream = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                      {"role": "user", "content": [
                          {"type": "text", "text": input_prompt},
                          {"type": "image_url", "image_url": {
                              "url": f"data:image/jpeg;base64,{base64_image}"
                          }}
                      ]}],
            stream=True,
        )
        return stream
# Takes a text prompt and generates an image with the base + refiner pipelines
@spaces.GPU  # presumably the intended use of the `spaces` import: allocate a GPU per call on ZeroGPU hardware
def diffusing(prompt: str,
              n_steps: int,
              denoising: float):
    """
    Runs the base pipeline for the first `denoising` fraction of `n_steps`,
    then hands the latents to the refiner for the remaining steps.
    """
    # Base model: stop at the `denoising` fraction and output latents
    image_base = base(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_end=denoising,
        output_type="latent"
    ).images
    # Refiner: resume from the same point and decode the final image
    image = refiner(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_start=denoising,
        image=image_base
    ).images[0]
    return image
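# Example invocation (hypothetical prompt and values), matching the slider
# defaults in the UI below:
#   img = diffusing("a watercolor lighthouse at dusk", n_steps=40, denoising=0.8)
#   img.save("lighthouse.png")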
def check_cuda_availability():
    if torch.cuda.is_available():
        return f"GPU: {torch.cuda.get_device_name(0)}"
    else:
        return "No CUDA device found."
# Image created from diffusing
image_created = {}
def bot_comms(message, history):
    """
    Handles communication between Gradio and the models.
    """
    # Ensure the message is a dictionary
    if not isinstance(message, dict):
        message = {"text": message}

    # Typing "check cuda" reports which GPU (if any) the Space is running on
    if message["text"] == "check cuda":
        yield check_cuda_availability()
        return

    gpt_outputs = []
    stream = generation(message, history)
    for chunk in stream:
        text = chunk.choices[0].delta.content
        if text:
            gpt_outputs.append(text)
            # Yield the accumulated text so Gradio re-renders the growing reply,
            # producing token-by-token streaming in the chat window
            yield "".join(gpt_outputs)
# Multimodal textbox: users can attach images alongside text; attachments
# arrive in message["files"]
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["images"], placeholder="Enter your question or upload an image.", show_label=False)
with gr.Blocks(fill_height=True) as demo:
    with gr.Row():
        # Diffusing
        with gr.Column():
            gr.Markdown(DESCRIPTION)
            image_prompt = gr.Textbox(label="Image Prompt")
            output_image = gr.Image(label="Generated Image")
            generate_image_button = gr.Button("Generate Image")
            with gr.Accordion(label="⚙️ Parameters", open=False):
                steps_slider = gr.Slider(
                    minimum=20,
                    maximum=100,
                    step=1,
                    value=40,
                    label="Number of Inference Steps"
                )
                denoising_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.8,
                    label="High Noise Fraction"
                )
            generate_image_button.click(
                fn=diffusing,
                inputs=[image_prompt, steps_slider, denoising_slider],
                outputs=output_image
            )
        with gr.Column():
            # GPT-3.5
            gr.Markdown('''
            <div>
            <h1 style="text-align: center;">Smart Reader</h1>
            <p style="text-align: center;">This contains a generative LLM from <a href="https://openai.com/"><b>OpenAI</b></a>: GPT-3.5 Turbo for text, with GPT-4o handling image inputs.</p>
            <p style="text-align: center;">For instructions on how to use the models, <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
            </div>
            ''')
            chat = gr.ChatInterface(fn=bot_comms,
                                    multimodal=True,
                                    textbox=chat_input)

demo.launch()