File size: 6,914 Bytes
7f412e3 3f8d160 fc1b391 18b3737 ed67eef fc1b391 c8fdeaf ba630ab c8fdeaf 3f8d160 fc1b391 7f412e3 a391a20 784fb3b c4de996 7f412e3 a423985 99258b1 6eabca8 99258b1 e134cad 7f412e3 8f19d06 d56c421 7e2682f d56c421 64ae96b d56c421 fe0d6a2 d56c421 54d81ef d56c421 78e5800 d56c421 a338474 a3049c0 3f8d160 a338474 3f8d160 a338474 ba630ab 81837b1 fc1b391 3f8d160 fc1b391 81837b1 0d883c8 81837b1 3f8d160 0e1a0ae 3f8d160 3a1fb15 fc1b391 d5fb537 dd8c358 7e2682f 3f8d160 fe0d6a2 876591e 8f19d06 3f8d160 a338474 e4e1a0b 8f19d06 e4e1a0b 469bb0c 53d6419 469bb0c 8a1078b 53d6419 469bb0c 8a1078b 53d6419 469bb0c e4e1a0b 8f19d06 a391a20 f6da98c c4de996 a391a20 5319e7d 7f412e3 3d6195d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
import torch
from diffusers import DiffusionPipeline
import gradio as gr
import numpy as np
import openai
import os
import spaces
import base64
# Logging is not configured in this module. To debug, add `import logging`,
# call logging.basicConfig(level=logging.DEBUG) and create a module logger
# with logging.getLogger(__name__).

# OpenAI API key, read from the OPEN_AI_API_KEYS environment variable
# (None when the variable is not set).
API_KEY = os.environ.get('OPEN_AI_API_KEYS')

# HTML header passed to gr.Markdown in the image-generation column.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Chimera Image Generation</h1>
<p style="text-align: center;">This contains a Stable Diffusor from <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><b>stabilityai/stable-diffusion-xl-base-1.0</b></a></p>
<p style="text-align: center;">For Instructions on how to use the models <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
</div>
'''
# Load the SDXL base and refiner pipelines in float16 and move both to "cuda:0".
# The refiner is handed the base pipeline's second text encoder and VAE.
base = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
).to("cuda:0")
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    torch_dtype=torch.float16,
    # Fixed: this keyword was misspelled `use_safetensor`, so the refiner's
    # safetensors preference was never actually applied.
    use_safetensors=True,
    variant="fp16",
).to("cuda:0")

# Shared dict that generation() and encode_image() record the current mode in.
chat_mode = {}
def encode_image(image_path):
    """
    Read the file at image_path and return its contents as a base64 string.

    Also sets chat_mode["the_mode"] to "diffusing" as a side effect.
    """
    # NOTE(review): nothing in the visible code reads the "the_mode" key;
    # confirm whether this write is still needed.
    chat_mode["the_mode"] = "diffusing"
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
def _latest_image_path(message, history):
    """Return the path of the most recent image in the message or history, else None."""
    # Only a dict message can carry uploads. A plain string must not be probed
    # with `"files" in message`: that is a substring test and would then fail
    # on message["files"].
    files = message.get("files") if isinstance(message, dict) else None
    if files:
        newest = files[-1]
        return newest["path"] if isinstance(newest, dict) else newest

    # No upload on this turn: fall back to the last image found in the history.
    # A history entry whose first element is a tuple is treated as a file
    # upload, with the path as the tuple's first item.
    image_path = None
    for turn in history or []:
        if isinstance(turn[0], tuple):
            image_path = turn[0][0]
    return image_path


def generation(message, history):
    """
    Generates a streamed response based on the input message and optionally an image.

    Args:
        message: Either the prompt string, or a dict with a "text" entry and an
            optional "files" list (each item a path or a dict with a "path" key).
        history: Earlier chat turns. They are only searched for a previously
            uploaded image; they are not sent to the model.

    Returns:
        The streaming object returned by the OpenAI chat completions call.
        "gpt-3.5-turbo" is used when no image is available, "gpt-4o" otherwise.

    Side effects:
        Sets chat_mode["mode"] to "text" or "image".
    """
    image_path = _latest_image_path(message, history)
    input_prompt = message if isinstance(message, str) else message.get("text", "")

    if image_path is None:
        chat_mode["mode"] = "text"
        model = "gpt-3.5-turbo"
        user_content = input_prompt
    else:
        chat_mode["mode"] = "image"
        base64_image = encode_image(image_path=image_path)
        model = "gpt-4o"
        # The image is inlined as a base64 data URL next to the text prompt.
        user_content = [
            {"type": "text", "text": input_prompt},
            {"type": "image_url", "image_url": {
                "url": f"data:image/jpeg;base64,{base64_image}"
            }},
        ]

    client = openai.OpenAI(api_key=API_KEY)
    return client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant called 'chimera'."},
            {"role": "user", "content": user_content},
        ],
        stream=True,
    )
# Text-to-image generation with the base and refiner pipelines.
@spaces.GPU(duration=120)
def diffusing(prompt: str,
              n_steps: int,
              denoising: float):
    """
    Generate one image for the prompt using the base and refiner pipelines.

    The base pipeline is called with denoising_end=denoising and asked for
    latent output; the refiner is then called on those latents with
    denoising_start=denoising. Both calls use the same prompt and n_steps.

    Returns the first image of the refiner's output.
    """
    # Arguments common to both pipeline calls.
    shared_args = {"prompt": prompt, "num_inference_steps": n_steps}

    latents = base(
        **shared_args,
        denoising_end=denoising,
        output_type="latent",
    ).images

    refined = refiner(
        **shared_args,
        denoising_start=denoising,
        image=latents,
    )
    return refined.images[0]
def check_cuda_availability():
    """Return a short status string naming CUDA device 0, or saying none was found."""
    if not torch.cuda.is_available():
        return "No CUDA device found."
    device_name = torch.cuda.get_device_name(0)
    return f"GPU: {device_name}"
# Module-level dict described as holding the image created by diffusing.
# NOTE(review): nothing in the visible code reads or writes it — confirm it is still needed.
image_created: dict = {}
@spaces.GPU(duration=120)
def bot_comms(message, history):
    """
    Handles communication between Gradio and the models.

    This is a generator: it yields the reply accumulated so far each time a
    new piece of text arrives from the stream returned by generation().

    Args:
        message: The incoming chat message, either a dict with a "text" entry
            or a bare value that is wrapped into such a dict.
        history: Earlier chat turns, passed through to generation().

    Yields:
        The CUDA status string when the text is exactly "check cuda";
        otherwise the growing reply text.
    """
    # ensures message is a dictionary
    if not isinstance(message, dict):
        message = {"text": message}

    # .get() so a dict without a "text" entry does not raise KeyError.
    if message.get("text") == "check cuda":
        yield check_cuda_availability()
        return

    reply = ""
    for chunk in generation(message, history):
        # Skip chunks that carry no choices instead of failing on choices[0].
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            reply += text
            yield reply
# HTML header passed to gr.Markdown in the chat column.
CHAT_DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Chimera Text Generation</h1>
<p style="text-align: center;">This contains a Generative LLM from <a href="https://openai.com/"><b>Open AI</b></a> called GPT-3.5-Turbo and Vision.</p>
<p style="text-align: center;">For Instructions on how to use the models <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
</div>
'''

# Multimodal textbox handed to the chat interface; file uploads are limited to images.
chat_input = gr.MultimodalTextbox(
    interactive=True,
    file_types=["images"],
    placeholder="Enter your question or upload an image.",
    show_label=False,
)

with gr.Blocks(fill_height=True) as demo:
    with gr.Row():
        # First column: image generation (diffusing).
        with gr.Column():
            gr.Markdown(DESCRIPTION)
            image_prompt = gr.Textbox(label="Image Prompt")
            output_image = gr.Image(label="Generated Image")
            generate_image_button = gr.Button("Generate Image")

            # Label repaired: the gear emoji had been mis-decoded into mojibake.
            with gr.Accordion(label="⚙️ Parameters", open=False):
                steps_slider = gr.Slider(
                    minimum=20,
                    maximum=100,
                    step=1,
                    value=40,
                    label="Number of Inference Steps",
                )
                denoising_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.8,
                    label="High Noise Fraction",
                )

            # The button passes the prompt and both slider values to diffusing().
            generate_image_button.click(
                fn=diffusing,
                inputs=[image_prompt, steps_slider, denoising_slider],
                outputs=output_image,
            )

        # Second column: chat backed by bot_comms().
        with gr.Column():
            gr.Markdown(CHAT_DESCRIPTION)
            chat = gr.ChatInterface(
                fn=bot_comms,
                multimodal=True,
                textbox=chat_input,
            )

demo.launch()
|