import torch
from diffusers import DiffusionPipeline
import gradio as gr
import numpy as np
import openai
import os
import spaces
import base64

# Retrieve the OpenAI API key from the environment
API_KEY = os.getenv('OPEN_AI_API_KEY')
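
# Optional safeguard: fail fast if the key is missing so the OpenAI calls
# below raise a clear error at startup instead of a cryptic auth failure later.
if API_KEY is None:
    raise RuntimeError("Set the OPEN_AI_API_KEY environment variable before launching.")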

DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Book-Reader</h1>
<p style="text-align: center;">This Space runs the Stable Diffusion XL pipeline from <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><b>stabilityai/stable-diffusion-xl-base-1.0</b></a>.</p>
<p style="text-align: center;">For instructions on how to use the models, <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view the README</b></a>.</p>
</div>
'''

# Load the SDXL base and refiner pipelines. The refiner reuses the base
# model's second text encoder and VAE so they are only loaded once.
base = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0",
                                         torch_dtype=torch.float16,
                                         use_safetensors=True,
                                         variant="fp16").to("cuda:0")
refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0",
                                            text_encoder_2=base.text_encoder_2,
                                            vae=base.vae,
                                            torch_dtype=torch.float16,
                                            use_safetensors=True,
                                            variant="fp16").to("cuda:0")

# Tracks which mode the chat last ran in ("text" or "image").
chat_mode = {}

def encode_image(image_path):
    """Read an image file and return its contents as a base64 string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

def generation(message, history):
    """
    Generates a response based on the input message and optionally an image.
    """
    image_path = None
    if "files" in message and message["files"]:
        if isinstance(message["files"][-1], dict):
            image_path = message["files"][-1]["path"]
        else:
            image_path = message["files"][-1]
    else:
        # Fall back to the most recently uploaded image in the chat history.
        for hist in history:
            if isinstance(hist[0], tuple):
                image_path = hist[0][0]

    input_prompt = message if isinstance(message, str) else message.get("text", "")

    if image_path is None:
        chat_mode["mode"] = "text"
        client = openai.OpenAI(api_key=API_KEY)
        stream = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                        {"role": "user", "content": input_prompt}],
            stream=True,
        )
        return stream
    else:
        chat_mode["mode"] = "image"
        base64_image = encode_image(image_path=image_path)
        client = openai.OpenAI(api_key=API_KEY)
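        # Send the image inline as a base64 data URL alongside the text prompt.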
        stream = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                        {"role": "user", "content": [
                            {"type": "text", "text": input_prompt},
                            {"type": "image_url", "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }}
                        ]}],
            stream=True,
        )
        return stream

# Takes a text prompt and generates an image with the base + refiner pipelines.
@spaces.GPU(duration=120)
def diffusing(prompt: str,
              n_steps: int,
              denoising: float):
    """
    Runs the SDXL base model for the first part of the denoising schedule,
    then hands the latents to the refiner to finish the image.
    """

    # Stage 1: run the base model and keep the output in latent space
    # so the refiner can continue denoising from it.
    image_base = base(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_end=denoising,
        output_type="latent"
    ).images

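    # Stage 2: the refiner continues denoising where the base stopped
    # (denoising_start here matches the base's denoising_end above).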
    image = refiner(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_start=denoising,
        image=image_base
    ).images[0]
    
    return image
    
def check_cuda_availability():
    if torch.cuda.is_available():
        return f"GPU: {torch.cuda.get_device_name(0)}"
    else:
        return "No CUDA device found."
    

@spaces.GPU(duration=120)
def bot_comms(message, history):
    """
    Handles communication between Gradio and the models.
    """

    # Normalize plain-string messages into the dict form used by the multimodal textbox.
    if not isinstance(message, dict):
        message = {"text": message}

    if message["text"] == "check cuda":
        yield check_cuda_availability()
        return
        
    buffer = ""
    stream = generation(message, history)

    # Stream tokens back to the UI as they arrive, accumulating the reply so far.
    for chunk in stream:
        text = chunk.choices[0].delta.content
        if text:
            buffer += text
            yield buffer

chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter your question or upload an image.", show_label=False)

with gr.Blocks(fill_height=True) as demo:
    with gr.Row():
        # Diffusing
        with gr.Column():
            gr.Markdown(DESCRIPTION)
            image_prompt = gr.Textbox(label="Image Prompt")
            output_image = gr.Image(label="Generated Image")
            generate_image_button = gr.Button("Generate Image")
            with gr.Accordion(label="⚙️ Parameters", open=False):
                steps_slider = gr.Slider(
                    minimum=20,
                    maximum=100,
                    step=1,
                    value=40,
                    label="Number of Inference Steps"
                )
                denoising_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.8,
                    label="High Noise Fraction"
                )
            generate_image_button.click(
                fn=diffusing,
                inputs=[image_prompt, steps_slider, denoising_slider],
                outputs=output_image
            )
        with gr.Column():
            # Chat: GPT-3.5 Turbo for text, GPT-4o for images
            gr.Markdown('''
<div>
<h1 style="text-align: center;">Smart Reader</h1>
<p style="text-align: center;">This chat uses <a href="https://openai.com/"><b>OpenAI</b></a> models: GPT-3.5 Turbo for text and GPT-4o for image understanding.</p>
<p style="text-align: center;">For instructions on how to use the models, <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view the README</b></a>.</p>
</div>
''')
            chat = gr.ChatInterface(fn=bot_comms,
                                    multimodal=True,
                                    textbox=chat_input)

demo.launch()