revised it
Browse files
app.py
CHANGED
@@ -3,15 +3,12 @@ from transformers import TextIteratorStreamer, AutoProcessor, LlavaForConditiona
|
|
3 |
from diffusers import DiffusionPipeline
|
4 |
import gradio as gr
|
5 |
import numpy as np
|
6 |
-
import accelerate
|
7 |
-
import spaces
|
8 |
from PIL import Image
|
9 |
import threading
|
10 |
-
|
11 |
import os
|
12 |
-
import asyncio
|
13 |
-
from typing import Any
|
14 |
|
|
|
15 |
API_KEY = os.getenv('OPEN_AI_API_KEYS')
|
16 |
|
17 |
DESCRIPTION = '''
|
@@ -21,7 +18,7 @@ DESCRIPTION = '''
|
|
21 |
</div>
|
22 |
'''
|
23 |
|
24 |
-
#
|
25 |
llava_model = LlavaForConditionalGeneration.from_pretrained(
|
26 |
"xtuner/llava-llama-3-8b-v1_1-transformers",
|
27 |
torch_dtype=torch.float16,
|
@@ -29,10 +26,12 @@ llava_model = LlavaForConditionalGeneration.from_pretrained(
|
|
29 |
)
|
30 |
|
31 |
llava_model.to("cuda:0")
|
|
|
32 |
processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
|
|
|
33 |
llava_model.generation_config.eos_token_id = 128009
|
34 |
|
35 |
-
# Stable
|
36 |
base = DiffusionPipeline.from_pretrained(
|
37 |
"stabilityai/stable-diffusion-xl-base-1.0",
|
38 |
torch_dtype=torch.float16,
|
@@ -52,27 +51,28 @@ refiner = DiffusionPipeline.from_pretrained(
|
|
52 |
refiner.to('cuda')
|
53 |
|
54 |
def multimodal_and_generation(message, history):
|
|
|
|
|
|
|
55 |
print(f"Message:\n{message}\nType:\n{type(message)}")
|
56 |
image_path = None
|
57 |
-
if message["files"]:
|
58 |
-
if
|
59 |
image_path = message["files"][-1]["path"]
|
60 |
else:
|
61 |
image_path = message["files"][-1]
|
62 |
else:
|
63 |
for hist in history:
|
64 |
-
if
|
65 |
image_path = hist[0][0]
|
66 |
|
67 |
if image_path is None:
|
68 |
input_prompt = message["text"]
|
69 |
-
client = OpenAI(api_key=API_KEY)
|
70 |
-
stream = client.
|
71 |
model="gpt-3.5-turbo",
|
72 |
-
messages=[
|
73 |
-
|
74 |
-
{"role": "user", "content": input_prompt}
|
75 |
-
],
|
76 |
stream=True,
|
77 |
)
|
78 |
return stream
|
@@ -87,8 +87,11 @@ def multimodal_and_generation(message, history):
|
|
87 |
thread.start()
|
88 |
|
89 |
return streamer
|
90 |
-
|
91 |
def diffusing(prompt):
|
|
|
|
|
|
|
92 |
image = base(
|
93 |
prompt=prompt,
|
94 |
num_inference_steps=40,
|
@@ -105,124 +108,62 @@ def diffusing(prompt):
|
|
105 |
|
106 |
def check_cuda_availability():
|
107 |
if torch.cuda.is_available():
|
108 |
-
|
109 |
-
return result
|
110 |
else:
|
111 |
return "No CUDA device found."
|
112 |
|
113 |
mode = ""
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
# result = check_cuda_availability()
|
121 |
-
# yield result
|
122 |
-
# return
|
123 |
-
|
124 |
-
# if message == "imagery":
|
125 |
-
# mode = message
|
126 |
-
# yield "Imagery On! Type your prompt to make the image πΌοΈ"
|
127 |
-
# return
|
128 |
-
|
129 |
-
# if message == "chatting":
|
130 |
-
# mode = message
|
131 |
-
# yield "Imagery Off. Ask me any questions. βοΈ"
|
132 |
-
# return
|
133 |
-
|
134 |
-
# if mode == "imagery":
|
135 |
-
# print("On imagery\n\n")
|
136 |
-
# image = diffusing(
|
137 |
-
# prompt=message,
|
138 |
-
# )
|
139 |
-
# yield image
|
140 |
-
# return
|
141 |
-
|
142 |
-
# if mode == "chatting" or mode == "":
|
143 |
-
# print("On chatting or no mode.\n\n")
|
144 |
-
# stream = multimodal_and_generation(
|
145 |
-
# message=message,
|
146 |
-
# history=history,
|
147 |
-
# )
|
148 |
-
# gpt_outputs = []
|
149 |
-
# async for chunk in stream:
|
150 |
-
# if chunk.choices[0].delta.content is not None:
|
151 |
-
# text = chunk.choices[0].delta.content
|
152 |
-
# gpt_outputs.append(text)
|
153 |
-
# yield "".join(gpt_outputs)
|
154 |
-
|
155 |
-
async def bot_comms_async(message, history):
|
156 |
global mode
|
157 |
|
158 |
if message == "check cuda":
|
159 |
-
|
160 |
-
return
|
161 |
-
|
162 |
if message == "imagery":
|
163 |
mode = message
|
164 |
-
|
165 |
-
|
|
|
166 |
if message == "chatting":
|
167 |
mode = message
|
168 |
-
|
169 |
-
|
|
|
170 |
if mode == "imagery":
|
171 |
print("On imagery\n\n")
|
172 |
-
image = diffusing(
|
173 |
-
|
174 |
-
|
|
|
|
|
|
|
175 |
if mode == "chatting" or mode == "":
|
176 |
print("On chatting or no mode.\n\n")
|
177 |
-
stream = multimodal_and_generation(message
|
178 |
-
|
179 |
-
|
180 |
-
if chunk.choices[0].delta.content is not None:
|
181 |
text = chunk.choices[0].delta.content
|
182 |
gpt_outputs.append(text)
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
submit = gr.Button("Submit")
|
197 |
-
|
198 |
-
def user(message, history):
|
199 |
-
return "", history + [[message, None]]
|
200 |
-
|
201 |
-
def bot_response(message, history):
|
202 |
-
response = bot_comms(message, history)
|
203 |
-
return history + [[message, response]]
|
204 |
-
|
205 |
-
msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
206 |
-
bot_response, [msg, chatbot], [msg, chatbot]
|
207 |
-
)
|
208 |
-
submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
209 |
-
bot_response, [msg, chatbot], [msg, chatbot]
|
210 |
)
|
211 |
|
212 |
if __name__ == "__main__":
|
213 |
-
demo.launch(
|
214 |
-
|
215 |
-
# chatbot = gr.Chatbot(height=600, label="Chimera AI")
|
216 |
-
# chat_input = gr.MultimodalTextbox(interactive=True, file_types=["images"], placeholder="Enter your question or upload an image.", show_label=False)
|
217 |
-
# with gr.Blocks(fill_height=True) as demo:
|
218 |
-
# gr.Markdown(DESCRIPTION)
|
219 |
-
# gr.ChatInterface(
|
220 |
-
# fn=bot_comms,
|
221 |
-
# chatbot=chatbot,
|
222 |
-
# fill_height=True,
|
223 |
-
# multimodal=True,
|
224 |
-
# textbox=chat_input,
|
225 |
-
# )
|
226 |
-
|
227 |
-
# if __name__ == "__main__":
|
228 |
-
# demo.launch()
|
|
|
3 |
import os
import threading

from diffusers import DiffusionPipeline
import gradio as gr
import numpy as np
import openai
from PIL import Image
import spaces
|
|
|
|
|
10 |
|
11 |
+
# Retrieve the OpenAI API key from the environment
|
12 |
API_KEY = os.getenv('OPEN_AI_API_KEYS')
|
13 |
|
14 |
DESCRIPTION = '''
|
|
|
18 |
</div>
|
19 |
'''
|
20 |
|
21 |
+
# Initialize the models
|
22 |
llava_model = LlavaForConditionalGeneration.from_pretrained(
|
23 |
"xtuner/llava-llama-3-8b-v1_1-transformers",
|
24 |
torch_dtype=torch.float16,
|
|
|
26 |
)
|
27 |
|
28 |
llava_model.to("cuda:0")
|
29 |
+
|
30 |
processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
|
31 |
+
|
32 |
llava_model.generation_config.eos_token_id = 128009
|
33 |
|
34 |
+
# Initialize Stable Diffusion pipelines
|
35 |
base = DiffusionPipeline.from_pretrained(
|
36 |
"stabilityai/stable-diffusion-xl-base-1.0",
|
37 |
torch_dtype=torch.float16,
|
|
|
51 |
refiner.to('cuda')
|
52 |
|
53 |
def multimodal_and_generation(message, history):
|
54 |
+
"""
|
55 |
+
Generates a response based on the input message and optionally an image.
|
56 |
+
"""
|
57 |
print(f"Message:\n{message}\nType:\n{type(message)}")
|
58 |
image_path = None
|
59 |
+
if "files" in message and message["files"]:
|
60 |
+
if type(message["files"][-1]) == dict:
|
61 |
image_path = message["files"][-1]["path"]
|
62 |
else:
|
63 |
image_path = message["files"][-1]
|
64 |
else:
|
65 |
for hist in history:
|
66 |
+
if type(hist[0]) == tuple:
|
67 |
image_path = hist[0][0]
|
68 |
|
69 |
if image_path is None:
|
70 |
input_prompt = message["text"]
|
71 |
+
client = openai.OpenAI(api_key=API_KEY)
|
72 |
+
stream = client.ChatCompletion.create(
|
73 |
model="gpt-3.5-turbo",
|
74 |
+
messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
|
75 |
+
{"role": "user", "content": input_prompt}],
|
|
|
|
|
76 |
stream=True,
|
77 |
)
|
78 |
return stream
|
|
|
87 |
thread.start()
|
88 |
|
89 |
return streamer
|
90 |
+
|
91 |
def diffusing(prompt):
|
92 |
+
"""
|
93 |
+
Generates an image using Stable Diffusion based on the input prompt.
|
94 |
+
"""
|
95 |
image = base(
|
96 |
prompt=prompt,
|
97 |
num_inference_steps=40,
|
|
|
108 |
|
109 |
def check_cuda_availability():
    """
    Report which GPU is visible to PyTorch.

    Returns:
        "GPU: <name of CUDA device 0>" when torch reports CUDA as available,
        otherwise the fixed string "No CUDA device found.".
    """
    # Guard clause: bail out first so the happy path is not nested.
    if not torch.cuda.is_available():
        return "No CUDA device found."

    device_name = torch.cuda.get_device_name(0)
    return f"GPU: {device_name}"
|
114 |
|
115 |
# Current interaction mode, shared across calls: "" (default), "imagery" or "chatting".
mode = ""


@spaces.GPU(duration=120)
def bot_comms(message, history):
    """
    Handles communication between Gradio and the models.

    Generator callback for the chat interface. Depending on the typed text and
    the module-level ``mode`` it either answers a control command, produces an
    image through ``diffusing``, or streams a text answer obtained from
    ``multimodal_and_generation``.

    Args:
        message: The user's turn. The chat interface is configured with
            ``multimodal=True``, so this is expected to be a dict carrying
            "text" and "files"; a plain string is accepted as well.
        history: Previous turns, forwarded untouched to
            ``multimodal_and_generation``.

    Yields:
        A status string for control commands, the object returned by
        ``diffusing`` in imagery mode, or the progressively growing answer
        text while chatting.
    """
    global mode

    # Control words must be compared against the typed text, not the whole
    # multimodal payload: a dict never equals "imagery"/"chatting"/"check cuda".
    if isinstance(message, dict):
        text = message.get("text") or ""
    else:
        text = message
        # multimodal_and_generation indexes message["text"], so give it the
        # same shape it gets from the multimodal textbox.
        message = {"text": text, "files": []}

    if text == "check cuda":
        yield check_cuda_availability()
        return

    # NOTE(review): the trailing characters of the two status strings below
    # look like a mis-decoded emoji; kept byte-for-byte - confirm the intended glyphs.
    if text == "imagery":
        mode = text
        yield "Imagery On! Type your prompt to make the image πΌοΈ"
        return

    if text == "chatting":
        mode = text
        yield "Imagery Off. Ask me any questions. βοΈ"
        return

    if mode == "imagery":
        print("On imagery\n\n")
        # The diffusion pipeline needs the prompt string, not the payload dict.
        yield diffusing(text)
        return

    if mode == "chatting" or mode == "":
        print("On chatting or no mode.\n\n")
        stream = multimodal_and_generation(message, history)
        gpt_outputs = []
        for chunk in stream:
            if isinstance(chunk, str):
                # The image branch returns `streamer`; presumably a
                # TextIteratorStreamer yielding plain strings - TODO confirm.
                piece = chunk
            else:
                choices = getattr(chunk, "choices", None)
                piece = choices[0].delta.content if choices else None
            # delta.content exists but is None on role/stop chunks; joining
            # None would raise TypeError, so skip empty pieces.
            if not piece:
                continue
            gpt_outputs.append(piece)
            yield "".join(gpt_outputs)
|
154 |
+
|
155 |
+
# Chat display and multimodal input box are created up front so that they can
# be handed to the ChatInterface inside the Blocks layout below.
chatbot = gr.Chatbot(height=600, label="Chimera AI")
chat_input = gr.MultimodalTextbox(
    interactive=True,
    file_types=["images"],
    placeholder="Enter your question or upload an image.",
    show_label=False,
)

# Page layout: the description banner followed by the chat interface, which
# routes every user turn through bot_comms.
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=bot_comms,
        chatbot=chatbot,
        fill_height=True,
        multimodal=True,
        textbox=chat_input,
    )

# Start the web app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|