removed xtuner and kept diffusor and gpt3.5
app.py CHANGED
@@ -34,18 +34,18 @@ DESCRIPTION = '''
 # '''

 # Initialize the models
-llava_model = LlavaForConditionalGeneration.from_pretrained(
-    "xtuner/llava-llama-3-8b-v1_1-transformers",
-    torch_dtype=torch.float32,
-    low_cpu_mem_usage=True,
-)
-llava_model = llava_model.half()
-llava_model.to("cuda:0")
+# llava_model = LlavaForConditionalGeneration.from_pretrained(
+#     "xtuner/llava-llama-3-8b-v1_1-transformers",
+#     torch_dtype=torch.float32,
+#     low_cpu_mem_usage=True,
+# )
+# llava_model = llava_model.half()
+# llava_model.to("cuda:0")

-processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
-tokenizer = AutoTokenizer.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
+# processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
+# tokenizer = AutoTokenizer.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")

-llava_model.generation_config.eos_token_id = 128009
+# llava_model.generation_config.eos_token_id = 128009

 # # Initialize Stable Diffusion pipelines
 # base = DiffusionPipeline.from_pretrained(
@@ -92,42 +92,42 @@ refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-ref

 chat_mode = {}

-def xtuner(message, history):
-    """
-    Generates text out of image input from llava.
-    """
-    image_path = None
-    if "files" in message and message["files"]:
-        if type(message["files"][-1]) == dict:
-            image_path = message["files"][-1]["path"]
-        else:
-            image_path = message["files"][-1]
-    else:
-        for hist in history:
-            if type(hist[0]) == tuple:
-                image_path = hist[0][0]
-
-    try:
-        if image_path is None:
-            gr.Error("Please upload an image so Chimera can understand it.")
-    except NameError:
-        gr.Error("Upload an Image so Chimera can understand it.")
-
-    # prompt = f"user\n\n<image>\n{message['text']}assistant\n\n"
-    # image = Image.open(image_path)
-    with open(image_path, "rb") as image_file:
-        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
-
-    prompt = f"user\n\n<image>\n{encoded_image}\n\n{message['text']}assistant\n\n"
-    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512)
-    streamer = TextIteratorStreamer(processor.tokenizer, **{"skip_special_tokens": False, "skip_prompt": True})
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False)
-
-    with torch.cuda.amp.autocast():
-        outputs = llava_model.generate(**generation_kwargs)
-
-    output_ids = outputs[0].cpu().numpy().tolist()
-    decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True)
+# def xtuner(message, history):
+#     """
+#     Generates text out of image input from llava.
+#     """
+#     image_path = None
+#     if "files" in message and message["files"]:
+#         if type(message["files"][-1]) == dict:
+#             image_path = message["files"][-1]["path"]
+#         else:
+#             image_path = message["files"][-1]
+#     else:
+#         for hist in history:
+#             if type(hist[0]) == tuple:
+#                 image_path = hist[0][0]
+
+#     try:
+#         if image_path is None:
+#             gr.Error("Please upload an image so Chimera can understand it.")
+#     except NameError:
+#         gr.Error("Upload an Image so Chimera can understand it.")
+
+#     # prompt = f"user\n\n<image>\n{message['text']}assistant\n\n"
+#     # image = Image.open(image_path)
+#     with open(image_path, "rb") as image_file:
+#         encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
+
+#     prompt = f"user\n\n<image>\n{encoded_image}\n\n{message['text']}assistant\n\n"
+#     inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512)
+#     streamer = TextIteratorStreamer(processor.tokenizer, **{"skip_special_tokens": False, "skip_prompt": True})
+#     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False)
+
+#     with torch.cuda.amp.autocast():
+#         outputs = llava_model.generate(**generation_kwargs)
+
+#     output_ids = outputs[0].cpu().numpy().tolist()
+#     decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True)

     # prompt = f"user\n\n<image>\n{encoded_image}\n\n{message['text']}assistant\n\n"
     # inputs = tokenizer(prompt, image, return_tensors='pt', padding=True, truncation=True, max_length=512)
@@ -152,7 +152,7 @@ def xtuner(message, history):
     # yield generate_text

     # yield decoded_output
-    yield decoded_output
+    # yield decoded_output

 def generation(message, history):
     """
@@ -241,29 +241,6 @@ def bot_comms(message, history):
     if message["text"] == "check cuda":
         logger.debug("Checking CUDA availability.")
         return check_cuda_availability()
-
-    # if message["text"] == "imagery":
-    #     logger.debug("Switching to imagery mode.")
-    #     # mode_manager.set_mode("imagery")
-    #     mode += "imagery"
-    #     # logger.debug(f"\nimagery mode: {mode}\n")
-    #     return "Imagery On! Type your prompt to make the image 🖼️"
-
-    # if message["text"] == "chatting":
-    #     logger.debug("Switching to chatting mode.")
-    #     # mode_manager.set_mode("chatting")
-    #     mode += "chatting"
-    #     # logger.debug(f"\nchatting mode: {mode}\n")
-    #     return "Imagery Off. Ask me any questions. ✍️"
-
-    # if mode == "imagery":
-    #     logger.debug("Processing imagery prompt.")
-    #     if isinstance(message, dict) and "text" in message:
-    #         message = message["text"]
-    #     image = diffusing(message)
-    #     # mode_gradio("imagery")
-    #     image_created["Image"] = image
-    #     return image

     buffer = ""
     gpt_outputs = []
@@ -306,22 +283,9 @@ with gr.Blocks(fill_height=True) as demo:
     gr.Markdown('''
     <div>
     <h1 style="text-align: center;">Chimera Text Generation</h1>
-    <p style="text-align: center;">This contains a Generative LLM from <a href="https://openai.com/"><b>Open AI</b></a> called GPT-3.5-Turbo
+    <p style="text-align: center;">This contains a Generative LLM from <a href="https://openai.com/"><b>Open AI</b></a> called GPT-3.5-Turbo and Vision.</p>
     </div>
     ''')
     chat = gr.ChatInterface(fn=bot_comms)
-    # Xtuner
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown('''
-            <div>
-            <p style="text-align: center;">This has a LLava-Model from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>Xtuner</b></a></p>
-            </div>
-            ''')
-            chat = gr.ChatInterface(
-                fn=xtuner,
-                multimodal=True,
-                textbox=chat_input,
-            )

 demo.launch()