sandz7 committed
Commit f6da98c · 1 Parent(s): ed67eef

removed xtuner and kept the diffusion pipeline and GPT-3.5

Files changed (1)
  1. app.py +48 -84
app.py CHANGED
@@ -34,18 +34,18 @@ DESCRIPTION = '''
 # '''
 
 # Initialize the models
-llava_model = LlavaForConditionalGeneration.from_pretrained(
-    "xtuner/llava-llama-3-8b-v1_1-transformers",
-    torch_dtype=torch.float32,
-    low_cpu_mem_usage=True,
-)
-llava_model = llava_model.half()
-llava_model.to("cuda:0")
+# llava_model = LlavaForConditionalGeneration.from_pretrained(
+#     "xtuner/llava-llama-3-8b-v1_1-transformers",
+#     torch_dtype=torch.float32,
+#     low_cpu_mem_usage=True,
+# )
+# llava_model = llava_model.half()
+# llava_model.to("cuda:0")
 
-processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
-tokenizer = AutoTokenizer.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
+# processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
+# tokenizer = AutoTokenizer.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
 
-llava_model.generation_config.eos_token_id = 128009
+# llava_model.generation_config.eos_token_id = 128009
 
 # # Initialize Stable Diffusion pipelines
 # base = DiffusionPipeline.from_pretrained(
@@ -92,42 +92,42 @@ refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-ref
 
 chat_mode = {}
 
-def xtuner(message, history):
-    """
-    Generates text out of image input from llava.
-    """
-    image_path = None
-    if "files" in message and message["files"]:
-        if type(message["files"][-1]) == dict:
-            image_path = message["files"][-1]["path"]
-        else:
-            image_path = message["files"][-1]
-    else:
-        for hist in history:
-            if type(hist[0]) == tuple:
-                image_path = hist[0][0]
-
-    try:
-        if image_path is None:
-            gr.Error("Please upload an image so Chimera can understand it.")
-    except NameError:
-        gr.Error("Upload an Image so Chimera can understand it.")
-
-    # prompt = f"user\n\n<image>\n{message['text']}assistant\n\n"
-    # image = Image.open(image_path)
-    with open(image_path, "rb") as image_file:
-        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
-
-    prompt = f"user\n\n<image>\n{encoded_image}\n\n{message['text']}assistant\n\n"
-    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512)
-    streamer = TextIteratorStreamer(processor.tokenizer, **{"skip_special_tokens": False, "skip_prompt": True})
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False)
-
-    with torch.cuda.amp.autocast():
-        outputs = llava_model.generate(**generation_kwargs)
-
-    output_ids = outputs[0].cpu().numpy().tolist()
-    decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True)
+# def xtuner(message, history):
+#     """
+#     Generates text out of image input from llava.
+#     """
+#     image_path = None
+#     if "files" in message and message["files"]:
+#         if type(message["files"][-1]) == dict:
+#             image_path = message["files"][-1]["path"]
+#         else:
+#             image_path = message["files"][-1]
+#     else:
+#         for hist in history:
+#             if type(hist[0]) == tuple:
+#                 image_path = hist[0][0]
+
+#     try:
+#         if image_path is None:
+#             gr.Error("Please upload an image so Chimera can understand it.")
+#     except NameError:
+#         gr.Error("Upload an Image so Chimera can understand it.")
+
+#     # prompt = f"user\n\n<image>\n{message['text']}assistant\n\n"
+#     # image = Image.open(image_path)
+#     with open(image_path, "rb") as image_file:
+#         encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
+
+#     prompt = f"user\n\n<image>\n{encoded_image}\n\n{message['text']}assistant\n\n"
+#     inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512)
+#     streamer = TextIteratorStreamer(processor.tokenizer, **{"skip_special_tokens": False, "skip_prompt": True})
+#     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False)
+
+#     with torch.cuda.amp.autocast():
+#         outputs = llava_model.generate(**generation_kwargs)
+
+#     output_ids = outputs[0].cpu().numpy().tolist()
+#     decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True)
 
     # prompt = f"user\n\n<image>\n{encoded_image}\n\n{message['text']}assistant\n\n"
     # inputs = tokenizer(prompt, image, return_tensors='pt', padding=True, truncation=True, max_length=512)
@@ -152,7 +152,7 @@ def xtuner(message, history):
     # yield generate_text
 
     # yield decoded_output
-    yield decoded_output
+    # yield decoded_output
 
 def generation(message, history):
     """
@@ -241,29 +241,6 @@ def bot_comms(message, history):
     if message["text"] == "check cuda":
         logger.debug("Checking CUDA availability.")
         return check_cuda_availability()
-
-    # if message["text"] == "imagery":
-    #     logger.debug("Switching to imagery mode.")
-    #     # mode_manager.set_mode("imagery")
-    #     mode += "imagery"
-    #     # logger.debug(f"\nimagery mode: {mode}\n")
-    #     return "Imagery On! Type your prompt to make the image 🖼️"
-
-    # if message["text"] == "chatting":
-    #     logger.debug("Switching to chatting mode.")
-    #     # mode_manager.set_mode("chatting")
-    #     mode += "chatting"
-    #     # logger.debug(f"\nchatting mode: {mode}\n")
-    #     return "Imagery Off. Ask me any questions. ☄️"
-
-    # if mode == "imagery":
-    #     logger.debug("Processing imagery prompt.")
-    #     if isinstance(message, dict) and "text" in message:
-    #         message = message["text"]
-    #     image = diffusing(message)
-    #     # mode_gradio("imagery")
-    #     image_created["Image"] = image
-    #     return image
 
     buffer = ""
     gpt_outputs = []
@@ -306,22 +283,9 @@ with gr.Blocks(fill_height=True) as demo:
     gr.Markdown('''
    <div>
    <h1 style="text-align: center;">Chimera Text Generation</h1>
-   <p style="text-align: center;">This contains a Generative LLM from <a href="https://openai.com/"><b>Open AI</b></a> called GPT-3.5-Turbo</p>
+   <p style="text-align: center;">This contains a Generative LLM from <a href="https://openai.com/"><b>Open AI</b></a> called GPT-3.5-Turbo and Vision.</p>
    </div>
    ''')
    chat = gr.ChatInterface(fn=bot_comms)
-    # Xtuner
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown('''
-            <div>
-            <p style="text-align: center;">This has a LLava-Model from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>Xtuner</b></a></p>
-            </div>
-            ''')
-            chat = gr.ChatInterface(
-                fn=xtuner,
-                multimodal=True,
-                textbox=chat_input,
-            )
 
 demo.launch()