paralym committed on
Commit
83b30a3
·
verified ·
1 Parent(s): b5a3831

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +339 -149
app.py CHANGED
@@ -1,20 +1,22 @@
 
1
  import gradio as gr
2
  import os
3
  from threading import Thread
4
- from queue import Queue
5
- import time
6
  import cv2
 
7
  import datetime
 
8
  import torch
 
9
  import spaces
10
  import numpy as np
11
- import json
12
- import hashlib
13
- import PIL
14
- from typing import Iterator
15
 
16
  from llava import conversation as conversation_lib
17
  from llava.constants import DEFAULT_IMAGE_TOKEN
 
 
18
  from llava.constants import (
19
  IMAGE_TOKEN_INDEX,
20
  DEFAULT_IMAGE_TOKEN,
@@ -29,14 +31,24 @@ from llava.mm_utils import (
29
  get_model_name_from_path,
30
  KeywordsStoppingCriteria,
31
  )
32
- import sys
33
  from serve_constants import html_header
34
 
35
  import requests
36
  from PIL import Image
37
  from io import BytesIO
38
- from transformers import TextIteratorStreamer
 
 
 
 
 
 
 
 
 
39
  import subprocess
 
40
 
41
  external_log_dir = "./logs"
42
  LOGDIR = external_log_dir
@@ -51,9 +63,13 @@ def install_gradio_4_35_0():
51
  else:
52
  print("Gradio 4.35.0 is already installed.")
53
 
 
54
  install_gradio_4_35_0()
55
 
 
 
56
  print(f"Gradio version: {gr.__version__}")
 
57
 
58
  def get_conv_log_filename():
59
  t = datetime.datetime.now()
@@ -66,12 +82,12 @@ class InferenceDemo(object):
66
  ) -> None:
67
  disable_torch_init()
68
 
69
- self.tokenizer = tokenizer
70
- self.model = model
71
- self.image_processor = image_processor
72
- self.context_len = context_len
73
-
74
- model_name = get_model_name_from_path(model_path)
75
 
76
  if "llama-2" in model_name.lower():
77
  conv_mode = "llava_llama_2"
@@ -94,43 +110,31 @@ class InferenceDemo(object):
94
  )
95
  else:
96
  args.conv_mode = conv_mode
97
-
98
  self.conv_mode = conv_mode
99
  self.conversation = conv_templates[args.conv_mode].copy()
100
  self.num_frames = args.num_frames
101
 
102
def process_stream(streamer: "TextIteratorStreamer", history: list, q: Queue):
    """Forward partial generations from *streamer* into *q* as history snapshots.

    Args:
        streamer: Iterable yielding successive text fragments.
        history: Chat history; its last row is replaced with
            ``[user_message, text_so_far]`` after every fragment.
        q: Queue receiving one shallow copy of ``history`` per fragment,
            followed by a final ``None`` sentinel.

    Any exception raised while streaming is printed rather than propagated,
    and the sentinel is always enqueued so a consumer never waits forever.
    """
    try:
        current_message = ""
        for new_text in streamer:
            current_message += new_text
            # Replace the row instead of assigning into it: this also works
            # when the row is a tuple, and gives every queued snapshot its
            # own row object instead of aliasing one mutable list.
            history[-1] = [history[-1][0], current_message]
            q.put(history.copy())
            time.sleep(0.02)  # Add a small delay to prevent overloading
    except Exception as e:
        print(f"Error in process_stream: {e}")
    finally:
        q.put(None)  # Signal that we're done
115
-
116
def stream_output(history: list, q: Queue) -> Iterator[list]:
    """Yield history snapshots from *q* until the ``None`` sentinel arrives.

    Args:
        history: Unused; kept so the signature stays unchanged.
        q: Queue of history snapshots terminated by ``None``.

    Yields:
        Each non-``None`` item taken from the queue, in order.
    """
    while True:
        val = q.get()
        # Acknowledge every dequeued item, sentinel included, before handing
        # control back to the consumer; otherwise q.join() could never return
        # if the consumer stops iterating early or once the sentinel is read.
        q.task_done()
        if val is None:
            break
        yield val
124
 
125
  def is_valid_video_filename(name):
126
  video_extensions = ["avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg"]
 
127
  ext = name.split(".")[-1].lower()
128
- return ext in video_extensions
 
 
 
 
129
 
130
  def is_valid_image_filename(name):
131
- image_extensions = ["jpg", "jpeg", "png", "bmp", "gif", "tiff", "webp", "heic", "heif", "jfif", "svg", "eps", "raw"]
 
132
  ext = name.split(".")[-1].lower()
133
- return ext in image_extensions
 
 
 
 
 
134
 
135
  def sample_frames(video_file, num_frames):
136
  video = cv2.VideoCapture(video_file)
@@ -139,33 +143,54 @@ def sample_frames(video_file, num_frames):
139
  frames = []
140
  for i in range(total_frames):
141
  ret, frame = video.read()
 
142
  if not ret:
143
  continue
144
- pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
145
  if i % interval == 0:
146
  frames.append(pil_img)
147
  video.release()
148
  return frames
149
 
 
150
  def load_image(image_file):
151
- if image_file.startswith(("http://", "https://")):
152
  response = requests.get(image_file)
153
  if response.status_code == 200:
154
  image = Image.open(BytesIO(response.content)).convert("RGB")
155
  else:
156
- print("Failed to load the image")
157
- return None
158
  else:
159
- print("Load image from local file:", image_file)
 
160
  image = Image.open(image_file).convert("RGB")
 
161
  return image
162
 
 
163
  def clear_history(history):
164
- global our_chatbot
165
  our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy()
 
166
  return None
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  def add_message(history, message):
 
169
  global our_chatbot
170
  if len(history) == 0:
171
  our_chatbot = InferenceDemo(
@@ -178,47 +203,38 @@ def add_message(history, message):
178
  history.append((message["text"], None))
179
  return history, gr.MultimodalTextbox(value=None, interactive=False)
180
 
 
181
  @spaces.GPU
182
  def bot(history):
183
- global start_tstamp, finish_tstamp
184
-
185
- start_tstamp = time.time()
186
  text = history[-1][0]
187
  images_this_term = []
 
 
188
  num_new_images = 0
189
-
190
  for i, message in enumerate(history[:-1]):
191
- if isinstance(message[0], tuple):
192
  images_this_term.append(message[0][0])
193
  if is_valid_video_filename(message[0][0]):
 
194
  raise ValueError("Video is not supported")
 
195
  elif is_valid_image_filename(message[0][0]):
 
196
  num_new_images += 1
197
  else:
198
  raise ValueError("Invalid image file")
199
  else:
200
  num_new_images = 0
201
 
202
- assert len(images_this_term) > 0, "Must have an image"
203
-
204
- image_list = []
205
- for f in images_this_term:
206
- if is_valid_video_filename(f):
207
- image_list += sample_frames(f, our_chatbot.num_frames)
208
- elif is_valid_image_filename(f):
209
- image_list.append(load_image(f))
210
- else:
211
- raise ValueError("Invalid image file")
212
-
213
- image_tensor = [
214
- our_chatbot.image_processor.preprocess(f, return_tensors="pt")["pixel_values"][0]
215
- .half()
216
- .to(our_chatbot.model.device)
217
- for f in image_list
218
- ]
219
-
220
- # Process image hashes
221
  all_image_hash = []
 
222
  for image_path in images_this_term:
223
  with open(image_path, "rb") as image_file:
224
  image_data = image_file.read()
@@ -232,26 +248,54 @@ def bot(history):
232
  f"{t.year}-{t.month:02d}-{t.day:02d}",
233
  f"{image_hash}.jpg",
234
  )
 
235
  if not os.path.isfile(filename):
236
  os.makedirs(os.path.dirname(filename), exist_ok=True)
 
237
  image.save(filename)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  image_tensor = torch.stack(image_tensor)
240
  image_token = DEFAULT_IMAGE_TOKEN * num_new_images
241
- inp = image_token + "\n" + text
242
-
 
 
 
243
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
 
244
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
245
  prompt = our_chatbot.conversation.get_prompt()
246
 
247
- input_ids = (
248
- tokenizer_image_token(
 
 
 
 
 
 
249
  prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
250
- )
251
- .unsqueeze(0)
252
- .to(our_chatbot.model.device)
253
- )
254
-
255
  stop_str = (
256
  our_chatbot.conversation.sep
257
  if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
@@ -261,54 +305,85 @@ def bot(history):
261
  stopping_criteria = KeywordsStoppingCriteria(
262
  keywords, our_chatbot.tokenizer, input_ids
263
  )
264
-
265
- # Set up streaming
266
- q = Queue()
267
  streamer = TextIteratorStreamer(
268
- our_chatbot.tokenizer,
269
- skip_prompt=True,
270
- skip_special_tokens=True
271
  )
272
-
273
- # Start generation in a separate thread
274
- thread = Thread(
275
- target=process_stream,
276
- args=(streamer, history, q)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  )
278
- thread.start()
279
-
280
- # Start the generation
281
- with torch.inference_mode():
282
- output_ids = our_chatbot.model.generate(
283
- input_ids,
284
- images=image_tensor,
285
- do_sample=True,
286
- temperature=0.2,
287
- max_new_tokens=1024,
288
- streamer=streamer,
289
- use_cache=True,
290
- stopping_criteria=[stopping_criteria],
291
- )
292
-
293
- finish_tstamp = time.time()
294
 
295
- # Log conversation
 
 
 
 
 
 
 
 
 
296
  with open(get_conv_log_filename(), "a") as fout:
297
  data = {
298
- "tstamp": round(finish_tstamp, 4),
299
  "type": "chat",
300
  "model": "Pangea-7b",
301
- "start": round(start_tstamp, 4),
302
- "finish": round(finish_tstamp, 4),
303
  "state": history,
304
  "images": all_image_hash,
 
305
  }
 
306
  fout.write(json.dumps(data) + "\n")
 
 
307
 
308
- # Return a generator that will yield updated history
309
- return stream_output(history, q)
 
 
 
 
 
 
 
 
310
 
311
- with gr.Blocks(css=".message-wrap.svelte-1lcyrx4>div.svelte-1lcyrx4 img {min-width: 40px}") as demo:
 
312
  gr.HTML(html_header)
313
 
314
  with gr.Column():
@@ -319,8 +394,10 @@ with gr.Blocks(css=".message-wrap.svelte-1lcyrx4>div.svelte-1lcyrx4 img {min-wid
319
  upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
320
  downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
321
  flag_btn = gr.Button(value="⚠️ Flag", interactive=True)
 
322
  regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=True)
323
  clear_btn = gr.Button(value="🗑️ Clear history", interactive=True)
 
324
 
325
  chat_input = gr.MultimodalTextbox(
326
  interactive=True,
@@ -330,11 +407,11 @@ with gr.Blocks(css=".message-wrap.svelte-1lcyrx4>div.svelte-1lcyrx4 img {min-wid
330
  submit_btn="🚀"
331
  )
332
 
333
- cur_dir = os.path.dirname(os.path.abspath(__file__))
334
  gr.Examples(
335
- examples_per_page=20,
336
- examples=[
337
- [
338
  {
339
  "files": [
340
  f"{cur_dir}/examples/user_example_07.jpg",
@@ -358,45 +435,158 @@ with gr.Blocks(css=".message-wrap.svelte-1lcyrx4>div.svelte-1lcyrx4 img {min-wid
358
  "text": "Why this image funny?",
359
  },
360
  ],
361
- ],
362
- inputs=[chat_input],
363
- label="Image",
364
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
 
366
  chat_msg = chat_input.submit(
367
- add_message,
368
- [chatbot, chat_input],
369
- [chatbot, chat_input],
370
- queue=False
371
- ).then(
372
- bot,
373
- chatbot,
374
- chatbot,
375
- api_name="bot_response"
376
- ).then(
377
- lambda: gr.MultimodalTextbox(interactive=True),
378
- None,
379
- [chat_input]
380
  )
 
 
381
 
 
382
  clear_btn.click(
383
- fn=clear_history,
384
- inputs=[chatbot],
385
- outputs=[chatbot],
386
- api_name="clear_all",
387
- queue=False
388
  )
389
 
390
- regenerate_btn.click(
391
- fn=lambda history: history[:-1],
392
- inputs=[chatbot],
393
- outputs=[chatbot],
394
- queue=False
395
- ).then(
396
- bot,
397
- chatbot,
398
- chatbot
399
- )
400
 
401
  demo.queue()
402
 
 
1
+ # from .demo_modelpart import InferenceDemo
2
  import gradio as gr
3
  import os
4
  from threading import Thread
5
+
6
+ # import time
7
  import cv2
8
+
9
  import datetime
10
+ # import copy
11
  import torch
12
+
13
  import spaces
14
  import numpy as np
 
 
 
 
15
 
16
  from llava import conversation as conversation_lib
17
  from llava.constants import DEFAULT_IMAGE_TOKEN
18
+
19
+
20
  from llava.constants import (
21
  IMAGE_TOKEN_INDEX,
22
  DEFAULT_IMAGE_TOKEN,
 
31
  get_model_name_from_path,
32
  KeywordsStoppingCriteria,
33
  )
34
+
35
  from serve_constants import html_header
36
 
37
  import requests
38
  from PIL import Image
39
  from io import BytesIO
40
+ from transformers import TextStreamer, TextIteratorStreamer
41
+
42
+ import hashlib
43
+ import PIL
44
+ import base64
45
+ import json
46
+
47
+ import datetime
48
+ import gradio as gr
49
+ import gradio_client
50
  import subprocess
51
+ import sys
52
 
53
  external_log_dir = "./logs"
54
  LOGDIR = external_log_dir
 
63
  else:
64
  print("Gradio 4.35.0 is already installed.")
65
 
66
+ # Call the function to install Gradio 4.35.0 if needed
67
  install_gradio_4_35_0()
68
 
69
+ import gradio as gr
70
+ import gradio_client
71
  print(f"Gradio version: {gr.__version__}")
72
+ print(f"Gradio-client version: {gradio_client.__version__}")
73
 
74
  def get_conv_log_filename():
75
  t = datetime.datetime.now()
 
82
  ) -> None:
83
  disable_torch_init()
84
 
85
+ self.tokenizer, self.model, self.image_processor, self.context_len = (
86
+ tokenizer,
87
+ model,
88
+ image_processor,
89
+ context_len,
90
+ )
91
 
92
  if "llama-2" in model_name.lower():
93
  conv_mode = "llava_llama_2"
 
110
  )
111
  else:
112
  args.conv_mode = conv_mode
 
113
  self.conv_mode = conv_mode
114
  self.conversation = conv_templates[args.conv_mode].copy()
115
  self.num_frames = args.num_frames
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  def is_valid_video_filename(name):
119
  video_extensions = ["avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg"]
120
+
121
  ext = name.split(".")[-1].lower()
122
+
123
+ if ext in video_extensions:
124
+ return True
125
+ else:
126
+ return False
127
 
128
  def is_valid_image_filename(name):
129
+ image_extensions = ["jpg", "jpeg", "png", "bmp", "gif", "tiff", "webp", "heic", "heif", "jfif", "svg", "eps", "raw"]
130
+
131
  ext = name.split(".")[-1].lower()
132
+
133
+ if ext in image_extensions:
134
+ return True
135
+ else:
136
+ return False
137
+
138
 
139
  def sample_frames(video_file, num_frames):
140
  video = cv2.VideoCapture(video_file)
 
143
  frames = []
144
  for i in range(total_frames):
145
  ret, frame = video.read()
146
+ pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
147
  if not ret:
148
  continue
 
149
  if i % interval == 0:
150
  frames.append(pil_img)
151
  video.release()
152
  return frames
153
 
154
+
155
  def load_image(image_file):
156
+ if image_file.startswith("http") or image_file.startswith("https"):
157
  response = requests.get(image_file)
158
  if response.status_code == 200:
159
  image = Image.open(BytesIO(response.content)).convert("RGB")
160
  else:
161
+ print("failed to load the image")
 
162
  else:
163
+ print("Load image from local file")
164
+ print(image_file)
165
  image = Image.open(image_file).convert("RGB")
166
+
167
  return image
168
 
169
+
170
  def clear_history(history):
171
+
172
  our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy()
173
+
174
  return None
175
 
176
+
177
def clear_response(history):
    """Strip the most recent exchange from *history* and return its question.

    Rows are scanned from the end; rows whose first element is ``None`` are
    skipped. The first row found with a non-``None`` first element marks the
    cut point.

    Args:
        history: Sequence of ``[question, response]`` rows.

    Returns:
        A ``(trimmed_history, question)`` pair, where ``trimmed_history`` is
        everything before the matching row. When no row carries a question
        (including an empty history), ``history`` is returned unchanged with
        ``None`` as the question.
    """
    # Go up to len(history) inclusive so the very first row is inspected too,
    # and so single-row and empty histories are handled without error.
    for offset in range(1, len(history) + 1):
        question = history[-offset][0]
        if question is not None:
            return history[:-offset], question
    return history, None
186
+
187
+
188
+ # def print_like_dislike(x: gr.LikeData):
189
+ # print(x.index, x.value, x.liked)
190
+
191
+
192
  def add_message(history, message):
193
+ # history=[]
194
  global our_chatbot
195
  if len(history) == 0:
196
  our_chatbot = InferenceDemo(
 
203
  history.append((message["text"], None))
204
  return history, gr.MultimodalTextbox(value=None, interactive=False)
205
 
206
+
207
  @spaces.GPU
208
  def bot(history):
 
 
 
209
  text = history[-1][0]
210
  images_this_term = []
211
+ text_this_term = ""
212
+ # import pdb;pdb.set_trace()
213
  num_new_images = 0
 
214
  for i, message in enumerate(history[:-1]):
215
+ if type(message[0]) is tuple:
216
  images_this_term.append(message[0][0])
217
  if is_valid_video_filename(message[0][0]):
218
+ # Videos are not accepted
219
  raise ValueError("Video is not supported")
220
+ num_new_images += our_chatbot.num_frames
221
  elif is_valid_image_filename(message[0][0]):
222
+ print("#### Load image from local file",message[0][0])
223
  num_new_images += 1
224
  else:
225
  raise ValueError("Invalid image file")
226
  else:
227
  num_new_images = 0
228
 
229
+ # for message in history[-i-1:]:
230
+ # images_this_term.append(message[0][0])
231
+
232
+ assert len(images_this_term) > 0, "must have an image"
233
+ # image_files = (args.image_file).split(',')
234
+ # image = [load_image(f) for f in images_this_term if f]
235
+
 
 
 
 
 
 
 
 
 
 
 
 
236
  all_image_hash = []
237
+ all_image_path = []
238
  for image_path in images_this_term:
239
  with open(image_path, "rb") as image_file:
240
  image_data = image_file.read()
 
248
  f"{t.year}-{t.month:02d}-{t.day:02d}",
249
  f"{image_hash}.jpg",
250
  )
251
+ all_image_path.append(filename)
252
  if not os.path.isfile(filename):
253
  os.makedirs(os.path.dirname(filename), exist_ok=True)
254
+ print("image save to",filename)
255
  image.save(filename)
256
+
257
+ image_list = []
258
+ for f in images_this_term:
259
+ if is_valid_video_filename(f):
260
+ image_list += sample_frames(f, our_chatbot.num_frames)
261
+ elif is_valid_image_filename(f):
262
+ image_list.append(load_image(f))
263
+ else:
264
+ raise ValueError("Invalid image file")
265
+
266
+ image_tensor = [
267
+ our_chatbot.image_processor.preprocess(f, return_tensors="pt")["pixel_values"][
268
+ 0
269
+ ]
270
+ .half()
271
+ .to(our_chatbot.model.device)
272
+ for f in image_list
273
+ ]
274
+
275
 
276
  image_tensor = torch.stack(image_tensor)
277
  image_token = DEFAULT_IMAGE_TOKEN * num_new_images
278
+ # if our_chatbot.model.config.mm_use_im_start_end:
279
+ # inp = DEFAULT_IM_START_TOKEN + image_token + DEFAULT_IM_END_TOKEN + "\n" + inp
280
+ # else:
281
+ inp = text
282
+ inp = image_token + "\n" + inp
283
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
284
+ # image = None
285
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
286
  prompt = our_chatbot.conversation.get_prompt()
287
 
288
+ # input_ids = (
289
+ # tokenizer_image_token(
290
+ # prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
291
+ # )
292
+ # .unsqueeze(0)
293
+ # .to(our_chatbot.model.device)
294
+ # )
295
+ input_ids = tokenizer_image_token(
296
  prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
297
+ ).unsqueeze(0).to(our_chatbot.model.device)
298
+ # print("### input_id",input_ids)
 
 
 
299
  stop_str = (
300
  our_chatbot.conversation.sep
301
  if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
 
305
  stopping_criteria = KeywordsStoppingCriteria(
306
  keywords, our_chatbot.tokenizer, input_ids
307
  )
308
+ # streamer = TextStreamer(
309
+ # our_chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True
310
+ # )
311
  streamer = TextIteratorStreamer(
312
+ our_chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True
 
 
313
  )
314
+ print(our_chatbot.model.device)
315
+ print(input_ids.device)
316
+ print(image_tensor.device)
317
+
318
+ # with torch.inference_mode():
319
+ # output_ids = our_chatbot.model.generate(
320
+ # input_ids,
321
+ # images=image_tensor,
322
+ # do_sample=True,
323
+ # temperature=0.7,
324
+ # top_p=1.0,
325
+ # max_new_tokens=4096,
326
+ # streamer=streamer,
327
+ # use_cache=False,
328
+ # stopping_criteria=[stopping_criteria],
329
+ # )
330
+
331
+ # outputs = our_chatbot.tokenizer.decode(output_ids[0]).strip()
332
+ # if outputs.endswith(stop_str):
333
+ # outputs = outputs[: -len(stop_str)]
334
+ # our_chatbot.conversation.messages[-1][-1] = outputs
335
+
336
+ # history[-1] = [text, outputs]
337
+
338
+ # return history
339
+ generate_kwargs = dict(
340
+ inputs=input_ids,
341
+ streamer=streamer,
342
+ images=image_tensor,
343
+ max_new_tokens=1024,
344
+ do_sample=True,
345
+ temperature=0.2,
346
+ num_beams=1,
347
+ use_cache=False,
348
+ stopping_criteria=[stopping_criteria],
349
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
+ t = Thread(target=our_chatbot.model.generate, kwargs=generate_kwargs)
352
+ t.start()
353
+
354
+ outputs = []
355
+ for text in streamer:
356
+ outputs.append(text)
357
+ our_chatbot.conversation.messages[-1][-1] = "".join(outputs)
358
+ history[-1] = [text, "".join(outputs)]
359
+ yield history
360
+
361
  with open(get_conv_log_filename(), "a") as fout:
362
  data = {
 
363
  "type": "chat",
364
  "model": "Pangea-7b",
 
 
365
  "state": history,
366
  "images": all_image_hash,
367
+ "images_path": all_image_path
368
  }
369
+ print("#### conv log",data)
370
  fout.write(json.dumps(data) + "\n")
371
+
372
+
373
 
374
+ txt = gr.Textbox(
375
+ scale=4,
376
+ show_label=False,
377
+ placeholder="Enter text and press enter.",
378
+ container=False,
379
+ )
380
+
381
+ with gr.Blocks(
382
+ css=".message-wrap.svelte-1lcyrx4>div.svelte-1lcyrx4 img {min-width: 40px}",
383
+ ) as demo:
384
 
385
+ cur_dir = os.path.dirname(os.path.abspath(__file__))
386
+ # gr.Markdown(title_markdown)
387
  gr.HTML(html_header)
388
 
389
  with gr.Column():
 
394
  upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
395
  downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
396
  flag_btn = gr.Button(value="⚠️ Flag", interactive=True)
397
+ # stop_btn = gr.Button(value="⏹️ Stop Generation", interactive=True)
398
  regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=True)
399
  clear_btn = gr.Button(value="🗑️ Clear history", interactive=True)
400
+
401
 
402
  chat_input = gr.MultimodalTextbox(
403
  interactive=True,
 
407
  submit_btn="🚀"
408
  )
409
 
410
+ print(cur_dir)
411
  gr.Examples(
412
+ examples_per_page=20,
413
+ examples=[
414
+ [
415
  {
416
  "files": [
417
  f"{cur_dir}/examples/user_example_07.jpg",
 
435
  "text": "Why this image funny?",
436
  },
437
  ],
438
+ [
439
+ {
440
+ "files": [
441
+ f"{cur_dir}/examples/norway.jpg",
442
+ ],
443
+ "text": "Analysieren, in welchem Land diese Szene höchstwahrscheinlich gedreht wurde.",
444
+ },
445
+ ],
446
+ [
447
+ {
448
+ "files": [
449
+ f"{cur_dir}/examples/totoro.jpg",
450
+ ],
451
+ "text": "¿En qué anime aparece esta escena? ¿Puedes presentarlo?",
452
+ },
453
+ ],
454
+ [
455
+ {
456
+ "files": [
457
+ f"{cur_dir}/examples/africa.jpg",
458
+ ],
459
+ "text": "इस तस्वीर में हर एक दृश्य तत्व का क्या प्रतिनिधित्व करता है?",
460
+ },
461
+ ],
462
+ [
463
+ {
464
+ "files": [
465
+ f"{cur_dir}/examples/hot_ballon.jpg",
466
+ ],
467
+ "text": "ฉากบอลลูนลมร้อนในภาพนี้อาจอยู่ที่ไหน? สถานที่นี้มีความพิเศษอย่างไร?",
468
+ },
469
+ ],
470
+ [
471
+ {
472
+ "files": [
473
+ f"{cur_dir}/examples/bar.jpg",
474
+ ],
475
+ "text": "Você pode me dar ideias de design baseadas no tema de coquetéis deste letreiro?",
476
+ },
477
+ ],
478
+ [
479
+ {
480
+ "files": [
481
+ f"{cur_dir}/examples/pink_lake.jpg",
482
+ ],
483
+ "text": "Обясни защо езерото на този остров е в този цвят.",
484
+ },
485
+ ],
486
+ [
487
+ {
488
+ "files": [
489
+ f"{cur_dir}/examples/hanzi.jpg",
490
+ ],
491
+ "text": "Can you describe in Hebrew the evolution process of these four Chinese characters from pictographs to modern characters?",
492
+ },
493
+ ],
494
+ [
495
+ {
496
+ "files": [
497
+ f"{cur_dir}/examples/ballon.jpg",
498
+ ],
499
+ "text": "இந்த காட்சியை விவரிக்கவும், மேலும் இந்த படத்தின் அடிப்படையில் துருக்கியில் இந்த காட்சியுடன் தொடர்பான சில பிரபலமான நிகழ்வுகள் என்ன?",
500
+ },
501
+ ],
502
+ [
503
+ {
504
+ "files": [
505
+ f"{cur_dir}/examples/pie.jpg",
506
+ ],
507
+ "text": "Décrivez ce graphique. Quelles informations pouvons-nous en tirer?",
508
+ },
509
+ ],
510
+ [
511
+ {
512
+ "files": [
513
+ f"{cur_dir}/examples/camera.jpg",
514
+ ],
515
+ "text": "Apa arti dari dua angka di sebelah kiri yang ditampilkan di layar kamera?",
516
+ },
517
+ ],
518
+ [
519
+ {
520
+ "files": [
521
+ f"{cur_dir}/examples/dog.jpg",
522
+ ],
523
+ "text": "이 강아지의 표정을 보고 어떤 기분이나 감정을 느끼고 있는지 설명해 주시겠어요?",
524
+ },
525
+ ],
526
+ [
527
+ {
528
+ "files": [
529
+ f"{cur_dir}/examples/book.jpg",
530
+ ],
531
+ "text": "What language is the text in, and what does the title mean in English?",
532
+ },
533
+ ],
534
+ [
535
+ {
536
+ "files": [
537
+ f"{cur_dir}/examples/food.jpg",
538
+ ],
539
+ "text": "Unaweza kunipa kichocheo cha kutengeneza hii pancake?",
540
+ },
541
+ ],
542
+ [
543
+ {
544
+ "files": [
545
+ f"{cur_dir}/examples/line chart.jpg",
546
+ ],
547
+ "text": "Hãy trình bày những xu hướng mà bạn quan sát được từ biểu đồ và hiện tượng xã hội tiềm ẩn từ đó.",
548
+ },
549
+ ],
550
+ [
551
+ {
552
+ "files": [
553
+ f"{cur_dir}/examples/south africa.jpg",
554
+ ],
555
+ "text": "Waar is hierdie plek? Help my om ’n reisroete vir hierdie land te beplan.",
556
+ },
557
+ ],
558
+ [
559
+ {
560
+ "files": [
561
+ f"{cur_dir}/examples/girl.jpg",
562
+ ],
563
+ "text": "لماذا هذه الصورة مضحكة؟",
564
+ },
565
+ ],
566
+ [
567
+ {
568
+ "files": [
569
+ f"{cur_dir}/examples/eagles.jpg",
570
+ ],
571
+ "text": "Какой креатив должен быть в этом логотипе?",
572
+ },
573
+ ],
574
+ ],
575
+ inputs=[chat_input],
576
+ label="Image",
577
+ )
578
 
579
  chat_msg = chat_input.submit(
580
+ add_message, [chatbot, chat_input], [chatbot, chat_input]
 
 
 
 
 
 
 
 
 
 
 
 
581
  )
582
+ bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name="bot_response")
583
+ bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])
584
 
585
+ # chatbot.like(print_like_dislike, None, None)
586
  clear_btn.click(
587
+ fn=clear_history, inputs=[chatbot], outputs=[chatbot], api_name="clear_all"
 
 
 
 
588
  )
589
 
 
 
 
 
 
 
 
 
 
 
590
 
591
  demo.queue()
592