baohuynhbk14 committed
Commit 2013bf3 · parent 9f4e146

Update user agent and extend the predict function with conversation history and explicit generation parameters

Files changed (1): app.py (+22, -34)
app.py CHANGED

```diff
@@ -37,7 +37,7 @@ torch.set_default_device('cuda')
 
 logger = build_logger("gradio_web_server", "gradio_web_server.log")
 
-headers = {"User-Agent": "Vintern-Chat Client"}
+headers = {"User-Agent": "Vintern-1B-3.5-Demo Client"}
 
 no_change_btn = gr.Button()
 enable_btn = gr.Button(interactive=True)
@@ -181,21 +181,21 @@ model = AutoModel.from_pretrained(
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=False)
 
 @spaces.GPU
-def predict(message, image_path):
-    pixel_values = load_image(image_path, max_num=6).to(torch.bfloat16).cuda()
-    print(f"pixel_values: {pixel_values}")
-    generation_config = dict(max_new_tokens=700, do_sample=False, num_beams=3, repetition_penalty=2.5)
-    print(f"######################")
-    print(f"message: {message}")
+def predict(message,
+            image_path,
+            history,
+            max_input_tiles=6,
+            temperature=1.0,
+            max_output_tokens=700,
+            top_p=0.7,
+            repetition_penalty=2.5):
+    pixel_values = load_image(image_path, max_num=max_input_tiles).to(torch.bfloat16).cuda()
+    generation_config = dict(temperature=temperature, max_new_tokens=max_output_tokens, top_p=top_p, do_sample=False, num_beams=3, repetition_penalty=repetition_penalty)
     if pixel_values is not None:
         question = '<image>\n' + message
     else:
         question = message
-    print("Model: ", model)
-    print("Tokenizer: ", tokenizer)
-    print("Question: ", question)
-    response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
-    print(f"AI response: {response}")
+    response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=history, return_history=True)
     return response, conv_history
 
 def http_bot(
```
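After this hunk, `predict` exposes the tiling and generation knobs that were previously hard-coded. A minimal sketch (not part of the commit) of how it might be called; the file path and Vietnamese prompt are invented for illustration:

```python
# Hypothetical call to the reworked predict(); assumes model, tokenizer,
# and load_image() are initialized as in app.py above.
response, conv_history = predict(
    "Mô tả bức ảnh này.",   # user message ("Describe this image.")
    "example.jpg",          # hypothetical image path
    None,                   # history=None starts a fresh conversation
    max_input_tiles=6,      # forwarded to load_image(max_num=...)
    temperature=1.0,        # inert while do_sample=False
    max_output_tokens=700,
    top_p=0.7,              # also inert without sampling
    repetition_penalty=2.5,
)
print(response)
```

Note that with `do_sample=False` and `num_beams=3`, decoding is deterministic beam search, so `temperature` and `top_p` are accepted but currently have no effect.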
```diff
@@ -220,7 +220,6 @@ def http_bot(
         return
 
     if model is None:
-        # state.messages[-1][-1] = server_error_msg
        state.update_message(Conversation.ASSISTANT, server_error_msg)
         yield (
             state,
@@ -246,26 +245,15 @@ def http_bot(
 
     try:
         # Stream output
-        # response = requests.post(worker_addr, json=pload, headers=headers, stream=True, timeout=300)
-        print(f"all_image_paths: {all_image_paths}")
         message = state.get_user_message(source=state.USER)
-        print(f"message: {message}")
-        response, conv_history = predict(message, all_image_paths[0])
+        logger.info(f"==== User message ====\n{message}")
+        logger.info(f"==== Image paths ====\n{all_image_paths}")
+        response, conv_history = predict(message, all_image_paths[0], None, max_input_tiles, temperature, max_new_tokens, top_p, repetition_penalty)
 
-        # streamer = TextIteratorStreamer(
-        #     tokenizer, skip_prompt=True, skip_special_tokens=True
-        # )
-        # generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
-
-        # thread = Thread(target=model.generate, kwargs=generation_kwargs)
-        # thread.start()
-
         # response = "This is a test response"
         buffer = ""
         for new_text in response:
             buffer += new_text
-            # Remove <|im_end|> or similar tokens from the output
-            buffer = buffer.replace("<|im_end|>", "")
 
             state.update_message(Conversation.ASSISTANT, buffer + state.streaming_placeholder, None)
             yield (
```
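One subtlety in this hunk: `predict`'s third parameter is now `history`, so the call above is shown with an explicit `None` in that slot; without it, `max_input_tiles` would be bound to `history` and every later argument would shift one position to the left. Also worth noting: `model.chat()` returns a completed string, so `for new_text in response` iterates character by character rather than token by token. A tiny standalone illustration (not from app.py):

```python
# Iterating a string yields one character per step, so the UI "streams"
# text that has already been fully generated.
response = "Xin chào!"   # stands in for the string model.chat() returns
buffer = ""
for new_text in response:
    buffer += new_text   # grows by one character per iteration
print(buffer)            # -> Xin chào!
```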
```diff
@@ -275,8 +263,7 @@ def http_bot(
         ) + (disable_btn,) * 5
 
     except Exception as e:
-        logger.error(f"Error in http_bot: {e}")
-        traceback.print_exc()
+        logger.error(f"Error in http_bot: {e} \n{traceback.format_exc()}")
         state.update_message(Conversation.ASSISTANT, server_error_msg, None)
         yield (
             state,
@@ -293,7 +280,7 @@ def http_bot(
 
     ai_response = state.return_last_message()
 
-    logger.info(f"==== response ====\n{ai_response}")
+    logger.info(f"==== AI response ====\n{ai_response}")
 
     state.end_of_current_turn()
 
@@ -321,9 +308,10 @@ def http_bot(
 title_html = """
 <div style="text-align: center;">
   <img src="https://lh3.googleusercontent.com/pw/AP1GczMmW-aFQ4dNaR_LCAllh4UZLLx9fTZ1ITHeGVMWx-1bwlIWz4VsWJSGb3_9C7CQfvboqJH41y2Sbc5ToC9ZmKeV4-buf_DEevIMU0HtaLWgHAPOqBiIbG6LaE8CvDqniLZzvB9UX8TR_-YgvYzPFt2z=w1472-h832-s-no-gm?authuser=0" style="height: 100; width: 100%;">
+  <p>🔥Vintern-1B-v3_5🔥</p>
   <p>Vintern-1B: An Efficient Multimodal Large Language Model for Vietnamese</p>
   <a href="https://huggingface.co/papers/2408.12480">[📖 Vintern Paper]</a>
-  <a href="https://huggingface.co/5CD-AI">[🤗 5CD-AI Huggingface]</a>
+  <a href="https://huggingface.co/5CD-AI">[🤗 Huggingface]</a>
 </div>
 """
 
@@ -411,7 +399,7 @@ def build_demo():
     )
 
     with gr.Blocks(
-        title="Vintern-Chat",
+        title="Vintern-1B-v3_5-Demo",
         theme=gr.themes.Default(),
         css=block_css,
     ) as demo:
@@ -424,7 +412,7 @@ def build_demo():
 
         with gr.Accordion("Settings", open=False) as setting_row:
             system_prompt = gr.Textbox(
-                value="请尽可能详细地回答用户的问题。",
+                value="Bạn là một trợ lý AI đa phương thức hữu ích, hãy trả lời câu hỏi người dùng một cách chi tiết.",
                 label="System Prompt",
                 interactive=True,
             )
```
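For reference, the replaced Chinese default prompt means "Please answer the user's questions in as much detail as possible," and the new Vietnamese prompt means "You are a helpful multimodal AI assistant; please answer the user's questions in detail."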
```diff
@@ -501,7 +489,7 @@ def build_demo():
         with gr.Column(scale=8):
             chatbot = gr.Chatbot(
                 elem_id="chatbot",
-                label="Vintern",
+                label="Vintern-1B-v3_5-Demo",
                 height=580,
                 show_copy_button=True,
                 show_share_button=True,
```
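The new `history` parameter is the quiet enabler in this commit: `model.chat(..., return_history=True)` hands back a conversation state that can be fed into the next call. `http_bot` does not use it yet (it always starts fresh), but a hypothetical multi-turn exchange, with prompts and paths invented for illustration, would look like:

```python
# Hypothetical two-turn exchange, not present in app.py: reuse the
# conv_history returned by one predict() call as the history of the next.
# The remaining generation parameters fall back to predict()'s defaults.
reply1, hist = predict("Trong ảnh có gì?", "example.jpg", None)     # "What is in the image?"
reply2, hist = predict("Mô tả chi tiết hơn.", "example.jpg", hist)  # "Describe in more detail."
print(reply1)
print(reply2)
```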