Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
2013bf3
1
Parent(s):
9f4e146
Update user agent and enhance predict function with additional parameters for improved flexibility
Browse files
app.py
CHANGED
@@ -37,7 +37,7 @@ torch.set_default_device('cuda')
|
|
37 |
|
38 |
logger = build_logger("gradio_web_server", "gradio_web_server.log")
|
39 |
|
40 |
-
headers = {"User-Agent": "Vintern-
|
41 |
|
42 |
no_change_btn = gr.Button()
|
43 |
enable_btn = gr.Button(interactive=True)
|
@@ -181,21 +181,21 @@ model = AutoModel.from_pretrained(
|
|
181 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=False)
|
182 |
|
183 |
@spaces.GPU
|
184 |
-
def predict(message,
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
|
|
|
|
|
|
|
|
190 |
if pixel_values is not None:
|
191 |
question = '<image>\n'+message
|
192 |
else:
|
193 |
question = message
|
194 |
-
|
195 |
-
print("Tokenizer: ", tokenizer)
|
196 |
-
print("Question: ", question)
|
197 |
-
response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
|
198 |
-
print(f"AI response: {response}")
|
199 |
return response, conv_history
|
200 |
|
201 |
def http_bot(
|
@@ -220,7 +220,6 @@ def http_bot(
|
|
220 |
return
|
221 |
|
222 |
if model is None:
|
223 |
-
# state.messages[-1][-1] = server_error_msg
|
224 |
state.update_message(Conversation.ASSISTANT, server_error_msg)
|
225 |
yield (
|
226 |
state,
|
@@ -246,26 +245,15 @@ def http_bot(
|
|
246 |
|
247 |
try:
|
248 |
# Stream output
|
249 |
-
# response = requests.post(worker_addr, json=pload, headers=headers, stream=True, timeout=300)
|
250 |
-
print(f"all_image_paths: {all_image_paths}")
|
251 |
message = state.get_user_message(source=state.USER)
|
252 |
-
|
253 |
-
|
|
|
254 |
|
255 |
-
# streamer = TextIteratorStreamer(
|
256 |
-
# tokenizer, skip_prompt=True, skip_special_tokens=True
|
257 |
-
# )
|
258 |
-
# generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
|
259 |
-
|
260 |
-
# thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
261 |
-
# thread.start()
|
262 |
-
|
263 |
# response = "This is a test response"
|
264 |
buffer = ""
|
265 |
for new_text in response:
|
266 |
buffer += new_text
|
267 |
-
# Remove <|im_end|> or similar tokens from the output
|
268 |
-
buffer = buffer.replace("<|im_end|>", "")
|
269 |
|
270 |
state.update_message(Conversation.ASSISTANT, buffer + state.streaming_placeholder, None)
|
271 |
yield (
|
@@ -275,8 +263,7 @@ def http_bot(
|
|
275 |
) + (disable_btn,) * 5
|
276 |
|
277 |
except Exception as e:
|
278 |
-
logger.error(f"Error in http_bot: {e}")
|
279 |
-
traceback.print_exc()
|
280 |
state.update_message(Conversation.ASSISTANT, server_error_msg, None)
|
281 |
yield (
|
282 |
state,
|
@@ -293,7 +280,7 @@ def http_bot(
|
|
293 |
|
294 |
ai_response = state.return_last_message()
|
295 |
|
296 |
-
logger.info(f"==== response ====\n{ai_response}")
|
297 |
|
298 |
state.end_of_current_turn()
|
299 |
|
@@ -321,9 +308,10 @@ def http_bot(
|
|
321 |
title_html = """
|
322 |
<div style="text-align: center;">
|
323 |
<img src="https://lh3.googleusercontent.com/pw/AP1GczMmW-aFQ4dNaR_LCAllh4UZLLx9fTZ1ITHeGVMWx-1bwlIWz4VsWJSGb3_9C7CQfvboqJH41y2Sbc5ToC9ZmKeV4-buf_DEevIMU0HtaLWgHAPOqBiIbG6LaE8CvDqniLZzvB9UX8TR_-YgvYzPFt2z=w1472-h832-s-no-gm?authuser=0" style="height: 100; width: 100%;">
|
|
|
324 |
<p>Vintern-1B: An Efficient Multimodal Large Language Model for Vietnamese</p>
|
325 |
<a href="https://huggingface.co/papers/2408.12480">[📖 Vintern Paper]</a>
|
326 |
-
<a href="https://huggingface.co/5CD-AI">[🤗
|
327 |
</div>
|
328 |
"""
|
329 |
|
@@ -411,7 +399,7 @@ def build_demo():
|
|
411 |
)
|
412 |
|
413 |
with gr.Blocks(
|
414 |
-
title="Vintern-
|
415 |
theme=gr.themes.Default(),
|
416 |
css=block_css,
|
417 |
) as demo:
|
@@ -424,7 +412,7 @@ def build_demo():
|
|
424 |
|
425 |
with gr.Accordion("Settings", open=False) as setting_row:
|
426 |
system_prompt = gr.Textbox(
|
427 |
-
value="
|
428 |
label="System Prompt",
|
429 |
interactive=True,
|
430 |
)
|
@@ -501,7 +489,7 @@ def build_demo():
|
|
501 |
with gr.Column(scale=8):
|
502 |
chatbot = gr.Chatbot(
|
503 |
elem_id="chatbot",
|
504 |
-
label="Vintern",
|
505 |
height=580,
|
506 |
show_copy_button=True,
|
507 |
show_share_button=True,
|
|
|
37 |
|
38 |
logger = build_logger("gradio_web_server", "gradio_web_server.log")
|
39 |
|
40 |
+
# Identifies this demo client in outbound HTTP requests.
headers = {"User-Agent": "Vintern-1B-3.5-Demo Client"}
|
41 |
|
42 |
no_change_btn = gr.Button()
|
43 |
enable_btn = gr.Button(interactive=True)
|
|
|
181 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=False)
|
182 |
|
183 |
@spaces.GPU
def predict(message,
            image_path,
            history,
            max_input_tiles=6,
            temperature=1.0,
            max_output_tokens=700,
            top_p=0.7,
            repetition_penalty=2.5):
    """Run one chat turn of the Vintern model.

    Args:
        message: User text prompt.
        image_path: Path to the input image, or None for a text-only turn.
        history: Prior conversation turns passed through to model.chat
            (None starts a fresh conversation).
        max_input_tiles: Maximum number of image tiles for load_image.
        temperature: Sampling temperature. NOTE(review): inert while
            do_sample=False below — confirm intent.
        max_output_tokens: Generation cap (mapped to max_new_tokens).
        top_p: Nucleus-sampling cutoff. NOTE(review): also inert while
            do_sample=False.
        repetition_penalty: Penalty applied to repeated tokens.

    Returns:
        (response, conv_history) as returned by model.chat with
        return_history=True.
    """
    # Guard the image load: the original called load_image unconditionally,
    # so a None image_path crashed even though the branch below already
    # handles pixel_values being None. With an image supplied, behavior is
    # unchanged.
    pixel_values = None
    if image_path is not None:
        pixel_values = load_image(image_path, max_num=max_input_tiles).to(torch.bfloat16).cuda()

    generation_config = dict(
        temperature=temperature,
        max_new_tokens=max_output_tokens,
        top_p=top_p,
        do_sample=False,  # greedy/beam decoding; sampling params above are ignored
        num_beams=3,
        repetition_penalty=repetition_penalty,
    )

    # Prepend the image placeholder only when an image is actually present.
    if pixel_values is not None:
        question = '<image>\n' + message
    else:
        question = message

    response, conv_history = model.chat(tokenizer, pixel_values, question,
                                        generation_config, history=history,
                                        return_history=True)
    return response, conv_history
|
200 |
|
201 |
def http_bot(
|
|
|
220 |
return
|
221 |
|
222 |
if model is None:
|
|
|
223 |
state.update_message(Conversation.ASSISTANT, server_error_msg)
|
224 |
yield (
|
225 |
state,
|
|
|
245 |
|
246 |
try:
|
247 |
# Stream output
|
|
|
|
|
248 |
message = state.get_user_message(source=state.USER)
logger.info(f"==== User message ====\n{message}")
logger.info(f"==== Image paths ====\n{all_image_paths}")
# BUG FIX: the call omitted predict's required `history` argument, so every
# later argument shifted one slot left (max_input_tiles was consumed as
# `history`, temperature as `max_input_tiles`, max_new_tokens as
# `temperature`, ...). Pass by keyword to pin each value to its parameter.
# NOTE(review): history=None reproduces the pre-change behavior
# (model.chat was previously called with history=None); wire real
# conversation history here if multi-turn context is desired.
response, conv_history = predict(message,
                                 all_image_paths[0],
                                 history=None,
                                 max_input_tiles=max_input_tiles,
                                 temperature=temperature,
                                 max_output_tokens=max_new_tokens,
                                 top_p=top_p,
                                 repetition_penalty=repetition_penalty)
|
252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
# response = "This is a test response"
|
254 |
buffer = ""
|
255 |
for new_text in response:
|
256 |
buffer += new_text
|
|
|
|
|
257 |
|
258 |
state.update_message(Conversation.ASSISTANT, buffer + state.streaming_placeholder, None)
|
259 |
yield (
|
|
|
263 |
) + (disable_btn,) * 5
|
264 |
|
265 |
except Exception as e:
|
266 |
+
logger.error(f"Error in http_bot: {e} \n{traceback.format_exc()}")
|
|
|
267 |
state.update_message(Conversation.ASSISTANT, server_error_msg, None)
|
268 |
yield (
|
269 |
state,
|
|
|
280 |
|
281 |
ai_response = state.return_last_message()
|
282 |
|
283 |
+
logger.info(f"==== AI response ====\n{ai_response}")
|
284 |
|
285 |
state.end_of_current_turn()
|
286 |
|
|
|
308 |
# Static header markup rendered at the top of the demo page.
# FIX: `height: 100` was unit-less, which is invalid CSS and ignored by
# browsers — made it `height: 100px` so the declaration takes effect.
title_html = """
<div style="text-align: center;">
<img src="https://lh3.googleusercontent.com/pw/AP1GczMmW-aFQ4dNaR_LCAllh4UZLLx9fTZ1ITHeGVMWx-1bwlIWz4VsWJSGb3_9C7CQfvboqJH41y2Sbc5ToC9ZmKeV4-buf_DEevIMU0HtaLWgHAPOqBiIbG6LaE8CvDqniLZzvB9UX8TR_-YgvYzPFt2z=w1472-h832-s-no-gm?authuser=0" style="height: 100px; width: 100%;">
<p>🔥Vintern-1B-v3_5🔥</p>
<p>Vintern-1B: An Efficient Multimodal Large Language Model for Vietnamese</p>
<a href="https://huggingface.co/papers/2408.12480">[📖 Vintern Paper]</a>
<a href="https://huggingface.co/5CD-AI">[🤗 Huggingface]</a>
</div>
"""
|
317 |
|
|
|
399 |
)
|
400 |
|
401 |
with gr.Blocks(
|
402 |
+
title="Vintern-1B-v3_5-Demo",
|
403 |
theme=gr.themes.Default(),
|
404 |
css=block_css,
|
405 |
) as demo:
|
|
|
412 |
|
413 |
with gr.Accordion("Settings", open=False) as setting_row:
|
414 |
system_prompt = gr.Textbox(
|
415 |
+
value="Bạn là một trợ lý AI đa phương thức hữu ích, hãy trả lời câu hỏi người dùng một cách chi tiết.",
|
416 |
label="System Prompt",
|
417 |
interactive=True,
|
418 |
)
|
|
|
489 |
with gr.Column(scale=8):
|
490 |
chatbot = gr.Chatbot(
|
491 |
elem_id="chatbot",
|
492 |
+
label="Vintern-1B-v3_5-Demo",
|
493 |
height=580,
|
494 |
show_copy_button=True,
|
495 |
show_share_button=True,
|