# LMDeploy + Gradio chat demo serving internlm/internlm2-math-7b with the
# TurboMind backend (originally deployed as a HuggingFace Spaces app.py).
# Build the inference engine once at import time; the gradio callbacks below
# share it through the InterFace singleton provided by turbomind_coupled.
from lmdeploy.serve.gradio.turbomind_coupled import *
from lmdeploy.messages import TurbomindEngineConfig
from lmdeploy import ChatTemplateConfig

# Use the internlm2 chat template but blank out the system prompt / meta
# instruction so the math model answers without extra preamble.
chat_template = ChatTemplateConfig(model_name='internlm2-chat-7b',
                                   system='',
                                   eosys='',
                                   meta_instruction='')
# max_batch_size=1 and a small KV-cache fraction (5%) keep GPU memory low for
# a single-user demo.
backend_config = TurbomindEngineConfig(
    model_name='internlm2-chat-7b',
    max_batch_size=1,
    cache_max_entry_count=0.05)  # , model_format='awq')
model_path = 'internlm/internlm2-math-7b'
InterFace.async_engine = AsyncEngine(model_path=model_path,
                                     backend='turbomind',
                                     backend_config=backend_config,
                                     chat_template_config=chat_template,
                                     tp=1)
async def reset_local_func(instruction_txtbox: gr.Textbox,
                           state_chatbot: Sequence, session_id: int):
    """Reset the chat session.

    Args:
        instruction_txtbox (gr.Textbox): the user-input textbox to clear
        state_chatbot (Sequence): the chatting history
        session_id (int): the current session id

    Returns:
        tuple: (new chat state, chatbot display value, cleared textbox
            update, fresh session id)
    """
    state_chatbot = []
    # End the session by allocating a fresh id; take the lock so concurrent
    # clients never receive the same id.
    with InterFace.lock:
        InterFace.global_session_id += 1
        session_id = InterFace.global_session_id
    return (state_chatbot, state_chatbot, gr.Textbox.update(value=''),
            session_id)
async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
                            reset_btn: gr.Button, session_id: int):
    """Stop the in-flight generation for a session.

    Args:
        state_chatbot (Sequence): the chatting history
        cancel_btn (gr.Button): the cancel button
        reset_btn (gr.Button): the reset button
        session_id (int): the session id

    Yields:
        tuple: (chat state, cancel-btn update, reset-btn update, session id)
    """
    # Disable both buttons while the stop request is being processed.
    yield (state_chatbot, disable_btn, disable_btn, session_id)
    InterFace.async_engine.stop_session(session_id)
    # pytorch backend does not support resume chat history now, so just
    # re-enable the reset button and keep the current session id.
    if InterFace.async_engine.backend == 'pytorch':
        yield (state_chatbot, disable_btn, enable_btn, session_id)
    else:
        # turbomind: replay the accumulated history into a brand-new session
        # so a later prompt can continue the conversation.
        with InterFace.lock:
            InterFace.global_session_id += 1
            session_id = InterFace.global_session_id
        messages = []
        for qa in state_chatbot:
            messages.append(dict(role='user', content=qa[0]))
            if qa[1] is not None:
                messages.append(dict(role='assistant', content=qa[1]))
        # max_new_tokens=0: prefill the history without generating output.
        gen_config = GenerationConfig(max_new_tokens=0)
        async for out in InterFace.async_engine.generate(
                messages,
                session_id,
                gen_config=gen_config,
                stream_response=True,
                sequence_start=True,
                sequence_end=False):
            pass
        yield (state_chatbot, disable_btn, enable_btn, session_id)
with gr.Blocks(css=CSS, theme=THEME) as demo:
    # Per-browser-tab state: chat history and session id.
    state_chatbot = gr.State([])
    state_session_id = gr.State(0)

    with gr.Column(elem_id='container'):
        gr.Markdown('## LMDeploy Playground')
        chatbot = gr.Chatbot(
            elem_id='chatbot',
            label=InterFace.async_engine.engine.model_name)
        instruction_txtbox = gr.Textbox(
            placeholder='Please input the instruction', label='Instruction')
        with gr.Row():
            cancel_btn = gr.Button(value='Cancel', interactive=False)
            reset_btn = gr.Button(value='Reset')
        with gr.Row():
            request_output_len = gr.Slider(1,
                                           2048,
                                           value=1024,
                                           step=1,
                                           label='Maximum new tokens')
            top_p = gr.Slider(0.01, 1, value=1.0, step=0.01, label='Top_p')
            temperature = gr.Slider(0.01,
                                    1.5,
                                    value=0.01,
                                    step=0.01,
                                    label='Temperature')

    # Stream model output into the chatbot on submit ...
    send_event = instruction_txtbox.submit(chat_stream_local, [
        instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
        state_session_id, top_p, temperature, request_output_len
    ], [state_chatbot, chatbot, cancel_btn, reset_btn])
    # ... and clear the textbox immediately.
    instruction_txtbox.submit(
        lambda: gr.Textbox.update(value=''),
        [],
        [instruction_txtbox],
    )
    # Cancelling aborts the streaming event and re-syncs button state.
    cancel_btn.click(
        cancel_local_func,
        [state_chatbot, cancel_btn, reset_btn, state_session_id],
        [state_chatbot, cancel_btn, reset_btn, state_session_id],
        cancels=[send_event])
    reset_btn.click(
        reset_local_func,
        [instruction_txtbox, state_chatbot, state_session_id],
        [state_chatbot, chatbot, instruction_txtbox, state_session_id],
        cancels=[send_event])

    def init():
        """Allocate a unique session id when a new client page loads."""
        with InterFace.lock:
            InterFace.global_session_id += 1
            new_session_id = InterFace.global_session_id
        return new_session_id

    demo.load(init, inputs=None, outputs=[state_session_id])

demo.queue(concurrency_count=InterFace.async_engine.instance_num,
           max_size=100).launch()