# LLMhistory / app.py
# Last commit: "Fix async gpu" (cc03544) by freQuensy23 -- file size 1.75 kB
from dotenv import load_dotenv
from generators import *
import gradio as gr
from utils import async_zip_stream
load_dotenv()
async def handle(system_input: str, user_input: str):
    """Stream model answers for one prompt as six-element lists.

    The two inputs are printed, then four generators (gpt2, mistral,
    llama2, llama3) are combined through ``async_zip_stream``.  For every
    item that stream produces, ``str()`` of each of its first four entries
    is appended to the matching running text and a snapshot is yielded.
    Once the stream is exhausted, one last list is yielded that also
    carries the results of ``generate_openllama`` and ``generate_bloom``.

    Yields:
        ``[gpt2, mistral, llama2, llama3, openllama, bloom]``; the last two
        slots are empty strings until the final yield.
    """
    print(system_input, user_input)

    # One accumulated text per streamed model, in the order passed below.
    streamed = [""] * 4

    # NOTE(review): presumably each generate_* call here returns an async
    # generator of text chunks -- confirm in generators.py.
    merged_stream = async_zip_stream(
        generate_gpt2(system_input, user_input),
        generate_mistral_7bvo1(system_input, user_input),
        generate_llama2(system_input, user_input),
        generate_llama3(system_input, user_input),
    )

    async for outputs in merged_stream:
        streamed = [
            text + str(outputs[slot]) for slot, text in enumerate(streamed)
        ]
        # The last two slots stay blank while the first four are streaming.
        yield [*streamed, "", ""]

    # These two are called as plain functions only after streaming is done.
    # NOTE(review): assumed to return the finished text -- verify.
    openllama_text = generate_openllama(system_input, user_input)
    bloom_text = generate_bloom(system_input, user_input)
    yield [*streamed, openllama_text, bloom_text]
# UI layout: system prompt on top, two rows of three read-only answer boxes,
# then the user prompt and the button that triggers `handle`.
with gr.Blocks() as demo:
    system_input = gr.Textbox(
        label="System Input",
        value="You are AI assistant",
        lines=2,
    )

    # Components are created inside each Row context so they land in that row;
    # the generator is consumed by the unpacking, i.e. still inside the `with`.
    with gr.Row():
        gpt, mistral, llama = (
            gr.Textbox(label=title, lines=4, interactive=False)
            for title in ("gpt-2", "mistral", "openllama")
        )
    with gr.Row():
        llama2, llama3, bloom = (
            gr.Textbox(label=title, lines=4, interactive=False)
            for title in ("llama-2", "llama-3", "bloom")
        )

    user_input = gr.Textbox(label="User Input", lines=2)
    gen_button = gr.Button("Generate")

    # The order of `outputs` must match the six-element lists that `handle`
    # yields: gpt2, mistral, llama2, llama3, openllama, bloom.
    prompt_boxes = [system_input, user_input]
    answer_boxes = [gpt, mistral, llama2, llama3, llama, bloom]
    gen_button.click(fn=handle, inputs=prompt_boxes, outputs=answer_boxes)

demo.launch()