"""Gradio demo that streams generated text into three text boxes."""

import copy
import random

import gradio as gr

# Sample passage whose shuffled words feed the placeholder model below.
TEST = (
    " Test of Time. A Benchmark for Evaluating LLMs on Temporal Reasoning. "
    "Large language models (LLMs) have showcased remarkable reasoning "
    "capabilities, yet they remain susceptible to errors, particularly in "
    "temporal reasoning tasks involving complex temporal logic. "
)


def generate_data_test():
    """Yield the words of ``TEST`` in random order, each followed by a space."""
    # ``str`` is immutable, so no defensive copy of TEST is needed, and the
    # shuffled list can be iterated directly instead of re-joined and re-split.
    words = TEST.split()
    random.shuffle(words)
    for word in words:
        yield word + " "


class _ShuffledTextModel:
    """Placeholder model used when the caller does not supply a real one.

    It mimics the interface ``stream_data`` relies on: ``gen`` yields
    ``(index, word)`` pairs, where ``index`` selects the entry of
    ``content_list`` the word belongs to. The prompts themselves are ignored;
    every entry receives its own shuffled copy of ``TEST``.
    """

    def gen(self, content_list, streaming=True):
        """Yield ``(index, word)`` pairs, interleaving one word per entry."""
        streams = [generate_data_test() for _ in content_list]
        # All streams carry the same number of words, so zip drops nothing.
        for words in zip(*streams):
            for index, word in enumerate(words):
                # stream_data inserts the separating space itself.
                yield index, word.rstrip()


def stream_data(content_list, model=None):
    """Stream generated words for each entry of ``content_list``.

    Args:
        content_list: Input texts, one per output column.
        model: Object exposing ``gen(content_list, streaming=True)`` that
            returns an iterable of ``(index, word)`` pairs. When ``None``, a
            placeholder that streams shuffled sample text is used.

    Yields:
        A tuple with the text accumulated so far for every entry, emitted
        once per generated word.
    """
    if model is None:
        model = _ShuffledTextModel()

    # Materialize once so a one-shot iterable can be both counted and
    # forwarded to the model.
    content_list = list(content_list)
    outputs = [""] * len(content_list)

    for index, word in model.gen(content_list, streaming=True):
        outputs[index] += f" {word}"
        yield tuple(outputs)


def create_interface(model=None):
    """Build the three-column streaming demo.

    Args:
        model: Optional model forwarded to ``stream_data``; see that
            function for the expected interface.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks() as demo:
        with gr.Group():
            with gr.Row():
                columns = [
                    gr.Textbox(label=f"Column {i + 1}", lines=10)
                    for i in range(3)
                ]

        start_btn = gr.Button("Start Streaming")

        def start_streaming(*content_list):
            """Stream model output into the text boxes, one word at a time."""
            # The current text-box contents arrive as arguments because the
            # boxes are wired up through ``inputs=`` below; a component's
            # ``.value`` attribute only holds its construction-time value.
            for data in stream_data(list(content_list), model):
                yield tuple(gr.update(value=text) for text in data)

        start_btn.click(
            fn=start_streaming,
            inputs=columns,
            outputs=columns,
            show_progress=False,
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    # Queuing is enabled before launch because the click handler is a generator.
    demo.queue()
    demo.launch()