"""Gradio demo that streams model output into three text columns."""

import copy
import random
from time import sleep
from typing import Iterator, Tuple

import gradio as gr

from utils.model import Model

# Sample text used by ``generate_data_test`` as a stand-in word source.
TEST = """
Test of Time. A Benchmark for Evaluating LLMs on Temporal Reasoning.
Large language models (LLMs) have showcased remarkable reasoning capabilities,
yet they remain susceptible to errors, particularly in temporal reasoning tasks
involving complex temporal logic.
"""


def generate_data_test() -> Iterator[str]:
    """Yield the words of ``TEST`` in random order, each followed by a space.

    The shuffle uses the module-level ``random`` generator, so seeding it
    beforehand makes the order reproducible.
    """
    # ``str.split()`` already returns a fresh list, so no copy of TEST is
    # needed before shuffling in place.
    words = TEST.split()
    random.shuffle(words)
    for word in words:
        yield f"{word} "


def stream_data(content_list, model) -> Iterator[Tuple[str, ...]]:
    """Stream batched model output, yielding the accumulated text per column.

    Args:
        content_list: Input texts, one per output column.
        model: Object exposing ``gen(content_list, streaming=True)``.

    Yields:
        A tuple with one string per entry of ``content_list``; each string is
        everything streamed so far for that entry, with a single space
        inserted before every chunk.
    """
    outputs = ["" for _ in content_list]

    # Create the stream exactly once: calling ``model.gen`` again on every
    # step would restart generation and never reach the end of the stream.
    # NOTE(review): assumes ``model.gen(..., streaming=True)`` returns an
    # iterator whose items are indexable with one chunk per input text --
    # confirm against utils.model.
    for words in model.gen(content_list, streaming=True):
        # Strings are immutable (no ``append``), so build the new
        # accumulated text for every column instead of mutating in place.
        outputs = [f"{text} {words[i]}" for i, text in enumerate(outputs)]
        yield tuple(outputs)


def create_interface(model=None):
    """Build the three-column streaming demo.

    Args:
        model: Object passed to ``stream_data`` for generation. When omitted,
            a ``Model()`` instance is created.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is responsible for
        queueing and launching it.
    """
    if model is None:
        # NOTE(review): assumes ``Model`` can be constructed without
        # arguments -- confirm against utils.model.
        model = Model()

    with gr.Blocks() as demo:
        with gr.Group():
            with gr.Row():
                columns = [
                    gr.Textbox(label=f"Column {i+1}", lines=10)
                    for i in range(3)
                ]
        start_btn = gr.Button("Start Streaming")

        def start_streaming(*contents):
            """Stream model output for the current text of every column."""
            # The live textbox contents arrive as positional arguments via
            # ``inputs=columns``; a component's ``.value`` attribute only
            # holds the value it was constructed with.
            for data in stream_data(list(contents), model):
                yield tuple(gr.update(value=text) for text in data)

        start_btn.click(
            fn=start_streaming,
            inputs=columns,
            outputs=columns,
            show_progress=False,
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    # Generator (streaming) callbacks are served through the queue.
    demo.queue()
    demo.launch()