import copy
import random

import gradio as gr

TEST = """
Test of Time: A Benchmark for Evaluating LLMs on Temporal Reasoning.
Large language models (LLMs) have showcased remarkable reasoning capabilities,
yet they remain susceptible to errors, particularly in temporal reasoning tasks
involving complex temporal logic.
"""


def generate_data_test():
    """Yield the words of TEST in shuffled order, one at a time."""
    temp = copy.deepcopy(TEST)
    words = temp.split()
    random.shuffle(words)
    for word in words:
        # Spacing between words is added by stream_data.
        yield word


def stream_data(content_list, model=None):
    """Stream words into one output buffer per entry in content_list.

    If a model is supplied, its streaming() method is expected to yield
    (column_index, word) pairs for the whole batch; otherwise each column
    is fed from the shuffled-word test generator.
    """
    outputs = ["" for _ in content_list]

    if model is not None:
        # Use the model's batch generation method.
        generator = model.streaming(content_list)
    else:
        # Demo fallback: interleave one test generator per column so the
        # columns fill concurrently.
        def interleave():
            gens = [generate_data_test() for _ in content_list]
            active = list(range(len(gens)))
            while active:
                for idx in list(active):
                    try:
                        yield idx, next(gens[idx])
                    except StopIteration:
                        active.remove(idx)

        generator = interleave()

    for idx, word in generator:
        # Append the next generated word to the corresponding column.
        outputs[idx] += f"{word} "
        yield tuple(outputs)


def create_interface():
    with gr.Blocks() as demo:
        with gr.Group():
            with gr.Row():
                columns = [
                    gr.Textbox(label=f"Column {i + 1}", lines=10) for i in range(3)
                ]
        start_btn = gr.Button("Start Streaming")

        def start_streaming(*contents):
            # The current text of each column arrives through the click
            # inputs; reading col.value would only return the initial value.
            content_list = list(contents)
            for data in stream_data(content_list):
                updates = [gr.update(value=data[i]) for i in range(len(columns))]
                yield tuple(updates)

        start_btn.click(
            fn=start_streaming,
            inputs=columns,
            outputs=columns,
            show_progress=False,
        )
    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.queue()  # queue() is required for generator (streaming) callbacks
    demo.launch()
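
# --- Illustrative sketch only, not part of the original script ---
# stream_data() assumes a model object whose streaming(content_list) method
# yields (column_index, word) pairs for the whole batch. The hypothetical
# class below mocks that assumed interface so the model path can be exercised
# without a real LLM; swap in your own client with the same method shape.
class MockStreamingModel:
    def streaming(self, content_list):
        # Round-robin over the input texts, emitting one word at a time.
        word_lists = [text.split() for text in content_list]
        longest = max((len(words) for words in word_lists), default=0)
        for step in range(longest):
            for idx, words in enumerate(word_lists):
                if step < len(words):
                    yield idx, words[step]

# Usage sketch:
#   for chunk in stream_data(["a b c", "x y"], MockStreamingModel()):
#       print(chunk)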