from dotenv import load_dotenv
import gradio as gr
import random

from utils.model import Model
from utils.data import dataset
from utils.metric import metric_rouge_score
from summarization_playground import model, generate_answer

load_dotenv()


def process(seed, model_selection, prompt, num=10):
    """Sample `num` dialogues from the dataset, summarize each with the
    selected model and prompt, and score the response against the
    reference summary with ROUGE."""
    random.seed(seed)

    response_list = []
    for data in random.choices(dataset, k=num):
        dialogue = data['dialogue']
        summary = data['summary']
        response = generate_answer(dialogue, model, model_selection, prompt)
        rouge_score = metric_rouge_score(response, summary)

        response_list.append(
            {
                'dialogue': dialogue,
                'summary': summary,
                'response': response,
                'metric_score': {'rouge_score': rouge_score}
            }
        )

    # Render the results as a Markdown string so the gr.Markdown output
    # component displays them legibly instead of the raw list repr.
    sections = []
    for i, result in enumerate(response_list, start=1):
        sections.append(
            f"### Sample {i}\n\n"
            f"**Dialogue**\n\n{result['dialogue']}\n\n"
            f"**Reference summary**\n\n{result['summary']}\n\n"
            f"**Model response**\n\n{result['response']}\n\n"
            f"**ROUGE score**: {result['metric_score']['rouge_score']}"
        )
    return "\n\n---\n\n".join(sections)


def create_batch_evaluation_interface():
    with gr.Blocks() as demo:
        gr.Markdown("## Batch evaluation setup")
        with gr.Row():
            seed = gr.Number(value=8, label="Random seed")
            model_dropdown = gr.Dropdown(
                choices=Model.__model_list__,
                label="Choose a model",
                value=Model.__model_list__[0]
            )
        template_text = gr.Textbox(
            value="Summarize the following dialogue",
            label="Input Prompting Template",
            lines=8,
            placeholder="Input your prompts"
        )
        submit_button = gr.Button("✨ Submit ✨")
        output = gr.Markdown()

        submit_button.click(
            process,
            inputs=[seed, model_dropdown, template_text],
            outputs=output
        )

    return demo


if __name__ == "__main__":
    demo = create_batch_evaluation_interface()
    demo.launch()