Chris-lab / pages /batch_evaluation.py
kz209
update
d3d48e1
raw
history blame
1.77 kB
from dotenv import load_dotenv
import gradio as gr
import random
from utils.model import Model
from utils.data import dataset
from utils.metric import metric_rouge_score
from pages.summarization_playground import model, generate_answer
# Load variables from a local .env file into the process environment.
# NOTE(review): this call runs after the project imports above, so a module
# that reads environment variables at import time would not see values loaded
# here — confirm those modules load their own configuration.
load_dotenv()
def process(seed, model_selection, prompt, num=10):
    """Run the summarizer on a random batch of dataset records and score it.

    The shared ``random`` generator is re-seeded with ``seed`` and ``num``
    records are drawn from ``dataset`` with ``random.choices`` (sampling with
    replacement, so a record may appear more than once). Each record's
    dialogue is passed to ``generate_answer`` and the reply is scored against
    the record's reference summary with ``metric_rouge_score``.

    Args:
        seed: Value handed to ``random.seed`` before sampling.
        model_selection: Forwarded unchanged to ``generate_answer``.
        prompt: Forwarded unchanged to ``generate_answer``.
        num: How many records to draw.

    Returns:
        A list with one dict per drawn record, holding the keys
        ``'dialogue'``, ``'summary'``, ``'response'`` and ``'metric_score'``
        (itself a dict with the single key ``'rouge_score'``).
    """
    random.seed(seed)
    sampled_records = random.choices(dataset, k=num)

    def evaluate(record):
        # Generate first, then score: the metric compares the generated text
        # with the reference summary of the same record.
        source_dialogue = record['dialogue']
        reference_summary = record['summary']
        generated = generate_answer(source_dialogue, model, model_selection, prompt)
        score = metric_rouge_score(generated, reference_summary)
        return {
            'dialogue': source_dialogue,
            'summary': reference_summary,
            'response': generated,
            'metric_score': {
                'rouge_score': score,
            },
        }

    return [evaluate(record) for record in sampled_records]
def create_batch_evaluation_interface():
    """Build the Gradio page used to batch-evaluate a summarization prompt.

    The page shows a random-seed field, a model selector and a prompt
    template box. Clicking the submit button calls ``process`` with those
    three values (its ``num`` argument keeps its default) and displays the
    records it returns.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here; the caller
        decides when to serve it.
    """
    # ``gr.Blocks`` (capital B) is the layout class. The lowercase
    # ``gr.blocks`` is the submodule that defines it and is not callable.
    with gr.Blocks() as demo:
        gr.Markdown("## Here are evaluation setups")

        with gr.Row():
            # NOTE(review): not every Gradio release accepts ``placeholder``
            # on Number — confirm against the pinned Gradio version.
            seed = gr.Number(value=8, placeholder="pick your favoriate random seed")
            model_dropdown = gr.Dropdown(
                choices=Model.__model_list__,
                label="Choose a model",
                value=Model.__model_list__[0],
            )

        Template_text = gr.Textbox(
            value="""Summariza the following dialogue""",
            label='Input Prompting Template',
            lines=8,
            placeholder='Input your prompts',
        )
        submit_button = gr.Button("✨ Submit ✨")

        # ``process`` returns a list of dicts. A Markdown component expects a
        # string, so the structured result is shown in a JSON component.
        # NOTE(review): assumes the rouge score value is JSON-serializable —
        # confirm against ``metric_rouge_score``.
        output = gr.JSON()

        submit_button.click(
            process,
            inputs=[seed, model_dropdown, Template_text],
            outputs=output,
        )

    return demo
if __name__ == "__main__":
    # Standalone entry point: build the evaluation page, then serve it.
    demo = create_batch_evaluation_interface()
    demo.launch()