Spaces:
Sleeping
Sleeping
File size: 2,961 Bytes
143b62d 4fb58cc 87bb867 143b62d d3d48e1 143b62d 87bb867 4fb58cc 87bb867 e302f12 87bb867 e302f12 87bb867 e302f12 87bb867 143b62d 87bb867 143b62d aa733b6 87bb867 aa733b6 143b62d af1a6de 143b62d f253a0d 143b62d f253a0d 143b62d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import html
import random

import gradio as gr
import numpy as np
from dotenv import load_dotenv

from utils.model import Model
from utils.data import dataset
from utils.metric import metric_rouge_score
from pages.summarization_playground import model, generate_answer
load_dotenv()
def display_results(response_list):
    """Render a batch of evaluated samples as an HTML report.

    Args:
        response_list: list of dicts, each with keys 'dialogue', 'summary',
            'response', and 'metric_score' (a dict holding 'rouge_score').

    Returns:
        An HTML string: an <h2> with the mean ROUGE score followed by one
        collapsible <details> section per sample.
    """
    # Guard the empty batch: np.mean([]) emits a RuntimeWarning and yields
    # nan, which would render as "Overall Score: nan".
    if not response_list:
        return "<h2>No results to display</h2>"

    overall_score = np.mean([r['metric_score']['rouge_score'] for r in response_list])
    html_output = f"<h2>Overall Score: {overall_score:.2f}</h2>"

    for i, item in enumerate(response_list, 1):
        # Escape free text before interpolating it into markup: dataset or
        # model output containing '<', '>' or '&' would otherwise break the
        # page (this string is displayed verbatim via gr.HTML).
        dialogue = html.escape(item['dialogue'])
        summary = html.escape(item['summary'])
        response = html.escape(item['response'])
        rouge_score = item['metric_score']['rouge_score']

        html_output += f"""
        <details>
            <summary>Response {i} (Rouge Score: {rouge_score:.2f})</summary>
            <div style="display: flex; justify-content: space-between;">
                <div style="width: 30%;">
                    <h3>Dialogue</h3>
                    {dialogue}
                </div>
                <div style="width: 30%;">
                    <h3>Summary</h3>
                    {summary}
                </div>
                <div style="width: 30%;">
                    <h3>Response</h3>
                    {response}
                </div>
            </div>
        </details>
        """
    return html_output
def process(seed, model_selection, prompt, num=10):
    """Sample `num` data points, generate summaries, score them, and
    return an HTML report of the batch.

    Args:
        seed: random seed, so the sampled subset is reproducible.
        model_selection: model identifier passed through to generate_answer.
        prompt: prompting template applied to every sample.
        num: number of data points to draw (with replacement). Defaults to 10.

    Returns:
        The HTML string produced by display_results.
    """
    random.seed(seed)
    sampled = random.choices(dataset, k=num)

    results = []
    for sample in sampled:
        reference = sample['summary']
        generated = generate_answer(sample['dialogue'], model, model_selection, prompt)
        results.append({
            'dialogue': sample['dialogue'],
            'summary': reference,
            'response': generated,
            'metric_score': {'rouge_score': metric_rouge_score(generated, reference)},
        })

    return display_results(results)
def create_batch_evaluation_interface():
    """Build the Gradio Blocks UI for batch evaluation.

    Lays out the seed / model / prompt inputs, a submit button, and an
    HTML output pane wired to `process`.

    Returns:
        The assembled gr.Blocks demo (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("## Here are evaluation setups. It will randomly sample 10 data points to generate and evaluate. Show results once finished.")

        with gr.Row():
            # precision=0 constrains the seed to an integer.
            # Fixed typo in user-facing text: "favoriate" -> "favorite".
            seed = gr.Number(value=8, info="pick your favorite random seed", precision=0)
            model_dropdown = gr.Dropdown(choices=Model.__model_list__, label="Choose a model", value=Model.__model_list__[0])
        Template_text = gr.Textbox(value="""Summarize the following dialogue""", label='Input Prompting Template', lines=8, placeholder='Input your prompts')
        submit_button = gr.Button("✨ Submit ✨")
        output = gr.HTML(label="Results")

        submit_button.click(
            process,
            inputs=[seed, model_dropdown, Template_text],
            outputs=output
        )

    return demo
if __name__ == "__main__":
    # Removed a stray trailing "|" after demo.launch() that made the file
    # a syntax error (page-extraction artifact).
    demo = create_batch_evaluation_interface()
    demo.launch()