Spaces:
Sleeping
Sleeping
File size: 3,359 Bytes
143b62d 29fb045 252caca 143b62d 4fb58cc 87bb867 143b62d 29fb045 143b62d 87bb867 4fb58cc 87bb867 252caca 87bb867 252caca 87bb867 252caca 87bb867 252caca 87bb867 252caca 87bb867 29fb045 143b62d f961a8f 252caca 29fb045 143b62d 29fb045 143b62d 29fb045 143b62d 87bb867 143b62d aa733b6 29fb045 f253a0d 143b62d f253a0d 143b62d 29fb045 143b62d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
from dotenv import load_dotenv
import gradio as gr
import json
import html
import numpy as np
from utils.model import Model
from utils.metric import metric_rouge_score
from pages.summarization_playground import generate_answer
load_dotenv()
def display_results(response_list):
    """Render batch-evaluation results as an HTML report.

    Args:
        response_list: Sequence of dicts. Each dict provides the string
            fields ``'dialogue'``, ``'summary'`` and ``'response'`` plus a
            numeric score at ``item['metric_score']['rouge_score']``.

    Returns:
        An HTML string: a heading with the mean ROUGE score followed by one
        collapsible ``<details>`` section per item showing the dialogue, the
        reference summary and the generated response side by side. When
        ``response_list`` is empty only a heading reading "N/A" is returned.
    """
    if not response_list:
        # np.mean([]) would produce NaN (and a RuntimeWarning); say so plainly.
        return "<h2>Overall Score: N/A</h2>"

    overall_score = np.mean([r['metric_score']['rouge_score'] for r in response_list])
    sections = [f"<h2>Overall Score: {overall_score:.2f}</h2>"]

    for i, item in enumerate(response_list, 1):
        rouge_score = item['metric_score']['rouge_score']
        # Escape first so model/dataset text cannot inject markup, then turn
        # newlines into explicit line breaks.
        dialogue = html.escape(item['dialogue']).replace('\n', '<br>')
        summary = html.escape(item['summary']).replace('\n', '<br>')
        response = html.escape(item['response']).replace('\n', '<br>')

        sections.append(f"""
        <details>
            <summary>Response {i} (Rouge Score: {rouge_score:.2f})</summary>
            <div style="display: flex; justify-content: space-between;">
                <div style="width: 30%;">
                    <h3>Dialogue</h3>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{dialogue}</pre>
                </div>
                <div style="width: 30%;">
                    <h3>Summary</h3>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{summary}</pre>
                </div>
                <div style="width: 30%;">
                    <h3>Response</h3>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{response}</pre>
                </div>
            </div>
        </details>
        """)

    return "".join(sections)
def process(model_selection, prompt, num=10):
    """Generate and score a summary for every sample in the test dataset.

    Reads ``test_samples/test_data.json``, and for each entry calls
    ``generate_answer`` with the entry's dialogue, the selected model and the
    prompt extended by an "Output following <format> format." instruction.
    Each response is scored against the entry's reference summary with
    ``metric_rouge_score``.

    Args:
        model_selection: Model choice, passed through to ``generate_answer``.
        prompt: Base prompt text; the per-sample format instruction is
            appended to it.
        num: Currently unused; kept so existing callers keep working.

    Returns:
        The HTML report produced by ``display_results`` for all samples.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open("test_samples/test_data.json", "r", encoding="utf-8") as file:
        dataset = json.load(file)

    response_list = []
    for data in dataset:
        dialogue = data['dialogue']
        output_format = data['format']  # renamed: avoid shadowing builtin format()
        summary = data['summary']

        response = generate_answer(
            dialogue,
            model_selection,
            prompt + f' Output following {output_format} format.',
        )
        rouge_score = metric_rouge_score(response, summary)

        response_list.append(
            {
                'dialogue': dialogue,
                'summary': summary,
                'response': response,
                'metric_score': {
                    'rouge_score': rouge_score,
                },
            }
        )

    return display_results(response_list)
def create_batch_evaluation_interface():
    """Build the Gradio Blocks UI for batch evaluation.

    The page contains a model dropdown (populated from
    ``Model.__model_list__``, defaulting to its first entry), a prompt
    template textbox, a submit button and an HTML results area. Clicking the
    button calls ``process`` with the selected model and the prompt text and
    shows its return value in the results area.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks() as demo:
        # User-facing text: fixed the "though"/"josn" typos of the original.
        gr.Markdown("## Here are evaluation setups. It will run through datapoints in test_data.json to generate and evaluate. Show results once finished.")

        model_dropdown = gr.Dropdown(
            choices=Model.__model_list__,
            label="Choose a model",
            value=Model.__model_list__[0],
        )
        template_text = gr.Textbox(
            value="Summarize the following dialogue",
            label='Input Prompting Template',
            lines=8,
            placeholder='Input your prompts',
        )
        submit_button = gr.Button("✨ Submit ✨")
        output = gr.HTML(label="Results")

        submit_button.click(
            process,
            inputs=[model_dropdown, template_text],
            outputs=output,
        )

    return demo
if __name__ == "__main__":
    # Run the batch-evaluation page as a standalone app when executed directly.
    demo = create_batch_evaluation_interface()
    demo.launch()