File size: 3,359 Bytes
143b62d
 
29fb045
 
252caca
143b62d
4fb58cc
87bb867
143b62d
 
 
29fb045
143b62d
 
 
87bb867
4fb58cc
 
 
87bb867
 
 
 
 
 
252caca
 
 
 
87bb867
 
 
 
 
 
 
252caca
87bb867
 
 
252caca
87bb867
 
 
252caca
87bb867
 
 
 
252caca
87bb867
 
29fb045
143b62d
f961a8f
 
 
252caca
29fb045
143b62d
29fb045
143b62d
29fb045
143b62d
 
 
 
 
 
 
 
 
 
 
 
 
 
87bb867
 
143b62d
 
aa733b6
29fb045
 
 
f253a0d
143b62d
f253a0d
143b62d
 
 
29fb045
143b62d
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from dotenv import load_dotenv
import gradio as gr

import json
import html

import numpy as np

from utils.model import Model
from utils.metric import metric_rouge_score

from pages.summarization_playground import generate_answer

# Load variables from a .env file (python-dotenv) into the process environment.
# Runs at import time, before any function in this module is called.
load_dotenv()

def _escape_multiline(text):
    """HTML-escape *text* and turn newlines into ``<br>`` tags."""
    return html.escape(text).replace('\n', '<br>')


def display_results(response_list):
    """Render evaluated samples as an HTML report.

    Args:
        response_list: Sequence of dicts, each holding the string values
            'dialogue', 'summary' and 'response', plus a 'metric_score'
            dict with a numeric 'rouge_score'.

    Returns:
        An HTML string: an ``<h2>`` header with the mean rouge score,
        followed by one collapsible ``<details>`` section per sample showing
        dialogue, reference summary and model response side by side.
        For an empty ``response_list`` a short "N/A" report is returned.
    """
    if not response_list:
        # np.mean([]) returns nan and emits a RuntimeWarning; report the
        # empty run explicitly instead of rendering "Overall Score: nan".
        return "<h2>Overall Score: N/A</h2><p>No responses to display.</p>"

    overall_score = np.mean([r['metric_score']['rouge_score'] for r in response_list])

    # Collect fragments and join once instead of repeated string +=.
    sections = [f"<h2>Overall Score: {overall_score:.2f}</h2>"]

    for i, item in enumerate(response_list, 1):
        rouge_score = item['metric_score']['rouge_score']

        # All three texts are escaped before being embedded in the markup
        # (the response is model output, the others come from the dataset).
        dialogue = _escape_multiline(item['dialogue'])
        summary = _escape_multiline(item['summary'])
        response = _escape_multiline(item['response'])

        sections.append(f"""
        <details>
        <summary>Response {i} (Rouge Score: {rouge_score:.2f})</summary>
        <div style="display: flex; justify-content: space-between;">
            <div style="width: 30%;">
                <h3>Dialogue</h3>
                <pre style="white-space: pre-wrap; word-wrap: break-word;">{dialogue}</pre>
            </div>
            <div style="width: 30%;">
                <h3>Summary</h3>
                <pre style="white-space: pre-wrap; word-wrap: break-word;">{summary}</pre>
            </div>
            <div style="width: 30%;">
                <h3>Response</h3>
                <pre style="white-space: pre-wrap; word-wrap: break-word;">{response}</pre>
            </div>
        </div>
        </details>
        """)

    return "".join(sections)

def process(model_selection, prompt, num=10):
    """Run the batch evaluation over the bundled test set and render it.

    Each entry of ``test_samples/test_data.json`` is read for its
    'dialogue', 'format' and 'summary' keys. The dialogue is passed to
    ``generate_answer`` with the prompt extended by an output-format
    instruction, and the response is scored against the reference summary
    with ``metric_rouge_score``.

    Args:
        model_selection: Model identifier forwarded to ``generate_answer``.
        prompt: Prompt template text; the format instruction is appended.
        num: Currently unused; kept for interface compatibility.
            NOTE(review): presumably meant to cap the number of samples
            evaluated — confirm before wiring it in.

    Returns:
        The HTML report produced by ``display_results``.
    """
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open("test_samples/test_data.json", "r", encoding="utf-8") as file:
        dataset = json.load(file)

    response_list = []
    for data in dataset:
        dialogue = data['dialogue']
        output_format = data['format']  # renamed: avoid shadowing builtin format()
        summary = data['summary']
        response = generate_answer(
            dialogue,
            model_selection,
            prompt + f' Output following {output_format} format.',
        )

        rouge_score = metric_rouge_score(response, summary)

        response_list.append(
            {
                'dialogue': dialogue,
                'summary': summary,
                'response': response,
                'metric_score': {
                    'rouge_score': rouge_score,
                },
            }
        )

    return display_results(response_list)


def create_batch_evaluation_interface():
    """Build the Gradio Blocks UI for batch evaluation.

    The layout has a model dropdown (choices from ``Model.__model_list__``,
    first entry preselected), a prompt-template textbox, a submit button and
    an HTML output area. Clicking the button calls ``process`` with the
    selected model and the template text and shows the returned HTML.

    Returns:
        The constructed ``gr.Blocks`` instance (not launched).
    """
    with gr.Blocks() as demo:
        # Fixed user-facing typos: "though" -> "through", "test_data.josn" -> "test_data.json".
        gr.Markdown("## Here are evaluation setups. It will run through datapoints in test_data.json to generate and evaluate. Show results once finished.")

        model_dropdown = gr.Dropdown(choices=Model.__model_list__, label="Choose a model", value=Model.__model_list__[0])
        template_text = gr.Textbox(value="""Summarize the following dialogue""", label='Input Prompting Template', lines=8, placeholder='Input your prompts')
        submit_button = gr.Button("✨ Submit ✨")
        output = gr.HTML(label="Results")

        # Only two inputs are wired, so process() always runs with its default `num`.
        submit_button.click(
            process,
            inputs=[model_dropdown, template_text],
            outputs=output
        )

    return demo

# Script entry point: build the batch-evaluation UI and start the Gradio app
# when this file is executed directly (not when it is imported).
if __name__ == "__main__":
    demo = create_batch_evaluation_interface()
    demo.launch()