# File size: 1,685 Bytes
# a09c6ce, b585e42
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from jinja2 import Template
import torch


# Load the judge model and its tokenizer once, at import time.
# Fall back to the CPU when no CUDA device is available so the app still
# starts on GPU-less hosts instead of failing in `.to("cuda:0")`.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model_name = "collinear-ai/collinear-reliability-judge-v1-deberta-ext"
model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)


# Prompt template: the document goes under "# Document:" and the
# conversation under "# Conversation:" before the text is tokenized.
_PROMPT_LAYOUT = """
# Document:
{{ text }}

# Conversation:
{{ conversation }}
"""
template = Template(_PROMPT_LAYOUT)


def judge_reliability(document: str, conversation: str) -> str:
    """Score a conversation against a reference document with the judge model.

    The two inputs are rendered into the prompt template, tokenized, and run
    through the classifier; the softmax probability at class index 1 is
    reported (presumably the "reliable" class -- confirm against the model
    card).

    Args:
        document: Reference text placed under the "# Document:" heading.
        conversation: Conversation text placed verbatim under the
            "# Conversation:" heading; it is not parsed here.

    Returns:
        A string of the form ``"Reliability Score: <probability>"``.
    """
    prompt = template.render(text=document, conversation=conversation)
    # Let the tokenizer build the tensors instead of converting lists by hand.
    batch = tokenizer([prompt], padding=True, return_tensors="pt")
    input_ids = batch["input_ids"].to(device)
    attention_mask = batch["attention_mask"].to(device)
    with torch.no_grad():
        # Call the module itself rather than `.forward` so that any
        # registered hooks run, as PyTorch recommends.
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
        # `dim` is the documented keyword for torch.softmax.
        probabilities = torch.softmax(logits, dim=1)
    return f"Reliability Score: {probabilities[0][1]}"

# Input widgets, pre-filled with a small worked example.
_document_box = gr.Textbox(label="Document", lines=5, value="CV was born in Iowa")
_conversation_box = gr.Textbox(
    label="Conversation",
    lines=5,
    value='[{"role": "user", "content": "Where are you born?"}, {"role": "assistant", "content": "I am born in Iowa"}]',
)
# Single text field that shows the formatted score string.
_results_box = gr.Textbox(label="Results")

# Web UI that wires the two inputs to judge_reliability and shows its result.
demo = gr.Interface(
    fn=judge_reliability,
    inputs=[_document_box, _conversation_box],
    outputs=_results_box,
    title="Collinear Reliability Judge",
    description="Enter a document and conversation (json formatted) to judge reliability. Note: this judges if the last assistant turn is faithful according to the given document ",
    theme="default",
)

# Start the Gradio server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()