File size: 1,260 Bytes
d7df580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5697c0f
d7df580
 
 
 
 
 
 
 
 
 
 
74d3c44
5697c0f
 
 
 
 
 
 
 
 
 
 
594ad29
5697c0f
 
 
b9753c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import functools
import os

import gradio as gr
import py_vncorenlp
from sentence_transformers import CrossEncoder
# Folder that holds the VnCoreNLP jar and its models.
# The previous value was the literal placeholder '/absolute/path/to/vncorenlp'
# copied from the library's usage example, which does not exist on a real host.
# The location can be overridden with the VNCORENLP_DIR environment variable;
# by default a "vncorenlp" folder next to this script is used.
# NOTE(review): py_vncorenlp's documentation asks for an absolute path, hence
# the abspath() call — confirm against the installed library version.
VNCORENLP_DIR = os.path.abspath(
    os.environ.get(
        "VNCORENLP_DIR",
        os.path.join(os.path.dirname(os.path.abspath(__file__)), "vncorenlp"),
    )
)

# Create the folder up front so the download has somewhere to write.
os.makedirs(VNCORENLP_DIR, exist_ok=True)

py_vncorenlp.download_model(save_dir=VNCORENLP_DIR)

# Word segmenter shared by every rerank() call (only the "wseg" annotator is loaded).
rdrsegmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir=VNCORENLP_DIR)

MODEL_ID = 'itdainb/PhoRanker'
MAX_LENGTH = 512


@functools.lru_cache(maxsize=None)
def _load_ranker():
    """Build the PhoRanker cross-encoder once and reuse it on later calls."""
    model = CrossEncoder(MODEL_ID, max_length=MAX_LENGTH)

    # For fp16 usage
    # NOTE(review): half precision on a CPU-only host can be slow or
    # unsupported depending on the PyTorch build — confirm target hardware.
    model.model.half()
    return model


def _segment(text):
    """Word-segment *text* and return it as one space-joined string.

    ``word_segment`` returns a list of segmented sentences; the cross-encoder
    expects a single string per side of each pair, so the pieces are joined.
    """
    return " ".join(rdrsegmenter.word_segment(text))


def rerank(query, sentences):
    """Score how relevant each document is to *query* with PhoRanker.

    Args:
        query: The search query as plain (unsegmented) text.
        sentences: Either an iterable of document strings, or a single string
            holding one document per line (the form a multi-line text box
            delivers). In the string form, lines are stripped and blank lines
            are dropped; an iterable is used exactly as given.

    Returns:
        The scores produced by ``CrossEncoder.predict``, one per document in
        input order. An empty list is returned when there are no documents.
    """
    if isinstance(sentences, str):
        # Iterating a str would yield characters, not documents.
        documents = [line.strip() for line in sentences.splitlines() if line.strip()]
    else:
        documents = [] if sentences is None else list(sentences)

    # Nothing to score: return before touching the segmenter or the model.
    if not documents:
        return []

    tokenized_query = _segment(query)
    tokenized_pairs = [[tokenized_query, _segment(doc)] for doc in documents]

    return _load_ranker().predict(tokenized_pairs)




def rerank_documents(query, documents_text):
    """Adapt rerank() to the text-in / text-out shape of the Gradio form.

    Args:
        query: Contents of the "Query" text box.
        documents_text: Contents of the documents text box, one document per
            line. Blank lines are ignored.

    Returns:
        One line per document, ordered from highest to lowest score, each
        formatted as "<score><TAB><document>". An empty string is returned
        when the query or the document list is empty.
    """
    query = (query or "").strip()
    documents = [
        line.strip() for line in (documents_text or "").splitlines() if line.strip()
    ]
    if not query or not documents:
        return ""

    scores = rerank(query, documents)

    # Sort on the score only, so tied scores never fall back to comparing text.
    ranked = sorted(zip(documents, scores), key=lambda pair: pair[1], reverse=True)
    return "\n".join(f"{float(score):.4f}\t{doc}" for doc, score in ranked)


# Create Gradio interface
interface = gr.Interface(
    fn=rerank_documents,
    inputs=[
        gr.Textbox(label="Query", placeholder="Enter your query"),
        gr.Textbox(label="Documents (one per line)", lines=5, placeholder="Enter documents to rank"),
    ],
    outputs=gr.Textbox(label="Reranked Documents"),
    # The model loaded by rerank() is PhoRanker, not MonoT5.
    title="PhoRanker Reranking",
    description="Provide a query and a list of documents to rerank them using PhoRanker."
)

# Launch the app
if __name__ == "__main__":
    interface.launch()