"""Streamlit demo: masked-token completion for Latin text, comparing two Latin BERT models."""
import streamlit as st
from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer


st.title("Latin text completion with Latin BERT")
st.write("Enter a text containing the [MASK] token to see the model's predictions.")

# Example Latin input with a [MASK] token:
#   dvces et reges carthaginiensivm hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt
input_text = st.text_input("Text:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")

# Masked-language models compared in this demo:
# 1) Latin BERT (https://github.com/dbamman/latin-bert), loaded from a local copy
#    (alternative local path: "./models/latin_bert/")
# 2) LuisAVasquez/simple-latin-bert-uncased, loaded from the Hugging Face Hub
modelname_lv = "LuisAVasquez/simple-latin-bert-uncased"
modelname = "./models/bert-base-latin-uncased"


# Fill-mask pipeline for the local Latin BERT checkpoint
tokenizer = AutoTokenizer.from_pretrained(modelname)
model = AutoModelForMaskedLM.from_pretrained(modelname)
fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer)

# Fill-mask pipeline for the Hugging Face Hub model
tokenizer_lv = AutoTokenizer.from_pretrained(modelname_lv)
model_lv = AutoModelForMaskedLM.from_pretrained(modelname_lv)
fill_mask_lv = pipeline("fill-mask", model=model_lv, tokenizer=tokenizer_lv)
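# Optional sketch (not part of the original app): wrapping the pipeline construction
# in a helper decorated with st.cache_resource keeps the models in memory across
# Streamlit reruns instead of reloading them on every interaction. The helper name
# load_fill_mask below is hypothetical.
#
# @st.cache_resource
# def load_fill_mask(name):
#     tok = AutoTokenizer.from_pretrained(name)
#     mdl = AutoModelForMaskedLM.from_pretrained(name)
#     return pipeline("fill-mask", model=mdl, tokenizer=tok)
#
# fill_mask = load_fill_mask(modelname)
# fill_mask_lv = load_fill_mask(modelname_lv)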

if input_text:
    if "[MASK]" not in input_text:
        # The fill-mask pipeline raises an error if no mask token is present.
        st.warning("The text must contain the [MASK] token.")
    else:
        # Predictions from the local Latin BERT checkpoint
        predictions = fill_mask(input_text)
        st.subheader("Prediction results with Bert Base Latin Uncased:")
        for pred in predictions:
            st.write(f"**Word**: {pred['token_str']}, **Probability**: {pred['score']:.4f}, **Sequence**: {pred['sequence']}")
        # Predictions from the Hugging Face Hub model
        predictions_lv = fill_mask_lv(input_text)
        st.subheader("Prediction results with Simple Latin Bert:")
        for pred_lv in predictions_lv:
            st.write(f"**Word**: {pred_lv['token_str']}, **Probability**: {pred_lv['score']:.4f}, **Sequence**: {pred_lv['sequence']}")