import streamlit as st
from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer

st.title("Latin text completion with Latin BERT")
st.write("Enter a text containing the [MASK] token to see the model's predictions.")

st.write("Example texts:")
st.write("Asdrubal, frater Annibalis, qui secundo Punico bello [MASK] ingentibus copiis ab Hispania veniens => cum")
st.write("hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt => primo")
st.write("Lorem ipsum dolor sit amet, [MASK] adipiscing elit. => consectetur")
st.write("Populus Romanus cum Macedonibus [MASK] ter gessit => bellum")

input_text = st.text_input("Text:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")

# Local BERT-based Latin model (https://github.com/dbamman/latin-bert).
# Alternative on the Hugging Face Hub: "LuisAVasquez/simple-latin-bert-uncased".
modelname = "./models/bert-base-latin-uncased"

# RoBERTa-based Latin models from the Hugging Face Hub.
tokenizer_robertaclasscat = AutoTokenizer.from_pretrained("ClassCat/roberta-base-latin-v2")
model_robertaclasscat = AutoModelForMaskedLM.from_pretrained("ClassCat/roberta-base-latin-v2")
fill_mask_robertaclasscat = pipeline("fill-mask", model=model_robertaclasscat, tokenizer=tokenizer_robertaclasscat)

tokenizer_robertapstroe = AutoTokenizer.from_pretrained("pstroe/roberta-base-latin-cased")
model_robertapstroe = AutoModelForMaskedLM.from_pretrained("pstroe/roberta-base-latin-cased")
fill_mask_robertapstroe = pipeline("fill-mask", model=model_robertapstroe, tokenizer=tokenizer_robertapstroe)

tokenizer = AutoTokenizer.from_pretrained(modelname)
model = AutoModelForMaskedLM.from_pretrained(modelname)
fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer)

if input_text:
    # The BERT tokenizer uses [MASK], so the input can be passed through unchanged.
    predictions = fill_mask(input_text)
    st.subheader("Prediction results with BERT:")
    for pred in predictions:
        st.write(f"**Word**: {pred['token_str']}, **Probability**: {pred['score']:.4f}, **Sequence**: {pred['sequence']}")

    # The RoBERTa tokenizers use a different mask token (typically <mask>), so map
    # [MASK] to each tokenizer's own mask token before calling its fill-mask pipeline.
    input_text_classcat = input_text.replace("[MASK]", tokenizer_robertaclasscat.mask_token)
    predictions_robertaclasscat = fill_mask_robertaclasscat(input_text_classcat)
    st.subheader("Prediction results with RoBERTa ClassCat:")
    for pred_robertaclasscat in predictions_robertaclasscat:
        st.write(f"**Word**: {pred_robertaclasscat['token_str']}, **Probability**: {pred_robertaclasscat['score']:.4f}, **Sequence**: {pred_robertaclasscat['sequence']}")

    input_text_pstroe = input_text.replace("[MASK]", tokenizer_robertapstroe.mask_token)
    predictions_robertapstroe = fill_mask_robertapstroe(input_text_pstroe)
    st.subheader("Prediction results with RoBERTa pstroe:")
    for pred_robertapstroe in predictions_robertapstroe:
        st.write(f"**Word**: {pred_robertapstroe['token_str']}, **Probability**: {pred_robertapstroe['score']:.4f}, **Sequence**: {pred_robertapstroe['sequence']}")
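
# --- Optional sketch (not part of the original app) ---------------------------
# Loading three masked-LM pipelines on every Streamlit rerun is slow. A common
# pattern is to wrap the loading in st.cache_resource so each model is loaded
# only once per server process. This is a minimal sketch under that assumption;
# the helper name load_fill_mask is illustrative, not from the original code, and
# it assumes a Streamlit version that provides st.cache_resource.
#
# @st.cache_resource
# def load_fill_mask(name_or_path: str):
#     tok = AutoTokenizer.from_pretrained(name_or_path)
#     mdl = AutoModelForMaskedLM.from_pretrained(name_or_path)
#     return pipeline("fill-mask", model=mdl, tokenizer=tok)
#
# fill_mask = load_fill_mask(modelname)
# fill_mask_robertaclasscat = load_fill_mask("ClassCat/roberta-base-latin-v2")
# fill_mask_robertapstroe = load_fill_mask("pstroe/roberta-base-latin-cased")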