"""Streamlit demo that fills masked tokens with `dsfsi/zabantu-nso-ven-170m`.

The page has two columns: the left one collects sentences (one per line),
the right one shows every prediction with its score followed by each
sentence completed with its top prediction.
"""

import streamlit as st
from transformers import pipeline

MODEL_NAME = 'dsfsi/zabantu-nso-ven-170m'

# Configure the page before anything else touches Streamlit: the cached
# loader below may render a spinner, and page config has to come first.
st.set_page_config(layout="wide")


@st.cache_resource
def _load_unmasker():
    """Build the fill-mask pipeline once instead of on every script rerun."""
    return pipeline('fill-mask', model=MODEL_NAME)


unmasker = _load_unmasker()

# Ask the tokenizer for its mask token rather than hard-coding it, so the
# checks below always agree with what the pipeline actually looks for.
MASK_TOKEN = unmasker.tokenizer.mask_token


def fill_mask(sentences):
    """Run the fill-mask pipeline over every sentence that contains a mask.

    Args:
        sentences: Iterable of input sentences.

    Returns:
        A ``(results, warnings)`` tuple. ``results`` maps each sentence that
        contains the mask token to the pipeline output for it; ``warnings``
        is a list of messages, one per sentence without a mask token.

    NOTE(review): for a sentence with several mask tokens the pipeline is
    documented to return one prediction list per mask, which the rendering
    code in this script does not handle -- confirm whether multi-mask input
    needs to be supported.
    """
    results = {}
    messages = []
    for sentence in sentences:
        if MASK_TOKEN in sentence:
            results[sentence] = unmasker(sentence)
        else:
            messages.append(
                f"Warning: No `{MASK_TOKEN}` token found in sentence: {sentence}"
            )
    return results, messages


def replace_mask(sentence, predicted_word):
    """Return ``sentence`` with the mask token replaced by the bolded word."""
    return sentence.replace(MASK_TOKEN, f"**{predicted_word}**")


# NOTE(review): the title says "sot" while the loaded model id says "nso" --
# confirm which one is intended.
st.title("Fill Mask | Zabantu-sot-ven-170m")
st.write("")

col1, col2 = st.columns(2)

if 'text_input' not in st.session_state:
    st.session_state['text_input'] = ""

if 'warnings' not in st.session_state:
    st.session_state['warnings'] = []

# Predictions produced during this run; stays empty until a button is pressed.
result = {}

with col1:
    with st.container(border=True):
        st.markdown("Input :clipboard:")

        # NOTE(review): this sample appears to contain no mask token, so
        # "Test Example" reports a warning instead of predictions -- confirm
        # where the mask belongs.
        sample_sentence = "Vhana vhane vha kha ḓi bva u bebwa vha kha khombo ya u nga Listeriosis."

        text_input = st.text_area(
            f"Enter sentences with `{MASK_TOKEN}` token:",
            value=st.session_state['text_input']
        )

        # One sentence per line; blank lines carry nothing to predict.
        input_sentences = [
            line for line in text_input.splitlines() if line.strip()
        ]

        button1, button2, _ = st.columns([1.25, 1, 5.7])

        with button1:
            if st.button("Test Example"):
                result, new_warnings = fill_mask(sample_sentence.split("\n"))
                # Record warnings here too, otherwise stale ones from an
                # earlier "Submit" would be displayed for this run.
                st.session_state['warnings'] = new_warnings

        with button2:
            if st.button("Submit"):
                result, new_warnings = fill_mask(input_sentences)
                st.session_state['warnings'] = new_warnings

        for warning in st.session_state['warnings']:
            st.warning(warning)

        st.markdown("Example")
        st.code(sample_sentence, wrap_lines=True)

with col2:
    with st.container(border=True):
        st.markdown("Output :bar_chart:")

        # Every candidate token with its score as a percentage.
        for predictions in result.values():
            for prediction in predictions:
                predicted_word = prediction['token_str']
                score = prediction['score'] * 100
                st.markdown(f"""
{predicted_word}
{score:.2f}%
""", unsafe_allow_html=True)

        # Each sentence completed with its first-listed prediction.
        for sentence, predictions in result.items():
            predicted_word = predictions[0]['token_str']
            full_sentence = replace_mask(sentence, predicted_word)
            st.write(f"**Sentence:** {full_sentence}")

css = """ """
st.markdown(css, unsafe_allow_html=True)