Zabantu-nso-120m

Sleeping

App Files Files Community

UnarineLeo committed on Oct 10, 2024

Commit

ddd231c

verified ·

1 Parent(s): d028f12

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -23

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import streamlit as st
 from transformers import pipeline
 unmasker = pipeline('fill-mask', model='dsfsi/zabantu-nso-120m')
@@ -21,6 +22,7 @@ def replace_mask(sentence, predicted_word):
 st.title("Fill Mask | Zabantu-nso-120m")
 st.write(f"")
 st.markdown("This is a variant of Zabantu pre-trained on a monolingual dataset of Sepedi(nso) sentences on a transformer network with 120 million traininable parameters.")
 col1, col2 = st.columns(2)
@@ -34,26 +36,37 @@ if 'warnings' not in st.session_state:
 with col1:
     with st.container(border=True):
         st.markdown("Input :clipboard:")
-        sample_sentence = "bašomedi ba polase ya dinamune ya zebediela citrus ba hlomile magato a <mask> malebana le go se sepetšwe botse ga dilo ka polaseng eo."
-        text_input = st.text_area(
-            "Enter sentences with <mask> token:",
-            value=st.session_state['text_input']
-        )
-        input_sentences = text_input.split("\n")
-        button1, button2, _ = st.columns([2, 2, 4])
-        with button1:
-            if st.button("Test Example"):
-                # st.rerun()
-                result, warnings = fill_mask(sample_sentence.split("\n"))
-                # st.session_state['text_input'] = sample_sentence
-        with button2:
-            if st.button("Submit"):
                 result, warnings = fill_mask(input_sentences)
                 st.session_state['warnings'] = warnings
         if st.session_state['warnings']:
             for warning in st.session_state['warnings']:
@@ -61,12 +74,14 @@ with col1:
         st.markdown("Example")
         st.code(sample_sentence, wrap_lines=True)
 with col2:
     with st.container(border=True):
         st.markdown("Output :bar_chart:")
         if 'result' in locals() and result:
-            if result:
                 for sentence, predictions in result.items():
                     for prediction in predictions:
                         predicted_word = prediction['token_str']
@@ -82,12 +97,34 @@ with col2:
                         </div>
                         """, unsafe_allow_html=True)
 if 'result' in locals():
-        if result:
-            for sentence, predictions in result.items():
-                predicted_word = predictions[0]['token_str']
-                full_sentence = replace_mask(sentence, predicted_word)
-                st.write(f"**Sentence:** {full_sentence }")
 css = """
 <style>

 import streamlit as st
 from transformers import pipeline
+from io import StringIO
 unmasker = pipeline('fill-mask', model='dsfsi/zabantu-nso-120m')
 st.title("Fill Mask | Zabantu-nso-120m")
 st.write(f"")
 st.markdown("This is a variant of Zabantu pre-trained on a monolingual dataset of Sepedi(nso) sentences on a transformer network with 120 million traininable parameters.")
 col1, col2 = st.columns(2)
 with col1:
     with st.container(border=True):
         st.markdown("Input :clipboard:")
+        select_options = ['Choose option', 'Enter text input', 'Upload a file(csv/txt)']
+        sample_sentence = "Vhana vhane vha kha ḓi bva u bebwa vha kha khombo ya u <mask> nga Listeriosis."
+        option_selected = st.selectbox(f"Select an input option:", select_options, index=0)
+        if option_selected == 'Enter text input':
+            text_input = st.text_area(
+                "Enter sentences with <mask> token:",
+                value=st.session_state['text_input']
+            )
+            input_sentences = text_input.split("\n")
+            if st.button("Submit",use_container_width=True):
                 result, warnings = fill_mask(input_sentences)
                 st.session_state['warnings'] = warnings
+        if option_selected == 'Upload a file(csv/txt)':
+            uploaded_file = st.file_uploader("Choose a file")
+            if uploaded_file is not None:
+                stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
+                string_data = stringio.read()
+                input_sentences = string_data.split("\n")
+                if st.button("Submit",use_container_width=True):
+                    result, warnings = fill_mask(input_sentences)
+                    st.session_state['warnings'] = warnings
         if st.session_state['warnings']:
             for warning in st.session_state['warnings']:
         st.markdown("Example")
         st.code(sample_sentence, wrap_lines=True)
+        if st.button("Test Example",use_container_width=True):
+            result, warnings = fill_mask(sample_sentence.split("\n"))
 with col2:
     with st.container(border=True):
         st.markdown("Output :bar_chart:")
         if 'result' in locals() and result:
+            if len(result) == 1:
                 for sentence, predictions in result.items():
                     for prediction in predictions:
                         predicted_word = prediction['token_str']
                         </div>
                         """, unsafe_allow_html=True)
+            else:
+                index = 0
+                for sentence, predictions in result.items():
+                    index += 1
+                    if predictions:
+                        top_prediction = predictions[0]
+                        predicted_word = top_prediction['token_str']
+                        score = top_prediction['score'] * 100
+                        st.markdown(f"""
+                        <div class="bar">
+                            <div class="bar-fill" style="width: {score}%;"></div>
+                        </div>
+                        <div class="container">
+                            <div style="align-items: left;">{predicted_word} (line {index})</div>
+                            <div style="align-items: right;">{score:.2f}%</div>
+                        </div>
+                        """, unsafe_allow_html=True)
 if 'result' in locals():
+    if result:
+        line = 0
+        for sentence, predictions in result.items():
+            line += 1
+            predicted_word = predictions[0]['token_str']
+            full_sentence = replace_mask(sentence, predicted_word)
+            st.write(f"**Sentence {line}:** {full_sentence }")
 css = """
 <style>