Spaces:

AMR-KELEG
/

MLADI

Running

App Files Community

AMR-KELEG committed on Nov 2, 2024

Commit

1a452c0

1 Parent(s): a0eb3d1

Tweak the progress bar

Browse files

Files changed (2) hide show

app.py +48 -46
eval_utils.py +4 -1

app.py CHANGED Viewed

@@ -97,6 +97,52 @@ with tab1:
         )
         st.write("Note: The metrics are macro-averaged across all dialects.")
 with tab2:
     model_name = st.text_input("Enter a model's name on HF")
     inference_functions_names = [
@@ -147,8 +193,9 @@ with tab2:
             )
             progress_bar.progress(
                 min(i / len(sentences), 1),
-                text=f"Step {i}/{len(sentences)} - " + progress_text,
             )
         progress_bar.empty()
         # Store the predictions in a private dataset
@@ -160,48 +207,3 @@ with tab2:
         )
         st.toast(f"Inference completed!")
-with st.expander("Cite this leaderboard!"):
-    st.write(
-        """
-        Please cite the following paper in which we introduced the NADI 2024 evaluation sets:
-        ```
-        @inproceedings{abdul-mageed-etal-2024-nadi,
-            title = "{NADI} 2024: The Fifth Nuanced {A}rabic Dialect Identification Shared Task",
-            author = "Abdul-Mageed, Muhammad  and
-            Keleg, Amr  and
-            Elmadany, AbdelRahim  and
-            Zhang, Chiyu  and
-            Hamed, Injy  and
-            Magdy, Walid  and
-            Bouamor, Houda  and
-            Habash, Nizar",
-            editor = "Habash, Nizar  and
-            Bouamor, Houda  and
-            Eskander, Ramy  and
-            Tomeh, Nadi  and
-            Abu Farha, Ibrahim  and
-            Abdelali, Ahmed  and
-            Touileb, Samia  and
-            Hamed, Injy  and
-            Onaizan, Yaser  and
-            Alhafni, Bashar  and
-            Antoun, Wissam  and
-            Khalifa, Salam  and
-            Haddad, Hatem  and
-            Zitouni, Imed  and
-            AlKhamissi, Badr  and
-            Almatham, Rawan  and
-            Mrini, Khalil",
-            booktitle = "Proceedings of The Second Arabic Natural Language Processing Conference",
-            month = aug,
-            year = "2024",
-            address = "Bangkok, Thailand",
-            publisher = "Association for Computational Linguistics",
-            url = "https://aclanthology.org/2024.arabicnlp-1.79",
-            doi = "10.18653/v1/2024.arabicnlp-1.79",
-            pages = "709--728",
-        }
-        ```
-        """
-    )

         )
         st.write("Note: The metrics are macro-averaged across all dialects.")
+        with st.expander("Cite this leaderboard!"):
+            st.write(
+                """
+                Please cite the following paper in which we introduced the NADI 2024 evaluation sets:
+                ```
+                @inproceedings{abdul-mageed-etal-2024-nadi,
+                    title = "{NADI} 2024: The Fifth Nuanced {A}rabic Dialect Identification Shared Task",
+                    author = "Abdul-Mageed, Muhammad  and
+                    Keleg, Amr  and
+                    Elmadany, AbdelRahim  and
+                    Zhang, Chiyu  and
+                    Hamed, Injy  and
+                    Magdy, Walid  and
+                    Bouamor, Houda  and
+                    Habash, Nizar",
+                    editor = "Habash, Nizar  and
+                    Bouamor, Houda  and
+                    Eskander, Ramy  and
+                    Tomeh, Nadi  and
+                    Abu Farha, Ibrahim  and
+                    Abdelali, Ahmed  and
+                    Touileb, Samia  and
+                    Hamed, Injy  and
+                    Onaizan, Yaser  and
+                    Alhafni, Bashar  and
+                    Antoun, Wissam  and
+                    Khalifa, Salam  and
+                    Haddad, Hatem  and
+                    Zitouni, Imed  and
+                    AlKhamissi, Badr  and
+                    Almatham, Rawan  and
+                    Mrini, Khalil",
+                    booktitle = "Proceedings of The Second Arabic Natural Language Processing Conference",
+                    month = aug,
+                    year = "2024",
+                    address = "Bangkok, Thailand",
+                    publisher = "Association for Computational Linguistics",
+                    url = "https://aclanthology.org/2024.arabicnlp-1.79",
+                    doi = "10.18653/v1/2024.arabicnlp-1.79",
+                    pages = "709--728",
+                }
+                ```
+                """
+            )
 with tab2:
     model_name = st.text_input("Enter a model's name on HF")
     inference_functions_names = [
             )
             progress_bar.progress(
                 min(i / len(sentences), 1),
+                text=progress_text,
             )
+            print(f"{model_name} - Progress: {i}/{len(sentences)}")
         progress_bar.empty()
         # Store the predictions in a private dataset
         )
         st.toast(f"Inference completed!")

eval_utils.py CHANGED Viewed

@@ -3,7 +3,10 @@ from constants import DIALECTS, DIALECTS_WITH_LABELS
 def predict_top_p(model, tokenizer, text, P=0.9):
-    """Predict the top dialects with an accumulative confidence of at least P (set by default to 0.9)."""
     assert P <= 1 and P >= 0
     logits = model(**tokenizer(text, return_tensors="pt")).logits

 def predict_top_p(model, tokenizer, text, P=0.9):
+    """Predict the top dialects with an accumulative confidence of at least P (set by default to 0.9).
+    The model is expected to generate logits for each dialect of the following dialects in the same order:
+    Algeria, Bahrain, Egypt, Iraq, Jordan, Kuwait, Lebanon, Libya, Morocco, Oman, Palestine, Qatar, Saudi_Arabia, Sudan, Syria, Tunisia, UAE, Yemen.
+    """
     assert P <= 1 and P >= 0
     logits = model(**tokenizer(text, return_tensors="pt")).logits