Tweak the progress bar
Browse files
- app.py +48 -46
- eval_utils.py +4 -1
app.py
CHANGED
@@ -97,6 +97,52 @@ with tab1:
|
|
97 |
)
|
98 |
st.write("Note: The metrics are macro-averaged across all dialects.")
|
99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
with tab2:
|
101 |
model_name = st.text_input("Enter a model's name on HF")
|
102 |
inference_functions_names = [
|
@@ -147,8 +193,9 @@ with tab2:
|
|
147 |
)
|
148 |
progress_bar.progress(
|
149 |
min(i / len(sentences), 1),
|
150 |
-
text=
|
151 |
)
|
|
|
152 |
progress_bar.empty()
|
153 |
|
154 |
# Store the predictions in a private dataset
|
@@ -160,48 +207,3 @@ with tab2:
|
|
160 |
)
|
161 |
|
162 |
st.toast(f"Inference completed!")
|
163 |
-
|
164 |
-
with st.expander("Cite this leaderboard!"):
|
165 |
-
st.write(
|
166 |
-
"""
|
167 |
-
Please cite the following paper in which we introduced the NADI 2024 evaluation sets:
|
168 |
-
```
|
169 |
-
@inproceedings{abdul-mageed-etal-2024-nadi,
|
170 |
-
title = "{NADI} 2024: The Fifth Nuanced {A}rabic Dialect Identification Shared Task",
|
171 |
-
author = "Abdul-Mageed, Muhammad and
|
172 |
-
Keleg, Amr and
|
173 |
-
Elmadany, AbdelRahim and
|
174 |
-
Zhang, Chiyu and
|
175 |
-
Hamed, Injy and
|
176 |
-
Magdy, Walid and
|
177 |
-
Bouamor, Houda and
|
178 |
-
Habash, Nizar",
|
179 |
-
editor = "Habash, Nizar and
|
180 |
-
Bouamor, Houda and
|
181 |
-
Eskander, Ramy and
|
182 |
-
Tomeh, Nadi and
|
183 |
-
Abu Farha, Ibrahim and
|
184 |
-
Abdelali, Ahmed and
|
185 |
-
Touileb, Samia and
|
186 |
-
Hamed, Injy and
|
187 |
-
Onaizan, Yaser and
|
188 |
-
Alhafni, Bashar and
|
189 |
-
Antoun, Wissam and
|
190 |
-
Khalifa, Salam and
|
191 |
-
Haddad, Hatem and
|
192 |
-
Zitouni, Imed and
|
193 |
-
AlKhamissi, Badr and
|
194 |
-
Almatham, Rawan and
|
195 |
-
Mrini, Khalil",
|
196 |
-
booktitle = "Proceedings of The Second Arabic Natural Language Processing Conference",
|
197 |
-
month = aug,
|
198 |
-
year = "2024",
|
199 |
-
address = "Bangkok, Thailand",
|
200 |
-
publisher = "Association for Computational Linguistics",
|
201 |
-
url = "https://aclanthology.org/2024.arabicnlp-1.79",
|
202 |
-
doi = "10.18653/v1/2024.arabicnlp-1.79",
|
203 |
-
pages = "709--728",
|
204 |
-
}
|
205 |
-
```
|
206 |
-
"""
|
207 |
-
)
|
|
|
97 |
)
|
98 |
st.write("Note: The metrics are macro-averaged across all dialects.")
|
99 |
|
100 |
+
with st.expander("Cite this leaderboard!"):
|
101 |
+
st.write(
|
102 |
+
"""
|
103 |
+
Please cite the following paper in which we introduced the NADI 2024 evaluation sets:
|
104 |
+
```
|
105 |
+
@inproceedings{abdul-mageed-etal-2024-nadi,
|
106 |
+
title = "{NADI} 2024: The Fifth Nuanced {A}rabic Dialect Identification Shared Task",
|
107 |
+
author = "Abdul-Mageed, Muhammad and
|
108 |
+
Keleg, Amr and
|
109 |
+
Elmadany, AbdelRahim and
|
110 |
+
Zhang, Chiyu and
|
111 |
+
Hamed, Injy and
|
112 |
+
Magdy, Walid and
|
113 |
+
Bouamor, Houda and
|
114 |
+
Habash, Nizar",
|
115 |
+
editor = "Habash, Nizar and
|
116 |
+
Bouamor, Houda and
|
117 |
+
Eskander, Ramy and
|
118 |
+
Tomeh, Nadi and
|
119 |
+
Abu Farha, Ibrahim and
|
120 |
+
Abdelali, Ahmed and
|
121 |
+
Touileb, Samia and
|
122 |
+
Hamed, Injy and
|
123 |
+
Onaizan, Yaser and
|
124 |
+
Alhafni, Bashar and
|
125 |
+
Antoun, Wissam and
|
126 |
+
Khalifa, Salam and
|
127 |
+
Haddad, Hatem and
|
128 |
+
Zitouni, Imed and
|
129 |
+
AlKhamissi, Badr and
|
130 |
+
Almatham, Rawan and
|
131 |
+
Mrini, Khalil",
|
132 |
+
booktitle = "Proceedings of The Second Arabic Natural Language Processing Conference",
|
133 |
+
month = aug,
|
134 |
+
year = "2024",
|
135 |
+
address = "Bangkok, Thailand",
|
136 |
+
publisher = "Association for Computational Linguistics",
|
137 |
+
url = "https://aclanthology.org/2024.arabicnlp-1.79",
|
138 |
+
doi = "10.18653/v1/2024.arabicnlp-1.79",
|
139 |
+
pages = "709--728",
|
140 |
+
}
|
141 |
+
```
|
142 |
+
"""
|
143 |
+
)
|
144 |
+
|
145 |
+
|
146 |
with tab2:
|
147 |
model_name = st.text_input("Enter a model's name on HF")
|
148 |
inference_functions_names = [
|
|
|
193 |
)
|
194 |
progress_bar.progress(
|
195 |
min(i / len(sentences), 1),
|
196 |
+
text=progress_text,
|
197 |
)
|
198 |
+
print(f"{model_name} - Progress: {i}/{len(sentences)}")
|
199 |
progress_bar.empty()
|
200 |
|
201 |
# Store the predictions in a private dataset
|
|
|
207 |
)
|
208 |
|
209 |
st.toast(f"Inference completed!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
eval_utils.py
CHANGED
@@ -3,7 +3,10 @@ from constants import DIALECTS, DIALECTS_WITH_LABELS
|
|
3 |
|
4 |
|
5 |
def predict_top_p(model, tokenizer, text, P=0.9):
|
6 |
-
"""Predict the top dialects with an accumulative confidence of at least P (set by default to 0.9).
|
|
|
|
|
|
|
7 |
assert P <= 1 and P >= 0
|
8 |
|
9 |
logits = model(**tokenizer(text, return_tensors="pt")).logits
|
|
|
3 |
|
4 |
|
5 |
def predict_top_p(model, tokenizer, text, P=0.9):
|
6 |
+
"""Predict the top dialects with an accumulative confidence of at least P (set by default to 0.9).
|
7 |
+
The model is expected to generate logits for each dialect of the following dialects in the same order:
|
8 |
+
Algeria, Bahrain, Egypt, Iraq, Jordan, Kuwait, Lebanon, Libya, Morocco, Oman, Palestine, Qatar, Saudi_Arabia, Sudan, Syria, Tunisia, UAE, Yemen.
|
9 |
+
"""
|
10 |
assert P <= 1 and P >= 0
|
11 |
|
12 |
logits = model(**tokenizer(text, return_tensors="pt")).logits
|