AMR-KELEG committed on
Commit
1a452c0
·
1 Parent(s): a0eb3d1

Tweak the progress bar

Browse files
Files changed (2) hide show
  1. app.py +48 -46
  2. eval_utils.py +4 -1
app.py CHANGED
@@ -97,6 +97,52 @@ with tab1:
97
  )
98
  st.write("Note: The metrics are macro-averaged across all dialects.")
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  with tab2:
101
  model_name = st.text_input("Enter a model's name on HF")
102
  inference_functions_names = [
@@ -147,8 +193,9 @@ with tab2:
147
  )
148
  progress_bar.progress(
149
  min(i / len(sentences), 1),
150
- text=f"Step {i}/{len(sentences)} - " + progress_text,
151
  )
 
152
  progress_bar.empty()
153
 
154
  # Store the predictions in a private dataset
@@ -160,48 +207,3 @@ with tab2:
160
  )
161
 
162
  st.toast(f"Inference completed!")
163
-
164
- with st.expander("Cite this leaderboard!"):
165
- st.write(
166
- """
167
- Please cite the following paper in which we introduced the NADI 2024 evaluation sets:
168
- ```
169
- @inproceedings{abdul-mageed-etal-2024-nadi,
170
- title = "{NADI} 2024: The Fifth Nuanced {A}rabic Dialect Identification Shared Task",
171
- author = "Abdul-Mageed, Muhammad and
172
- Keleg, Amr and
173
- Elmadany, AbdelRahim and
174
- Zhang, Chiyu and
175
- Hamed, Injy and
176
- Magdy, Walid and
177
- Bouamor, Houda and
178
- Habash, Nizar",
179
- editor = "Habash, Nizar and
180
- Bouamor, Houda and
181
- Eskander, Ramy and
182
- Tomeh, Nadi and
183
- Abu Farha, Ibrahim and
184
- Abdelali, Ahmed and
185
- Touileb, Samia and
186
- Hamed, Injy and
187
- Onaizan, Yaser and
188
- Alhafni, Bashar and
189
- Antoun, Wissam and
190
- Khalifa, Salam and
191
- Haddad, Hatem and
192
- Zitouni, Imed and
193
- AlKhamissi, Badr and
194
- Almatham, Rawan and
195
- Mrini, Khalil",
196
- booktitle = "Proceedings of The Second Arabic Natural Language Processing Conference",
197
- month = aug,
198
- year = "2024",
199
- address = "Bangkok, Thailand",
200
- publisher = "Association for Computational Linguistics",
201
- url = "https://aclanthology.org/2024.arabicnlp-1.79",
202
- doi = "10.18653/v1/2024.arabicnlp-1.79",
203
- pages = "709--728",
204
- }
205
- ```
206
- """
207
- )
 
97
  )
98
  st.write("Note: The metrics are macro-averaged across all dialects.")
99
 
100
+ with st.expander("Cite this leaderboard!"):
101
+ st.write(
102
+ """
103
+ Please cite the following paper in which we introduced the NADI 2024 evaluation sets:
104
+ ```
105
+ @inproceedings{abdul-mageed-etal-2024-nadi,
106
+ title = "{NADI} 2024: The Fifth Nuanced {A}rabic Dialect Identification Shared Task",
107
+ author = "Abdul-Mageed, Muhammad and
108
+ Keleg, Amr and
109
+ Elmadany, AbdelRahim and
110
+ Zhang, Chiyu and
111
+ Hamed, Injy and
112
+ Magdy, Walid and
113
+ Bouamor, Houda and
114
+ Habash, Nizar",
115
+ editor = "Habash, Nizar and
116
+ Bouamor, Houda and
117
+ Eskander, Ramy and
118
+ Tomeh, Nadi and
119
+ Abu Farha, Ibrahim and
120
+ Abdelali, Ahmed and
121
+ Touileb, Samia and
122
+ Hamed, Injy and
123
+ Onaizan, Yaser and
124
+ Alhafni, Bashar and
125
+ Antoun, Wissam and
126
+ Khalifa, Salam and
127
+ Haddad, Hatem and
128
+ Zitouni, Imed and
129
+ AlKhamissi, Badr and
130
+ Almatham, Rawan and
131
+ Mrini, Khalil",
132
+ booktitle = "Proceedings of The Second Arabic Natural Language Processing Conference",
133
+ month = aug,
134
+ year = "2024",
135
+ address = "Bangkok, Thailand",
136
+ publisher = "Association for Computational Linguistics",
137
+ url = "https://aclanthology.org/2024.arabicnlp-1.79",
138
+ doi = "10.18653/v1/2024.arabicnlp-1.79",
139
+ pages = "709--728",
140
+ }
141
+ ```
142
+ """
143
+ )
144
+
145
+
146
  with tab2:
147
  model_name = st.text_input("Enter a model's name on HF")
148
  inference_functions_names = [
 
193
  )
194
  progress_bar.progress(
195
  min(i / len(sentences), 1),
196
+ text=progress_text,
197
  )
198
+ print(f"{model_name} - Progress: {i}/{len(sentences)}")
199
  progress_bar.empty()
200
 
201
  # Store the predictions in a private dataset
 
207
  )
208
 
209
  st.toast(f"Inference completed!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval_utils.py CHANGED
@@ -3,7 +3,10 @@ from constants import DIALECTS, DIALECTS_WITH_LABELS
3
 
4
 
5
  def predict_top_p(model, tokenizer, text, P=0.9):
6
- """Predict the top dialects with an accumulative confidence of at least P (set by default to 0.9)."""
 
 
 
7
  assert P <= 1 and P >= 0
8
 
9
  logits = model(**tokenizer(text, return_tensors="pt")).logits
 
3
 
4
 
5
  def predict_top_p(model, tokenizer, text, P=0.9):
6
+ """Predict the top dialects with an accumulative confidence of at least P (set by default to 0.9).
7
+ The model is expected to generate logits for each dialect of the following dialects in the same order:
8
+ Algeria, Bahrain, Egypt, Iraq, Jordan, Kuwait, Lebanon, Libya, Morocco, Oman, Palestine, Qatar, Saudi_Arabia, Sudan, Syria, Tunisia, UAE, Yemen.
9
+ """
10
  assert P <= 1 and P >= 0
11
 
12
  logits = model(**tokenizer(text, return_tensors="pt")).logits