Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Added SNLI score
Browse files- src/about.py +44 -4
src/about.py
CHANGED
@@ -12,10 +12,11 @@ class Task:
|
|
12 |
# ---------------------------------------------------
|
13 |
class Tasks(Enum):
|
14 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
15 |
-
task0 = Task("custom|
|
16 |
-
task1 = Task("custom|
|
17 |
-
task2 = Task("custom|
|
18 |
-
task3 = Task("custom|
|
|
|
19 |
|
20 |
NUM_FEWSHOT = 0 # Change with your few shot
|
21 |
# ---------------------------------------------------
|
@@ -170,6 +171,45 @@ English: Some sentence to translate to Hebrew <br/>
|
|
170 |
Hebrew:
|
171 |
</blockquote>
|
172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
"""
|
174 |
|
175 |
EVALUATION_QUEUE_TEXT = """
|
|
|
12 |
# ---------------------------------------------------
|
13 |
class Tasks(Enum):
|
14 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
15 |
+
task0 = Task("custom|snli-acc|0", "snli_acc", "SNLI Accuracy")
|
16 |
+
task1 = Task("custom|heq-qa-tlnls|0", "heq_tlnls", "QA TLNLS (HeQ)")
|
17 |
+
task2 = Task("custom|sentiment-acc|0", "sentiment_acc", "Sentiment Acc (Mafat)")
|
18 |
+
task3 = Task("custom|winograd-acc|0", "winograd_acc", "Winograd (Binary) Acc (V. Schwartz)")
|
19 |
+
task4 = Task("custom|he-en-trans-bleu|0", "sentence_bleu", "Translation BLEU")
|
20 |
|
21 |
NUM_FEWSHOT = 0 # Change with your few shot
|
22 |
# ---------------------------------------------------
|
|
|
171 |
Hebrew:
|
172 |
</blockquote>
|
173 |
|
174 |
+
5. SNLI Accuracy
|
175 |
+
|
176 |
+
- **Source**: We took a sample of documents from the test-subset of the official SNLI corpus.
|
177 |
+
|
178 |
+
- **Scoring**: We compute the accuracy score on the predictions, expecting either "סתירה", "התאמה", or "כלום".
|
179 |
+
|
180 |
+
- **Number of examples**: There are a total of 210 examples - 70 from each class - where each example was translated using [Dicta's translation engine](https://translate.dicta.org.il), and then manually reviewed and corrected as needed.
|
181 |
+
|
182 |
+
- **Few-Shot Format**: For every prompt, we provide 12 few-shot examples, 4 from each category.
|
183 |
+
|
184 |
+
For example:
|
185 |
+
|
186 |
+
|
187 |
+
<blockquote dir="rtl" style='text-align: right; background-color: #f0f0f0'>
|
188 |
+
<p>
|
189 |
+
ืื ืืช ืืกืื: ื ืขืจ ืื ืื ืืืฆืืฆืจืชื ืืืืื ืืืคืขื ืขื ืืืงืชื.<br/>
|
190 |
+
ืืฉืขืจื: ืืืฃ ืืื ืืื ืืฆืืฆืจื.<br/>
|
191 |
+
תשובה: סתירה<br/>
|
192 |
+
|
193 |
+
...
|
194 |
+
|
195 |
+
ืื ืืช ืืกืื: ืื ืขืจื ืืืืฉื ืืืขืื ืืื, ืืขืืื ืคืืกืขืช ืืฉืื.<br/>
|
196 |
+
ืืฉืขืจื: ืืืืจืช ืืืืืฉืช ืืขืื ืืืคืฉืช ืืช ืืืื ืืืืื.<br/>
|
197 |
+
תשובה: כלום<br/>
|
198 |
+
|
199 |
+
...
|
200 |
+
|
201 |
+
ืื ืืช ืืกืื: ืกืคืื ืชึพืคืืจ ืื ืื ืฉืื ืขืืืื ืืืืจืืื.<br/>
|
202 |
+
ืืฉืขืจื: ืื ืฉืื ืขืืืื ืืืืจืืื ืืกืคืื ืืช.<br/>
|
203 |
+
תשובה: התאמה<br/>
|
204 |
+
|
205 |
+
...
|
206 |
+
|
207 |
+
הנחת יסוד: הנחה חדשה<br/>
|
208 |
+
השערה: השערה חדשה<br/>
|
209 |
+
תשובה:
|
210 |
+
</p>
|
211 |
+
</blockquote>
|
212 |
+
|
213 |
"""
|
214 |
|
215 |
EVALUATION_QUEUE_TEXT = """
|