Spaces:
Running
Running
Muennighoff
committed on
Commit
·
ac3fdf5
1
Parent(s):
842d3bc
Rename BTM
Browse files
app.py
CHANGED
@@ -54,7 +54,7 @@ TASK_LIST_CLASSIFICATION_NB = [
|
|
54 |
"NorwegianParliament",
|
55 |
"MassiveIntentClassification (nb)",
|
56 |
"MassiveScenarioClassification (nb)",
|
57 |
-
"ScalaNbClassification
|
58 |
]
|
59 |
|
60 |
TASK_LIST_CLASSIFICATION_SV = [
|
@@ -62,7 +62,6 @@ TASK_LIST_CLASSIFICATION_SV = [
|
|
62 |
"MassiveIntentClassification (sv)",
|
63 |
"MassiveScenarioClassification (sv)",
|
64 |
"NordicLangClassification",
|
65 |
-
"ScalaNbClassification",
|
66 |
"ScalaSvClassification",
|
67 |
"SweRecClassification",
|
68 |
]
|
@@ -587,6 +586,15 @@ def get_dim_seq_size(model):
|
|
587 |
size = round(size["metadata"]["total_size"] / 1e9, 2)
|
588 |
return dim, seq, size
|
589 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
590 |
def add_rank(df):
|
591 |
cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length"]]
|
592 |
if len(cols_to_rank) == 1:
|
@@ -659,8 +667,6 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
659 |
df = pd.DataFrame(df_list)
|
660 |
# If there are any models that are the same, merge them
|
661 |
# E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
|
662 |
-
# Save to csv
|
663 |
-
df.to_csv("mteb.csv", index=False)
|
664 |
df = df.groupby("Model", as_index=False).first()
|
665 |
# Put 'Model' column first
|
666 |
cols = sorted(list(df.columns))
|
@@ -780,7 +786,7 @@ with block:
|
|
780 |
with gr.TabItem("English-X"):
|
781 |
with gr.Row():
|
782 |
gr.Markdown("""
|
783 |
-
**Bitext Mining Leaderboard
|
784 |
|
785 |
- **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
|
786 |
- **Languages:** 117 (Pairs of: English & other language)
|
@@ -801,13 +807,13 @@ with block:
|
|
801 |
inputs=[task_bitext_mining, lang_bitext_mining_other, datasets_bitext_mining_other],
|
802 |
outputs=data_bitext_mining,
|
803 |
)
|
804 |
-
with gr.TabItem("
|
805 |
with gr.Row():
|
806 |
gr.Markdown("""
|
807 |
-
**Bitext Mining
|
808 |
|
809 |
- **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
|
810 |
-
- **Languages:**
|
811 |
- **Credits:** [Kenneth Enevoldsen](https://github.com/KennethEnevoldsen)
|
812 |
""")
|
813 |
with gr.Row():
|
|
|
54 |
"NorwegianParliament",
|
55 |
"MassiveIntentClassification (nb)",
|
56 |
"MassiveScenarioClassification (nb)",
|
57 |
+
"ScalaNbClassification",
|
58 |
]
|
59 |
|
60 |
TASK_LIST_CLASSIFICATION_SV = [
|
|
|
62 |
"MassiveIntentClassification (sv)",
|
63 |
"MassiveScenarioClassification (sv)",
|
64 |
"NordicLangClassification",
|
|
|
65 |
"ScalaSvClassification",
|
66 |
"SweRecClassification",
|
67 |
]
|
|
|
586 |
size = round(size["metadata"]["total_size"] / 1e9, 2)
|
587 |
return dim, seq, size
|
588 |
|
589 |
+
def make_datasets_clickable(df):
    """Rename the ``BornholmBitextMining`` column to an HTML link to its dataset.

    If ``df`` has a column named ``BornholmBitextMining``, a renamed copy is
    returned whose header is an ``<a>`` tag pointing at the dataset page on
    the Hugging Face Hub. Otherwise ``df`` is returned unchanged. The input
    frame is never modified in place.

    The previous version put the ``f`` prefix on the dictionary key instead
    of on the replacement string, so the header contained the literal text
    ``{link}`` rather than the URL.

    NOTE(review): the original docstring read "Does not work". Whether the
    consuming UI component renders HTML in column headers cannot be
    determined from this function alone -- confirm before relying on it.

    Args:
        df: A pandas DataFrame of leaderboard results.

    Returns:
        A DataFrame with the column header replaced by the HTML link, or
        ``df`` itself when the column is absent.
    """
    column = "BornholmBitextMining"
    if column not in df.columns:
        return df
    link = "https://huggingface.co/datasets/strombergnlp/bornholmsk_parallel"
    # The f-string must be the *value*, where {link} actually appears.
    header = f'<a target="_blank" style="text-decoration: underline" href="{link}">{column}</a>'
    return df.rename(columns={column: header})
|
596 |
+
|
597 |
+
|
598 |
def add_rank(df):
|
599 |
cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length"]]
|
600 |
if len(cols_to_rank) == 1:
|
|
|
667 |
df = pd.DataFrame(df_list)
|
668 |
# If there are any models that are the same, merge them
|
669 |
# E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
|
|
|
|
|
670 |
df = df.groupby("Model", as_index=False).first()
|
671 |
# Put 'Model' column first
|
672 |
cols = sorted(list(df.columns))
|
|
|
786 |
with gr.TabItem("English-X"):
|
787 |
with gr.Row():
|
788 |
gr.Markdown("""
|
789 |
+
**Bitext Mining Leaderboard 🎌**
|
790 |
|
791 |
- **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
|
792 |
- **Languages:** 117 (Pairs of: English & other language)
|
|
|
807 |
inputs=[task_bitext_mining, lang_bitext_mining_other, datasets_bitext_mining_other],
|
808 |
outputs=data_bitext_mining,
|
809 |
)
|
810 |
+
with gr.TabItem("Danish"):
|
811 |
with gr.Row():
|
812 |
gr.Markdown("""
|
813 |
+
**Bitext Mining Danish Leaderboard 🇩🇰🎌**
|
814 |
|
815 |
- **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
|
816 |
+
- **Languages:** Danish & Bornholmsk (Danish Dialect)
|
817 |
- **Credits:** [Kenneth Enevoldsen](https://github.com/KennethEnevoldsen)
|
818 |
""")
|
819 |
with gr.Row():
|