File size: 6,547 Bytes
d392fbe
eb50697
3a93505
d392fbe
 
 
 
 
 
 
 
 
 
eb50697
d392fbe
eb50697
d392fbe
eb50697
d392fbe
 
 
eb50697
d392fbe
 
 
eb50697
d392fbe
 
 
 
 
 
 
 
eb50697
 
 
 
 
d392fbe
 
 
85ca42c
d392fbe
3a93505
85ca42c
3a93505
 
 
 
 
85ca42c
3a93505
 
 
85ca42c
3a93505
 
85ca42c
3a93505
 
 
 
 
 
 
d392fbe
85ca42c
 
 
 
 
 
d392fbe
1aa355c
d392fbe
 
 
 
 
de145b2
d392fbe
 
 
85ca42c
 
 
 
 
 
 
d392fbe
 
eb50697
85ca42c
eabeecb
d392fbe
 
 
3a93505
85ca42c
3a93505
85ca42c
 
 
 
 
 
3a93505
 
d392fbe
3a93505
d392fbe
 
 
 
3a93505
d392fbe
 
 
 
 
3a93505
293e470
3a93505
 
eb50697
3a93505
 
 
 
 
 
eb50697
3a93505
 
 
 
 
d392fbe
 
 
eb50697
d392fbe
 
 
 
 
de145b2
 
d392fbe
de145b2
d392fbe
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import gradio as gr
from utils import submit_gradio_module, load_retrieval_results
from fuzzywuzzy import fuzz

# Static page copy rendered by the Gradio app.
HEADER = """<div style="text-align: center; margin-bottom: 20px;">
    <h1>The Arabic RAG Leaderboard</h1>
    <p style="font-size: 14px; color: #888;">The only leaderboard you will require for your RAG needs πŸ†</p>
</div>
"""

ABOUT_SECTION = """
## About

The Arabic RAG Leaderboard is designed to evaluate and compare the performance of Retrieval-Augmented Generation (RAG) models on a set of retrieval and generative tasks. By leveraging a comprehensive evaluation framework, the leaderboard provides a detailed assessment of a model's ability to retrieve relevant information and generate accurate, coherent, and contextually appropriate responses.

### Why Focus on RAG Models?

The Arabic RAG Leaderboard is specifically designed to assess **RAG models**, which combine retrieval mechanisms with generative capabilities to enhance the quality and relevance of generated content. These models are particularly useful in scenarios where access to up-to-date and contextually relevant information is crucial. While foundational models can be evaluated, the primary focus is on RAG models that excel in both retrieval and generation tasks.

### How to Submit Your Model?

Navigate to the submission section below to submit your RAG model from the HuggingFace Hub for evaluation. Ensure that your model is public and the submitted metadata (precision, revision, #params) is accurate.

### Contact

For any inquiries or assistance, feel free to reach out through the community tab at [Navid-AI Community](https://huggingface.co/spaces/Navid-AI/The-Arabic-Rag-Leaderboard/discussions) or via [email](mailto:[email protected]).
"""

CITATION_BUTTON_LABEL = """
Copy the following snippet to cite these results
"""

# BibTeX entry shown in the citation box. Note: authors must be joined with
# "and" (BibTeX list separator) and \url needs its backslash, otherwise the
# copied snippet does not compile under BibTeX/LaTeX.
CITATION_BUTTON_TEXT = """
@misc{AraGen,
  author = {Mohaned A. Rashad and Hamza Shahid},
  title = {The Arabic RAG Leaderboard},
  year = {2025},
  publisher = {Navid-AI},
  howpublished = "\\url{https://huggingface.co/spaces/Navid-AI/The-Arabic-Rag-Leaderboard}"
}
"""

# Leaderboard dataframe; populated in main() and read by search_leaderboard().
df = None

def search_leaderboard(model_name):
    """Fuzzy-filter the global leaderboard dataframe by model name.

    Args:
        model_name: User-typed search query. A blank / whitespace-only
            query returns the full leaderboard unchanged.

    Returns:
        A new dataframe containing only rows whose "Model" value has a
        fuzzy partial-ratio match of at least 95 against the query,
        sorted by match score (best first). The module-level ``df`` is
        never mutated.
    """
    query = model_name.strip()
    if not query:
        return df

    threshold = 95  # minimum fuzz.partial_ratio to count as a match

    # Score each row into a local Series instead of writing a "similarity"
    # column onto the global df — the original approach leaked that helper
    # column into the shared dataframe (and thus into the column chooser).
    query_lower = query.lower()
    scores = df["Model"].map(
        lambda name: fuzz.partial_ratio(query_lower, name.lower())
    )

    matching = scores[scores >= threshold].sort_values(ascending=False)

    # Reindex by the sorted match order; best matches come first.
    return df.loc[matching.index]


def main():
    """Build and launch the Arabic RAG Leaderboard Gradio app.

    Loads retrieval results into the module-level ``df`` (shared with
    ``search_leaderboard``), renders each model id as a HuggingFace link,
    and wires up the Retrieval and Reranking leaderboard tabs plus the
    citation accordion.
    """
    global df
    df = load_retrieval_results()
    # Turn each model id into a clickable HuggingFace link; rendered via the
    # Dataframe's markdown datatype below.
    df[["Model"]] = df[["Model"]].applymap(lambda x: f'<a href="https://huggingface.co/{x}" target="_blank">{x}</a>')
    # Metadata columns not shown anywhere in the UI.
    df.drop(columns=["Revision", "Precision", "Task"], inplace=True)

    # Default visible columns for both leaderboard tables.
    columns_to_show = ["Model", "Model Size", "Embedding Dimension", "Max Tokens", "Num Likes", "Web Search Dataset (Overall Score)"]
    with gr.Blocks() as demo:
        gr.HTML(HEADER)
        
        with gr.Tabs():
            with gr.Tab("Retrieval"):
                with gr.Tabs():
                    with gr.Tab("Leaderboard"):
                        with gr.Row():
                            search_box_retrieval = gr.Textbox(
                                placeholder="Search for models...", 
                                label="Search", 
                                scale=5
                            )
                            # Lets the user toggle which df columns are visible.
                            columns_to_show_input = gr.CheckboxGroup(
                                label="Columns to Show",
                                choices=df.columns.tolist(),
                                value=columns_to_show,
                                scale=4
                            )

                        retrieval_leaderboard = gr.Dataframe(
                            value=df[columns_to_show],
                            datatype="markdown",  # needed so the Model links render as HTML
                            interactive=False
                        )
                        
                        # Live search: fires on every keystroke (.input).
                        search_box_retrieval.input(
                            search_leaderboard,
                            inputs=[search_box_retrieval],
                            outputs=retrieval_leaderboard
                        )
                        # Rebuild the table with only the checked columns,
                        # preserving df's original column order.
                        # NOTE(review): .select fires per checkbox interaction;
                        # .change may be the intended listener — confirm.
                        columns_to_show_input.select(
                            lambda columns: gr.update(value=df[[col for col in df.columns if col in columns]]),
                            inputs=columns_to_show_input,
                            outputs=retrieval_leaderboard
                        )
                        
                    submit_gradio_module("Retriever")

            with gr.Tab("Reranking"):
                with gr.Tabs():
                    with gr.Tab("Leaderboard"):
                        search_box_reranker = gr.Textbox(
                                placeholder="Search for models...", 
                                label="Search", 
                                interactive=True
                            )
                        
                        reranker_leaderboard = gr.Dataframe(
                            df[columns_to_show],
                            interactive=False,
                            )

                        # NOTE(review): uses .submit (Enter key) while the
                        # retrieval tab uses .input (per keystroke) — confirm
                        # whether this inconsistency is intentional.
                        search_box_reranker.submit(
                            search_leaderboard,
                            inputs=search_box_reranker,
                            outputs=reranker_leaderboard
                        )
                    submit_gradio_module("Reranker")
                    
            # with gr.Tab("LLM Context Answering"):
            #     with gr.Tabs():
            #         with gr.Tab("Leaderboard"):
            #             pass
            #         submit_gradio_module("LLM")

            with gr.Row():
                with gr.Accordion("πŸ“™ Citation", open=False):
                    gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        lines=20,
                        elem_id="citation-button",
                        show_copy_button=True,
                    )

    demo.launch()

if __name__ == "__main__":
    main()