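"""Gradio app for The Arabic RAG Leaderboard.

Builds a tabbed interface over precomputed retrieval results: a searchable
retrieval leaderboard, a reranking leaderboard, and model submission forms
(rendered by utils.submit_gradio_module).
"""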
import gradio as gr
from utils import submit_gradio_module, load_retrieval_results
from fuzzywuzzy import fuzz
HEADER = """<div style="text-align: center; margin-bottom: 20px;">
<h1>The Arabic RAG Leaderboard</h1>
<p style="font-size: 14px; color: #888;">The only leaderboard you will require for your RAG needs</p>
</div>
"""
ABOUT_SECTION = """
## About
The Arabic RAG Leaderboard is designed to evaluate and compare the performance of Retrieval-Augmented Generation (RAG) models on a set of retrieval and generative tasks. By leveraging a comprehensive evaluation framework, the leaderboard provides a detailed assessment of a model's ability to retrieve relevant information and generate accurate, coherent, and contextually appropriate responses.
### Why Focus on RAG Models?
The Arabic RAG Leaderboard is specifically designed to assess **RAG models**, which combine retrieval mechanisms with generative capabilities to enhance the quality and relevance of generated content. These models are particularly useful in scenarios where access to up-to-date and contextually relevant information is crucial. While foundational models can be evaluated, the primary focus is on RAG models that excel in both retrieval and generation tasks.
### How to Submit Your Model?
Navigate to the submission section below to submit your RAG model from the HuggingFace Hub for evaluation. Ensure that your model is public and the submitted metadata (precision, revision, #params) is accurate.
### Contact
For any inquiries or assistance, feel free to reach out through the community tab at [Navid-AI Community](https://huggingface.co/spaces/Navid-AI/The-Arabic-Rag-Leaderboard/discussions) or via [email](mailto:[email protected]).
"""
CITATION_BUTTON_LABEL = """
Copy the following snippet to cite these results
"""
CITATION_BUTTON_TEXT = r"""
@misc{TheArabicRAGLeaderboard,
    author = {Mohaned A. Rashad and Hamza Shahid},
    title = {The Arabic RAG Leaderboard},
    year = {2025},
    publisher = {Navid-AI},
    howpublished = {\url{https://huggingface.co/spaces/Navid-AI/The-Arabic-Rag-Leaderboard}}
}
"""
df = None
def search_leaderboard(model_name):
    # An empty query returns the full leaderboard unchanged.
    if len(model_name.strip()) == 0:
        return df

    threshold = 95  # Minimum fuzzy-match score; lower this to make the search less strict.

    def calculate_similarity(row):
        similarity = fuzz.partial_ratio(model_name.lower(), row["Model"].lower())
        return similarity if similarity >= threshold else 0

    # Score every row on a copy so the global dataframe is not left
    # with a leftover "similarity" column after each search.
    scored_df = df.copy()
    scored_df["similarity"] = scored_df.apply(calculate_similarity, axis=1)

    # Keep rows above the threshold, best matches first, and drop the helper column.
    filtered_df = scored_df[scored_df["similarity"] > 0].sort_values("similarity", ascending=False)
    return filtered_df.drop("similarity", axis=1)
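# Illustrative partial_ratio scores (hypothetical model names, assuming the
# fuzzywuzzy semantics used above): an exact substring match scores 100, e.g.
#   fuzz.partial_ratio("bge", "baai/bge-m3") -> 100   (kept: >= 95)
#   fuzz.partial_ratio("bge", "intfloat/e5") -> ~33   (filtered out)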
def main():
    global df
    df = load_retrieval_results()
    # Render each model name as a clickable link to its Hub page.
    # (Series.apply replaces the deprecated DataFrame.applymap.)
    df["Model"] = df["Model"].apply(lambda x: f'<a href="https://huggingface.co/{x}" target="_blank">{x}</a>')
    df.drop(columns=["Revision", "Precision", "Task"], inplace=True)
    columns_to_show = ["Model", "Model Size (in Millions)", "Embedding Dimension", "Max Tokens", "Num Likes", "Web Search Dataset (Overall Score)"]
with gr.Blocks() as demo:
gr.HTML(HEADER)
with gr.Tabs():
with gr.Tab("Retrieval"):
with gr.Tabs():
with gr.Tab("Leaderboard"):
with gr.Row():
search_box_retrieval = gr.Textbox(
placeholder="Search for models...",
label="Search",
scale=5
)
columns_to_show_input = gr.CheckboxGroup(
label="Columns to Show",
choices=df.columns.tolist(),
value=columns_to_show,
scale=4
)
retrieval_leaderboard = gr.Dataframe(
value=df[columns_to_show],
datatype="markdown",
interactive=False
)
                        # Filter the leaderboard as the user types a model name.
search_box_retrieval.input(
search_leaderboard,
inputs=[search_box_retrieval],
outputs=retrieval_leaderboard
)
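                        # Re-slice the full dataframe whenever the column selection changes.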
columns_to_show_input.select(
lambda columns: gr.update(value=df[[col for col in df.columns if col in columns]]),
inputs=columns_to_show_input,
outputs=retrieval_leaderboard
)
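                    # Submission form for the retriever track (rendered by utils.submit_gradio_module).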
submit_gradio_module("Retriever")
with gr.Tab("Reranking"):
with gr.Tabs():
with gr.Tab("Leaderboard"):
search_box_reranker = gr.Textbox(
placeholder="Search for models...",
label="Search",
interactive=True
)
                        reranker_leaderboard = gr.Dataframe(
                            df[columns_to_show],
                            datatype="markdown",  # Render the HTML model links, as in the retrieval tab.
                            interactive=False,
                        )
search_box_reranker.submit(
search_leaderboard,
inputs=search_box_reranker,
outputs=reranker_leaderboard
)
submit_gradio_module("Reranker")
# with gr.Tab("LLM Context Answering"):
# with gr.Tabs():
# with gr.Tab("Leaderboard"):
# pass
# submit_gradio_module("LLM")
with gr.Row():
            with gr.Accordion("Citation", open=False):
gr.Textbox(
value=CITATION_BUTTON_TEXT,
label=CITATION_BUTTON_LABEL,
lines=20,
elem_id="citation-button",
show_copy_button=True,
)
demo.launch()
if __name__ == "__main__":
main()