Spaces:

Navid-AI
/

The-Arabic-Rag-Leaderboard

Running on CPU Upgrade

App Files Files Community

The-Arabic-Rag-Leaderboard / app.py

MohamedRashad

Add reranking functionality and update data loading methods for improved leaderboard display

7b90636 19 days ago

raw

history blame

8.41 kB

	import gradio as gr
	from utils import submit_gradio_module, load_retrieval_results, load_reranking_results
	from fuzzywuzzy import fuzz

	# HTML banner (title and tagline) rendered at the top of the page via gr.HTML(HEADER).
	HEADER = """<div style="text-align: center; margin-bottom: 20px;">
	<h1>The Arabic RAG Leaderboard</h1>
	<p style="font-size: 14px; color: #888;">The only leaderboard you will require for your RAG needs 🏆</p>
	</div>
	"""

	# Markdown body of the "About" tabs; rendered with gr.Markdown(ABOUT_SECTION)
	# under both the Retrieval and the Reranking sections.
	# NOTE(review): the mailto target below reads "[email protected]", which looks
	# like an obfuscated placeholder rather than a real address — confirm.
	ABOUT_SECTION = """
	## About

	The Arabic RAG Leaderboard is designed to evaluate and compare the performance of Retrieval-Augmented Generation (RAG) models on a set of retrieval and generative tasks. By leveraging a comprehensive evaluation framework, the leaderboard provides a detailed assessment of a model's ability to retrieve relevant information and generate accurate, coherent, and contextually appropriate responses.

	### Why Focus on RAG Models?

	The Arabic RAG Leaderboard is specifically designed to assess RAG models, which combine retrieval mechanisms with generative capabilities to enhance the quality and relevance of generated content. These models are particularly useful in scenarios where access to up-to-date and contextually relevant information is crucial. While foundational models can be evaluated, the primary focus is on RAG models that excel in both retrieval and generation tasks.

	### How to Submit Your Model?

	Navigate to the submission section below to submit your RAG model from the HuggingFace Hub for evaluation. Ensure that your model is public and the submitted metadata (precision, revision, #params) is accurate.

	### Contact

	For any inquiries or assistance, feel free to reach out through the community tab at [Navid-AI Community](https://huggingface.co/spaces/Navid-AI/The-Arabic-Rag-Leaderboard/discussions) or via [email](mailto:[email protected]).
	"""

	# Label shown on the citation textbox inside the "Citation" accordion.
	CITATION_BUTTON_LABEL = """
	Copy the following snippet to cite these results
	"""

	# BibTeX entry displayed (with a copy button) in the "Citation" accordion.
	# - Authors are joined with " and ": BibTeX treats a comma-separated list as a
	#   single "Last, First" name.
	# - The backslash of \url is written as "\\" because "\u" would otherwise start
	#   a unicode escape in a regular (non-raw) Python string.
	CITATION_BUTTON_TEXT = """
	@misc{AraGen,
	author = {Mohamed A. Rashad and Hamza Shahid},
	title = {The Arabic RAG Leaderboard},
	year = {2025},
	publisher = {Navid-AI},
	howpublished = "\\url{https://huggingface.co/spaces/Navid-AI/The-Arabic-Rag-Leaderboard}"
	}
	"""

	# Module-level leaderboard tables. Both start empty and are assigned by main();
	# the search callbacks read them as globals.
	retrieval_df = reranking_df = None

	def search_leaderboard(df, model_name, columns_to_show, threshold=95):
	    """Return the rows of ``df`` whose "Model" value fuzzy-matches ``model_name``.

	    Args:
	        df: Leaderboard dataframe; must contain a "Model" column.
	        model_name: Search text. ``None``, empty or whitespace-only text
	            disables filtering.
	        columns_to_show: Column labels to keep in the result.
	        threshold: Minimum ``fuzz.partial_ratio`` score for a row to be kept.

	    Returns:
	        A dataframe restricted to ``columns_to_show``. When a query is given,
	        only matching rows are returned, best match first; rows with equal
	        scores keep the order they have in ``df``.
	    """
	    # Normalise once: surrounding whitespace is ignored both for the
	    # "is there a query?" check and for the matching itself.
	    query = (model_name or "").strip().lower()
	    if not query:
	        return df.loc[:, columns_to_show]

	    def score(name):
	        # str() keeps a missing or non-string cell from raising on .lower().
	        return fuzz.partial_ratio(query, str(name).lower())

	    # Score only the "Model" column instead of copying the frame and applying
	    # a function row by row.
	    scores = df["Model"].map(score).astype("int64")
	    keep = (scores >= threshold).to_numpy()

	    matches = df.loc[keep, columns_to_show]
	    # Stable sort on the negated scores gives descending similarity while
	    # leaving ties in their original order; positional reordering also works
	    # when the index contains duplicate labels.
	    order = (-scores[keep]).to_numpy().argsort(kind="stable")
	    return matches.iloc[order]

	def retrieval_search_leaderboard(model_name, columns_to_show):
	    """Run search_leaderboard against the module-level retrieval table."""
	    return search_leaderboard(
	        df=retrieval_df,
	        model_name=model_name,
	        columns_to_show=columns_to_show,
	    )

	def reranking_search_leaderboard(model_name, columns_to_show):
	    """Run search_leaderboard against the module-level reranking table."""
	    return search_leaderboard(
	        df=reranking_df,
	        model_name=model_name,
	        columns_to_show=columns_to_show,
	    )


	def _build_leaderboard_tab(df, default_columns, search_fn):
	    """Create the search box, column picker and table of one leaderboard tab.

	    Must be called inside an open Gradio layout context.

	    Args:
	        df: Dataframe backing the table; its columns are the picker choices.
	        default_columns: Columns selected and displayed initially.
	        search_fn: Callback taking ``(model_name, columns_to_show)`` and
	            returning the dataframe to display.
	    """
	    with gr.Row():
	        search_box = gr.Textbox(
	            placeholder="Search for models...",
	            label="Search",
	            scale=5,
	        )
	        columns_input = gr.CheckboxGroup(
	            label="Columns to Show",
	            choices=df.columns.tolist(),
	            value=default_columns,
	            scale=4,
	        )

	    leaderboard = gr.Dataframe(
	        value=df[default_columns],
	        datatype="markdown",
	        wrap=True,
	        show_fullscreen_button=True,
	        interactive=False,
	    )

	    # Both controls go through the same callback with the same inputs, so
	    # changing the visible columns keeps the current search filter applied
	    # instead of resetting the table to the full dataframe.
	    wiring = dict(inputs=[search_box, columns_input], outputs=leaderboard)
	    search_box.input(search_fn, **wiring)
	    columns_input.select(search_fn, **wiring)


	def main():
	    """Load both leaderboards, build the Gradio UI and launch the app."""
	    global retrieval_df, reranking_df

	    # Retrieval table: only a subset of the columns is shown by default.
	    retrieval_df = load_retrieval_results(prepare_for_display=True)
	    retrieval_columns_to_show = [
	        "Model",
	        "Web Search Dataset (Overall Score)",
	        "Model Size (in Millions)",
	        "Embedding Dimension",
	        "Max Tokens",
	        "Num Likes",
	    ]

	    # Reranking table: every column is shown by default.
	    reranking_df = load_reranking_results(prepare_for_display=True)
	    reranking_columns_to_show = reranking_df.columns.tolist()

	    with gr.Blocks() as demo:
	        gr.HTML(HEADER)

	        with gr.Tabs():
	            with gr.Tab("🕵️‍♂️ Retrieval"):
	                with gr.Tabs():
	                    with gr.Tab("👑 Leaderboard"):
	                        _build_leaderboard_tab(
	                            retrieval_df,
	                            retrieval_columns_to_show,
	                            retrieval_search_leaderboard,
	                        )

	                    with gr.Tab("🏵️ Submit Retriever"):
	                        submit_gradio_module("Retriever")

	                    with gr.Tab("ℹ️ About"):
	                        gr.Markdown(ABOUT_SECTION)

	            with gr.Tab("📊 Reranking"):
	                with gr.Tabs():
	                    with gr.Tab("👑 Leaderboard"):
	                        _build_leaderboard_tab(
	                            reranking_df,
	                            reranking_columns_to_show,
	                            reranking_search_leaderboard,
	                        )

	                    with gr.Tab("🏵️ Submit Reranker"):
	                        submit_gradio_module("Reranker")

	                    with gr.Tab("ℹ️ About"):
	                        gr.Markdown(ABOUT_SECTION)

	            # TODO: add a "🧠 LLM Context Answering" tab (leaderboard plus
	            # submit_gradio_module("LLM")) once that track is available.

	        with gr.Row():
	            with gr.Accordion("📙 Citation", open=False):
	                gr.Textbox(
	                    value=CITATION_BUTTON_TEXT,
	                    label=CITATION_BUTTON_LABEL,
	                    lines=20,
	                    elem_id="citation-button",
	                    show_copy_button=True,
	                )

	    demo.launch()

	# Build and launch the app only when this file is run as a script.
	if __name__ == "__main__":
	main()