Spaces:

Speech-Arena-2025
/

Speech-DF-Arena

Running

App Files Files Community

Speech-DF-Arena / ui /leaderboard.py

Speech-Arena-2025

Update ui/leaderboard.py

dd7b0db verified 18 days ago

raw

history blame contribute delete

4.97 kB


	import pandas as pd
	import gradio as gr
	from utils import load_leaderboard
	import numpy as np
	from huggingface_hub import snapshot_download


	def make_clickable(url, name):
	    """Return an HTML anchor that opens ``url`` in a new tab and shows ``name``."""
	    anchor_template = '<a href="{}" target="_blank">{}</a>'
	    return anchor_template.format(url, name)

	def render_info_html():
	    """Return the leaderboard introduction as a ``gr.Markdown`` component.

	    Despite the function name, the text is rendered as Markdown, not raw HTML.
	    """
	    intro_markdown = """The advent of machine generated speech calls for dedicated research to develop countermeasure systems to protect against their misuse.
	The speech DF arena leaderboard provides a standardized platform to compare different speech deepfake detection approaches and ranks them on Huggingface.
	By assessing models across diverse datasets and attack scenarios, the speech DF arena leaderboard aims to help researchers and developers enhance the reliability and robustness of deepfake detection systems
	, ensuring safer and more trustworthy audio communications. We report the average EER (lower the better).
	Models are ranked based on their Average EER, from lowest to highest. Check the Metrics tab to understand how the models are evaluated.
	If you want results for a model that is not listed here, you can submit a request for it to be included through under "submit your own system" tab.
	"""

	    return gr.Markdown(intro_markdown)

	def highlight_min(s, props=''):
	    """Return per-cell CSS that marks the minimum value(s) of a numeric column.

	    Intended for ``Styler.apply(..., axis=0)``, which calls this once per
	    column with that column as a Series.

	    Args:
	        s: The column being styled.
	        props: CSS declaration applied to every cell equal to the column
	            minimum (ties are all highlighted).

	    Returns:
	        A NumPy array of strings with the same length as ``s``: ``props`` for
	        the minimum cells and ``''`` everywhere else.
	    """
	    if pd.api.types.is_numeric_dtype(s):
	        # Series.min skips NaN and yields NaN (no warning, no exception) for
	        # empty or all-NaN columns; comparing against NaN then matches nothing.
	        is_min = (s == s.min()).fillna(False).to_numpy(dtype=bool)
	    else:
	        # Text columns (e.g. system names) have no meaningful minimum; the
	        # lexicographic one must not be highlighted, and mixed str/NaN columns
	        # must not raise.
	        is_min = np.zeros(len(s), dtype=bool)
	    return np.where(is_min, props, '')

	def render_leader_board(leaderboard_df, model_citations):
	    """Build the Gradio component that displays the ranked leaderboard.

	    Adds an ``Average EER(%)`` column (row-wise mean of every column from
	    position 3 onwards), sorts ascending on it, turns each system name into a
	    markdown link and prefixes the best rows with medal emojis.

	    Args:
	        leaderboard_df: DataFrame with a ``System`` column. Columns from
	            position 3 onwards are averaged, so they are assumed to be the
	            numeric per-dataset scores (TODO confirm against the loader).
	            The frame is not modified.
	        model_citations: Mapping from system name to URL; systems without an
	            entry link to ``#``.

	    Returns:
	        A ``gr.Dataframe`` wrapping the styled table, or a ``gr.HTML``
	        placeholder when ``leaderboard_df`` is empty.
	    """
	    if leaderboard_df.empty:
	        return gr.HTML(value="<p>No data available in the leaderboard.</p>")

	    print(leaderboard_df.shape)

	    # Work on a copy: DataFrame.insert mutates in place, so rendering the same
	    # frame twice would otherwise fail because the column already exists.
	    ranked = leaderboard_df.copy()
	    ranked.insert(3, 'Average EER(%)', ranked.iloc[:, 3:].mean(axis=1))
	    ranked = ranked.sort_values(by="Average EER(%)", ascending=True).reset_index(drop=True)

	    # Render each system name as a markdown link to its citation.
	    ranked["System"] = ranked["System"].apply(
	        lambda name: f"[{name}]({model_citations.get(name, '#')})"
	    )

	    # Assign rank emojis 🥇🥈🥉 to the best rows; slicing by the row count keeps
	    # this safe when the leaderboard holds fewer than three systems.
	    medals = ["🥇", "🥈", "🥉"]
	    for position, medal in enumerate(medals[:len(ranked)]):
	        ranked.at[position, "System"] = f"{medal} {ranked.at[position, 'System']}"

	    styler = (
	        ranked.style
	        .format(precision=2)
	        .apply(highlight_min, props='color:green', axis=0)
	    )

	    # Two leading markdown columns, every remaining column numeric; derived
	    # from the actual width so adding or removing datasets needs no edit here.
	    column_types = ['markdown'] * 2 + ['number'] * max(ranked.shape[1] - 2, 0)
	    return gr.Dataframe(styler, datatype=column_types, elem_id="leaderboard-table")

	def render_citation():
	    """Return a ``gr.Markdown`` component with the BibTeX entry for citing the arena."""
	    # Raw string so the backslash in ``\url`` is passed through literally.
	    citation_markdown = r"""
	If you use Speech DF Arena in your work, it can be cited as:

	```bibtex
	@misc{speecharena-df-leaderboard,
	title = {Speech Arena: Speech DeepFake Leaderboard},
	author = {Speech Arena},
	year = 2025,
	publisher = {Hugging Face},
	howpublished = "\url{link}"
	}
	```"""
	    return gr.Markdown(citation_markdown)

	def render_dataset_list():
	    """Return a ``gr.Markdown`` component listing the benchmark datasets with links."""
	    datasets_markdown = r"""
	List of all the datasets used in the benchmarking:
	- [ASVSpoof 2019: A large-scale public database of synthesized, converted and replayed speech](https://www.asvspoof.org/index2019.html)
	- [ASVspoof 2021 DF & LA: accelerating progress in spoofed and deepfake speech detection](https://www.asvspoof.org/index2021.html)
	- [ASVspoof 5: Crowdsourced speech data, deepfakes, and adversarial attacks at scale](https://www.asvspoof.org/workshop2024)
	- [FakeOrReal: FoR: A Dataset for Synthetic Speech Detection](https://bil.eecs.yorku.ca/datasets/)
	- [Codecfake Yuankun et. al.](https://github.com/xieyuankun/Codecfake)
	- [ADD 2022 Track 2 & 3: the First Audio Deep Synthesis Detection Challenge](http://addchallenge.cn/add2022)
	- [ADD 2023 Round 2&3 the Second Audio Deepfake Detection Challenge](http://addchallenge.cn/add2023)
	- [DFADD: The Diffusion and Flow-Matching Based Audio Deepfake Dataset](https://github.com/isjwdu/DFADD)
	- [LibriVoc: Exposing AI-Synthesized Human Voices Using Neural Vocoder Artifacts](https://github.com/csun22/Synthetic-Voice-Detection-Vocoder-Artifacts)
	- [SONAR: A Synthetic AI-Audio Detection Framework and Benchmark](https://github.com/Jessegator/SONAR)
	- [In-the-Wild: Does Audio Deepfake Detection Generalize?](https://deepfake-total.com/in_the_wild)

	"""
	    return gr.Markdown(datasets_markdown)