import os import gradio as gr import json import requests import random labels = ["Real Audio 🗣️", "Cloned Audio 🤖"] DURATION = 2 def get_accuracy(score_matrix) -> str: correct = score_matrix[0][0] + score_matrix[1][1] total = sum(score_matrix[0]) + sum(score_matrix[1]) if total == 0: return "" accuracy = correct / total * 100 return f"{accuracy:.2f}%" def audio_link(path: str, model: str): """Get the link to the audio file for a given path and model.""" return f"https://huggingface.co/datasets/jerpint/vox-cloned-data/resolve/main/{model}/{path}?download=true" def confusion_matrix_to_markdown(matrix, labels=None): num_labels = len(matrix) labels = labels or [f"Class {i}" for i in range(num_labels)] accuracy = get_accuracy(matrix) # Header row markdown = f"| {' | '.join([''] + labels)} |\n" markdown += f"| {' | '.join(['---'] * (num_labels + 1))} |\n" # Data rows for i, row in enumerate(matrix): markdown += f"| {labels[i]} | " + " | ".join(map(str, row)) + " |\n" markdown += f"\nAccuracy %: {accuracy}\n" return markdown def load_and_cache_data(): json_link = "https://huggingface.co/datasets/jerpint/vox-cloned-data/resolve/main/files.json?download=true" local_file = "files.json" if not os.path.exists(local_file): json_file = requests.get(json_link) if json_file.status_code != 200: raise Exception(f"Failed to load data from {json_link}") # Cache the file with open(local_file, "w") as f: f.write(json_file.text) with open(local_file, "r") as f: return json.load(f) def load_data(): json_link = "https://huggingface.co/datasets/jerpint/vox-cloned-data/resolve/main/files.json?download=true" json_file = requests.get(json_link) if json_file.status_code != 200: raise Exception(f"Failed to load data from {json_link}") print("Loaded data") return json.loads(json_file.text) def select_random_model(path): """Select a random model from the list of models for a given path. Will select commonvoice 50% of the time, and a random other model 50% of the time. """ if random.random() < 0.5: return "commonvoice" else: other_models = [m for m in data[path] if m != "commonvoice"] return random.choice(other_models) def get_random_audio(): path = random.choice(paths) model = select_random_model(path) return path, model def next_audio(): new_audio = get_random_audio() audio_cmp = gr.Audio(audio_link(new_audio[0], new_audio[1])) return audio_cmp, new_audio data = load_data() # Keep only samples with minimum 2 sources data = {path: data[path] for path in data if len(data[path]) >= 2} # List all available paths paths = list(data.keys()) with gr.Blocks() as demo: current_audio = gr.State(get_random_audio) score_matrix = gr.State([[0, 0], [0, 0]]) with gr.Column(): with gr.Row(): audio_cmp = gr.Audio( audio_link(current_audio.value[0], current_audio.value[1]) ) with gr.Column(): with gr.Row(): button1 = gr.Button("Real Audio 🗣️") button2 = gr.Button("Cloned Audio 🤖") score_md = gr.Markdown(confusion_matrix_to_markdown(score_matrix.value, labels)) @gr.on( triggers=[button1.click], inputs=[current_audio, score_matrix], outputs=[audio_cmp, current_audio, score_matrix, score_md], ) def check_result(x, score_matrix): is_correct = x[1] == "commonvoice" audio_cmp, current_audio = next_audio() if is_correct: gr.Info("Correct! Real Audio", duration=DURATION) score_matrix[0][0] += 1 else: gr.Warning("Incorrect! Cloned Audio", duration=DURATION) score_matrix[0][1] += 1 score_md = confusion_matrix_to_markdown(score_matrix, labels) return audio_cmp, current_audio, score_matrix, score_md @gr.on( triggers=[button2.click], inputs=[current_audio, score_matrix], outputs=[audio_cmp, current_audio, score_matrix, score_md], ) def check_result(x, score_matrix): is_correct = x[1] != "commonvoice" audio_cmp, current_audio = next_audio() if is_correct: gr.Info("Correct! Cloned Audio", duration=DURATION) score_matrix[1][1] += 1 else: gr.Warning("Incorrect! Real Audio", duration=DURATION) score_matrix[1][0] += 1 score_md = confusion_matrix_to_markdown(score_matrix, labels) return audio_cmp, current_audio, score_matrix, score_md demo.launch()