# Source listing that appears to be scraped from a Hugging Face Space page
# (status shown on the page: Sleeping).
# File size: 4,734 bytes; revision 72c20ae.
import os
import gradio as gr
import json
import requests
import random
# Display names used as the row and column headings of the score table;
# index 0 is the "real" choice and index 1 the "cloned" choice.
labels = [
    "Real Audio 🗣️",
    "Cloned Audio 🤖",
]

# Passed as ``duration`` to the gr.Info / gr.Warning pop-ups
# (presumably seconds, per Gradio's API).
DURATION = 2
def get_accuracy(score_matrix) -> str:
    """Return the fraction of correct guesses as a percentage string.

    The two diagonal cells (``[0][0]`` and ``[1][1]``) count as correct
    answers; the denominator is the sum of every cell in the first two rows.

    Returns:
        A string such as ``"66.67%"``, or ``""`` when nothing has been
        scored yet (which also avoids dividing by zero).
    """
    top, bottom = score_matrix[0], score_matrix[1]
    hits = top[0] + bottom[1]
    answered = sum(top) + sum(bottom)
    if answered == 0:
        return ""
    return f"{hits / answered * 100:.2f}%"
def audio_link(path: str, model: str):
    """Build the download URL of one audio file in the hosted dataset.

    Args:
        path: File path of the sample, relative to the model's folder.
        model: Name of the top-level folder (one per audio source).

    Returns:
        The ``resolve/main`` URL for ``<model>/<path>`` with ``download=true``.
    """
    dataset_root = "https://huggingface.co/datasets/jerpint/vox-cloned-data/resolve/main"
    return f"{dataset_root}/{model}/{path}?download=true"
def confusion_matrix_to_markdown(matrix, labels=None):
    """Render a score matrix as a Markdown table followed by an accuracy line.

    Args:
        matrix: Sequence of rows of counts. It is also handed to
            ``get_accuracy``, which reads its first two rows.
        labels: List of names used both as column headers and as row
            headings. Falls back to ``"Class 0"``, ``"Class 1"``, ... when
            omitted or empty.

    Returns:
        The Markdown text: header row, separator row, one row per matrix
        row, a blank line, then ``Accuracy %: <value>``.
    """
    size = len(matrix)
    names = labels or [f"Class {idx}" for idx in range(size)]
    accuracy = get_accuracy(matrix)

    pieces = [
        # Header row (the leading empty cell sits above the row headings)
        f"| {' | '.join([''] + names)} |\n",
        f"| {' | '.join(['---'] * (size + 1))} |\n",
    ]
    # Data rows
    for idx, row in enumerate(matrix):
        cells = " | ".join(str(cell) for cell in row)
        pieces.append(f"| {names[idx]} | {cells} |\n")
    pieces.append(f"\nAccuracy %: {accuracy}\n")
    return "".join(pieces)
def load_and_cache_data():
    """Return the dataset index, downloading it to ``files.json`` on first use.

    If ``files.json`` does not exist in the working directory, the index is
    fetched from the dataset repository and written there; the function then
    always parses and returns the local copy.

    Returns:
        The parsed JSON content of the index file.

    Raises:
        Exception: If the download does not answer with HTTP 200.
        requests.exceptions.RequestException: On network failure, including
            the request timing out.
    """
    json_link = "https://huggingface.co/datasets/jerpint/vox-cloned-data/resolve/main/files.json?download=true"
    local_file = "files.json"
    if not os.path.exists(local_file):
        # Without a timeout a stalled connection would block forever.
        json_file = requests.get(json_link, timeout=30)
        if json_file.status_code != 200:
            raise Exception(f"Failed to load data from {json_link}")
        # Cache the file. The encoding is pinned so that the platform's
        # default codec cannot fail on (or mangle) non-ASCII characters.
        with open(local_file, "w", encoding="utf-8") as f:
            f.write(json_file.text)
    with open(local_file, "r", encoding="utf-8") as f:
        return json.load(f)
def load_data(timeout=30):
    """Download and parse the dataset index (``files.json``).

    Args:
        timeout: Seconds to wait for the server before giving up. The
            default keeps a stalled connection from hanging the caller
            indefinitely (this function is invoked at import time).

    Returns:
        The parsed JSON content of the index file.

    Raises:
        Exception: If the server does not answer with HTTP 200.
        requests.exceptions.RequestException: On network failure, including
            the request timing out.
    """
    json_link = "https://huggingface.co/datasets/jerpint/vox-cloned-data/resolve/main/files.json?download=true"
    json_file = requests.get(json_link, timeout=timeout)
    if json_file.status_code != 200:
        raise Exception(f"Failed to load data from {json_link}")
    print("Loaded data")
    return json.loads(json_file.text)
def select_random_model(path):
    """Pick which source of the sample at ``path`` to serve.

    Half of the time the answer is ``"commonvoice"``; otherwise one of the
    remaining sources listed under ``data[path]`` is drawn uniformly.
    """
    serve_commonvoice = random.random() < 0.5
    if serve_commonvoice:
        return "commonvoice"
    candidates = list(filter(lambda name: name != "commonvoice", data[path]))
    return random.choice(candidates)
def get_random_audio():
    """Draw a random sample and return it as a ``(path, model)`` pair."""
    chosen_path = random.choice(paths)
    return chosen_path, select_random_model(chosen_path)
def next_audio():
    """Draw a new sample and build the audio component that plays it.

    Returns:
        A pair ``(audio component, (path, model))`` so a caller can update
        both the player and the stored sample in one step.
    """
    sample = get_random_audio()
    sample_path, sample_model = sample
    player = gr.Audio(audio_link(sample_path, sample_model))
    return player, sample
# Download the index once at import time and drop every sample that has
# fewer than two sources.
data = {
    path: sources
    for path, sources in load_data().items()
    if len(sources) >= 2
}
# Paths that remain available for random selection.
paths = list(data)
with gr.Blocks() as demo:
    # One sample is drawn at build time and used for BOTH the stored state
    # and the player. Giving gr.State a callable instead would make Gradio
    # re-draw the state on every page load while the player kept the
    # build-time clip, so the first guess would be graded against a sample
    # the user never heard.
    initial_audio = get_random_audio()
    current_audio = gr.State(initial_audio)
    # 2x2 tally: first index is the player's guess, second index is the true
    # source (0 = real, 1 = cloned).
    score_matrix = gr.State([[0, 0], [0, 0]])

    with gr.Column():
        with gr.Row():
            audio_cmp = gr.Audio(audio_link(initial_audio[0], initial_audio[1]))
    with gr.Column():
        with gr.Row():
            button1 = gr.Button("Real Audio 🗣️")
            button2 = gr.Button("Cloned Audio 🤖")
        score_md = gr.Markdown(confusion_matrix_to_markdown(score_matrix.value, labels))

    # Give every page load a fresh sample, updating player and state together
    # so they can never disagree.
    demo.load(next_audio, inputs=None, outputs=[audio_cmp, current_audio])

    def _grade_guess(guess_idx, sample, matrix):
        """Score one guess, notify the player, and advance to the next clip.

        Args:
            guess_idx: 0 when the player answered "real", 1 for "cloned".
            sample: The ``(path, model)`` pair that was being played.
            matrix: The session's 2x2 tally; updated in place.

        Returns:
            ``(new audio component, new sample, matrix, score markdown)``,
            matching the handlers' ``outputs`` order.
        """
        # A clip is real exactly when it comes from the "commonvoice" source.
        actual_idx = 0 if sample[1] == "commonvoice" else 1
        actual_name = "Real Audio" if actual_idx == 0 else "Cloned Audio"
        new_cmp, new_sample = next_audio()
        if guess_idx == actual_idx:
            gr.Info(f"Correct! {actual_name}", duration=DURATION)
        else:
            gr.Warning(f"Incorrect! {actual_name}", duration=DURATION)
        matrix[guess_idx][actual_idx] += 1
        return new_cmp, new_sample, matrix, confusion_matrix_to_markdown(matrix, labels)

    @gr.on(
        triggers=[button1.click],
        inputs=[current_audio, score_matrix],
        outputs=[audio_cmp, current_audio, score_matrix, score_md],
    )
    def check_real_guess(x, score_matrix):
        """Handle a click on the "Real Audio" button."""
        return _grade_guess(0, x, score_matrix)

    @gr.on(
        triggers=[button2.click],
        inputs=[current_audio, score_matrix],
        outputs=[audio_cmp, current_audio, score_matrix, score_md],
    )
    def check_cloned_guess(x, score_matrix):
        """Handle a click on the "Cloned Audio" button."""
        return _grade_guess(1, x, score_matrix)

demo.launch()
|