# Spaces: Sleeping (non-code residue captured with this file; not part of the program)
import os | |
import sys | |
import subprocess | |
import gradio as gr | |
import json | |
import yaml | |
import tempfile | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import time | |
from pathlib import Path | |
# Directory that holds the VERSA checkout: a "versa" folder next to this script.
# setup_versa() clones and installs into it when the folder does not exist yet.
VERSA_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "versa")
def setup_versa():
    """Clone and install VERSA when no checkout exists at VERSA_ROOT.

    The existence of the VERSA_ROOT path is the only installation check: if it
    exists, nothing is cloned or installed. A checkout left behind by a failed
    ``pip install`` is therefore treated as installed on the next run.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` or ``pip install``
            exits with a non-zero status.
    """
    if os.path.exists(VERSA_ROOT):
        print("VERSA already installed.")
        return

    print("VERSA not found. Installing...")
    # Clone VERSA repository
    subprocess.run(
        ["git", "clone", "https://github.com/shinjiwlab/versa.git", VERSA_ROOT],
        check=True,
    )
    # Install through the running interpreter's pip so the package lands in
    # the same environment that later runs the scorer via sys.executable.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-e", VERSA_ROOT],
        check=True,
    )
    print("VERSA installed successfully!")
# Install VERSA if not already installed (runs at import time)
setup_versa()

# VERSA paths: the scorer script and the directory searched for metric configs
VERSA_BIN = os.path.join(VERSA_ROOT, "versa", "bin", "scorer.py")
VERSA_CONFIG_DIR = os.path.join(VERSA_ROOT, "egs")

# Create the data directories next to this script if they don't exist
DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
UPLOAD_DIR = os.path.join(DATA_DIR, "uploads")
RESULTS_DIR = os.path.join(DATA_DIR, "results")
for directory in [DATA_DIR, UPLOAD_DIR, RESULTS_DIR]:
    os.makedirs(directory, exist_ok=True)
# Find available metric configs
def get_available_metrics():
    """Return the sorted relative paths of all ``.yaml`` files under VERSA_CONFIG_DIR.

    The directory tree is walked recursively and every path is expressed
    relative to VERSA_CONFIG_DIR. The list is empty when no matching file is
    found.
    """
    metrics = []
    for root, _, files in os.walk(VERSA_CONFIG_DIR):
        metrics.extend(
            os.path.relpath(os.path.join(root, name), VERSA_CONFIG_DIR)
            for name in files
            if name.endswith('.yaml')
        )
    return sorted(metrics)
# Get metric description from YAML file
def get_metric_description(metric_path):
    """Return the ``description`` entry of a metric's YAML config.

    Args:
        metric_path: Config path relative to VERSA_CONFIG_DIR. ``None`` or an
            empty string (no metric selected) yields an empty description.

    Returns:
        The value stored under the top-level ``description`` key,
        ``'No description available'`` when the file has no such key (or is
        not a mapping), or a ``'Could not load description: ...'`` message
        when the file cannot be read or parsed.
    """
    if not metric_path:
        # Nothing selected; os.path.join would raise TypeError on None.
        return ""

    full_path = os.path.join(VERSA_CONFIG_DIR, metric_path)
    try:
        with open(full_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except Exception as e:
        # Display-only, best-effort lookup: report the failure as text rather
        # than letting it break the UI.
        return f"Could not load description: {str(e)}"

    # safe_load returns None for an empty file and may return a list or scalar.
    if not isinstance(config, dict):
        return 'No description available'
    return config.get('description', 'No description available')
def _load_versa_results(path):
    """Parse a results file as one JSON document, else as JSON Lines."""
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Fall back to one JSON document per non-empty line.
        return [json.loads(line) for line in text.splitlines() if line.strip()]


def _results_to_dataframe(results):
    """Build a DataFrame, showing a flat dict of scalars as a single row."""
    try:
        return pd.DataFrame(results)
    except ValueError:
        # pandas rejects a dict whose values are all scalars unless an index
        # is supplied; present such a dict as a one-row table instead.
        return pd.DataFrame([results])


# Process audio files and run VERSA evaluation
def evaluate_audio(gt_file, pred_file, metric_config, include_timestamps=False):
    """Evaluate a prediction against a ground-truth audio file using VERSA.

    Runs the VERSA scorer script (VERSA_BIN) in a subprocess with the current
    interpreter and reads back the results file it writes into a temporary
    directory.

    Args:
        gt_file: Path to the ground-truth audio file, or None.
        pred_file: Path to the prediction audio file, or None.
        metric_config: Metric config path relative to VERSA_CONFIG_DIR.
        include_timestamps: When true, ``--include_timestamp`` is appended to
            the scorer command line.

    Returns:
        A ``(table, text)`` pair. On success ``table`` is a pandas DataFrame
        and ``text`` is the results as indented JSON; on any failure ``table``
        is None and ``text`` explains what went wrong. Always returning two
        values keeps the result usable by a handler bound to two outputs.
    """
    if gt_file is None or pred_file is None:
        return None, "Please upload both ground truth and prediction audio files."
    if not metric_config:
        # os.path.join would raise TypeError on None.
        return None, "Please select an evaluation metric."

    # Full path to metric config
    metric_config_path = os.path.join(VERSA_CONFIG_DIR, metric_config)

    # Create temp directory for results; it is removed when the block exits,
    # so the results are parsed before leaving it.
    with tempfile.TemporaryDirectory() as temp_dir:
        output_file = os.path.join(temp_dir, "result.json")

        # Build command
        cmd = [
            sys.executable, VERSA_BIN,
            "--score_config", metric_config_path,
            "--gt", gt_file,
            "--pred", pred_file,
            "--output_file", output_file,
        ]
        if include_timestamps:
            cmd.append("--include_timestamp")

        # Run VERSA evaluation
        try:
            subprocess.run(
                cmd,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            return None, f"Error running VERSA: {e.stderr}"

        if not os.path.exists(output_file):
            return None, "Evaluation completed but no results file was generated."

        # Read results; JSONDecodeError and UnicodeDecodeError are ValueErrors.
        try:
            results = _load_versa_results(output_file)
        except (OSError, ValueError) as e:
            return None, f"Could not read VERSA results: {e}"

    if not results:
        return None, "Evaluation completed but no results were generated."

    # Format results as DataFrame
    return _results_to_dataframe(results), json.dumps(results, indent=2)
# Create the Gradio interface
def create_gradio_demo():
    """Build the Gradio Blocks UI for running VERSA evaluations.

    The first column holds the two audio inputs, the metric selector with its
    read-only description, the timestamp checkbox and the Evaluate button; the
    second column holds the results table and the raw JSON view. Changing the
    metric refreshes the description, and the button runs ``evaluate_audio``.

    Returns:
        The constructed ``gr.Blocks`` app; launching it is left to the caller.
    """
    available_metrics = get_available_metrics()

    # Prefer "speech.yaml"; otherwise fall back to the first config, if any.
    if "speech.yaml" in available_metrics:
        default_metric = "speech.yaml"
    elif available_metrics:
        default_metric = available_metrics[0]
    else:
        default_metric = None

    with gr.Blocks(title="VERSA Speech & Audio Evaluation Demo") as demo:
        gr.Markdown("# VERSA: Versatile Evaluation of Speech and Audio")
        gr.Markdown("Upload audio files to evaluate them using VERSA metrics.")

        with gr.Row():
            with gr.Column():
                gt_audio = gr.Audio(label="Ground Truth Audio", type="filepath", sources=["upload", "microphone"])
                pred_audio = gr.Audio(label="Prediction Audio", type="filepath", sources=["upload", "microphone"])
                metric_dropdown = gr.Dropdown(
                    choices=available_metrics,
                    label="Evaluation Metric",
                    value=default_metric,
                )
                metric_description = gr.Textbox(
                    label="Metric Description",
                    value=get_metric_description(default_metric) if default_metric else "",
                    interactive=False,
                )
                include_timestamps = gr.Checkbox(
                    label="Include Timestamps in Results",
                    value=False,
                )
                eval_button = gr.Button("Evaluate")

            with gr.Column():
                results_table = gr.Dataframe(label="Evaluation Results")
                raw_json = gr.Code(language="json", label="Raw Results")

        # Event handlers
        def update_metric_description(metric_path):
            return get_metric_description(metric_path)

        metric_dropdown.change(
            fn=update_metric_description,
            inputs=[metric_dropdown],
            outputs=[metric_description],
        )
        eval_button.click(
            fn=evaluate_audio,
            inputs=[gt_audio, pred_audio, metric_dropdown, include_timestamps],
            outputs=[results_table, raw_json],
        )

        gr.Markdown("""
        ## About VERSA
        VERSA (Versatile Evaluation of Speech and Audio) is a toolkit dedicated to collecting evaluation metrics in speech and audio quality. It provides a comprehensive connection to cutting-edge evaluation techniques and is tightly integrated with ESPnet.
        With full installation, VERSA offers over 60 metrics with 700+ metric variations based on different configurations. These metrics encompass evaluations utilizing diverse external resources, including matching and non-matching reference audio, text transcriptions, and text captions.
        Learn more at [VERSA GitHub Repository](https://github.com/shinjiwlab/versa)
        """)

    return demo
# Launch the app only when this file is run as a script, not when imported
if __name__ == "__main__":
    demo = create_gradio_demo()
    demo.launch()