Spaces:
Sleeping
Sleeping
ftshijt
committed on
Commit
·
be31546
1
Parent(s):
e0b2439
Initial update for versa demo
Browse files
- .gitignore +55 -0
- README.md +47 -6
- app.py +52 -150
- packages.txt +4 -0
- requirements.txt +11 -6
- test_versa.py +56 -0
.gitignore
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python cache files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
*.so
|
6 |
+
.Python
|
7 |
+
env/
|
8 |
+
build/
|
9 |
+
develop-eggs/
|
10 |
+
dist/
|
11 |
+
downloads/
|
12 |
+
eggs/
|
13 |
+
.eggs/
|
14 |
+
lib/
|
15 |
+
lib64/
|
16 |
+
parts/
|
17 |
+
sdist/
|
18 |
+
var/
|
19 |
+
wheels/
|
20 |
+
*.egg-info/
|
21 |
+
.installed.cfg
|
22 |
+
*.egg
|
23 |
+
|
24 |
+
# Virtual environment
|
25 |
+
venv/
|
26 |
+
ENV/
|
27 |
+
|
28 |
+
# Local development settings
|
29 |
+
.env
|
30 |
+
.venv
|
31 |
+
.idea/
|
32 |
+
.vscode/
|
33 |
+
*.swp
|
34 |
+
*.swo
|
35 |
+
|
36 |
+
# Data directories
|
37 |
+
data/
|
38 |
+
uploads/
|
39 |
+
results/
|
40 |
+
|
41 |
+
# Logs
|
42 |
+
logs/
|
43 |
+
*.log
|
44 |
+
|
45 |
+
# VERSA installation (will be cloned at runtime)
|
46 |
+
versa/
|
47 |
+
|
48 |
+
# OS generated files
|
49 |
+
.DS_Store
|
50 |
+
.DS_Store?
|
51 |
+
._*
|
52 |
+
.Spotlight-V100
|
53 |
+
.Trashes
|
54 |
+
ehthumbs.db
|
55 |
+
Thumbs.db
|
README.md
CHANGED
@@ -1,13 +1,54 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: VERSA Speech & Audio Evaluation Demo
|
3 |
+
emoji: 🎙️
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.7.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
+
# VERSA Speech & Audio Evaluation Demo
|
14 |
+
|
15 |
+
This demo allows you to evaluate speech and audio files using the VERSA (Versatile Evaluation of Speech and Audio) toolkit.
|
16 |
+
|
17 |
+
## How to Use
|
18 |
+
|
19 |
+
1. Upload a ground truth audio file (the reference audio)
|
20 |
+
2. Upload a prediction audio file (the audio to be evaluated)
|
21 |
+
3. Select an evaluation metric from the dropdown menu
|
22 |
+
4. Click the "Evaluate" button
|
23 |
+
5. View the results in the table and raw JSON format
|
24 |
+
|
25 |
+
## About VERSA
|
26 |
+
|
27 |
+
VERSA is a toolkit dedicated to collecting evaluation metrics in speech and audio quality. It provides a comprehensive connection to cutting-edge evaluation techniques and is tightly integrated with ESPnet.
|
28 |
+
|
29 |
+
With full installation, VERSA offers over 80 metrics with 700+ metric variations based on different configurations. These metrics encompass evaluations utilizing diverse external resources, including matching and non-matching reference audio, text transcriptions, and text captions.
|
30 |
+
|
31 |
+
Learn more at the [VERSA GitHub Repository](https://github.com/wavlab-speech/versa).
|
32 |
+
|
33 |
+
## Features
|
34 |
+
|
35 |
+
- Easy-to-use interface for audio evaluation
|
36 |
+
- Support for various evaluation metrics
|
37 |
+
- Detailed results displayed in table format
|
38 |
+
- Raw JSON output for further analysis
|
39 |
+
|
40 |
+
## Citation
|
41 |
+
|
42 |
+
If you use VERSA in your research, please cite:
|
43 |
+
|
44 |
+
```
|
45 |
+
@misc{shi2024versaversatileevaluationtoolkit,
|
46 |
+
title={VERSA: A Versatile Evaluation Toolkit for Speech, Audio, and Music},
|
47 |
+
author={Jiatong Shi and Hye-jin Shim and Jinchuan Tian and Siddhant Arora and Haibin Wu and Darius Petermann and Jia Qi Yip and You Zhang and Yuxun Tang and Wangyou Zhang and Dareen Safar Alharthi and Yichen Huang and Koichi Saito and Jionghao Han and Yiwen Zhao and Chris Donahue and Shinji Watanabe},
|
48 |
+
year={2024},
|
49 |
+
eprint={2412.17667},
|
50 |
+
archivePrefix={arXiv},
|
51 |
+
primaryClass={cs.SD},
|
52 |
+
url={https://arxiv.org/abs/2412.17667},
|
53 |
+
}
|
54 |
+
```
|
app.py
CHANGED
@@ -1,154 +1,56 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
# import spaces #[uncomment to use ZeroGPU]
|
6 |
-
from diffusers import DiffusionPipeline
|
7 |
-
import torch
|
8 |
-
|
9 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
10 |
-
model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
|
11 |
-
|
12 |
-
if torch.cuda.is_available():
|
13 |
-
torch_dtype = torch.float16
|
14 |
-
else:
|
15 |
-
torch_dtype = torch.float32
|
16 |
-
|
17 |
-
pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
|
18 |
-
pipe = pipe.to(device)
|
19 |
-
|
20 |
-
MAX_SEED = np.iinfo(np.int32).max
|
21 |
-
MAX_IMAGE_SIZE = 1024
|
22 |
-
|
23 |
-
|
24 |
-
# @spaces.GPU #[uncomment to use ZeroGPU]
|
25 |
-
def infer(
|
26 |
-
prompt,
|
27 |
-
negative_prompt,
|
28 |
-
seed,
|
29 |
-
randomize_seed,
|
30 |
-
width,
|
31 |
-
height,
|
32 |
-
guidance_scale,
|
33 |
-
num_inference_steps,
|
34 |
-
progress=gr.Progress(track_tqdm=True),
|
35 |
-
):
|
36 |
-
if randomize_seed:
|
37 |
-
seed = random.randint(0, MAX_SEED)
|
38 |
-
|
39 |
-
generator = torch.Generator().manual_seed(seed)
|
40 |
-
|
41 |
-
image = pipe(
|
42 |
-
prompt=prompt,
|
43 |
-
negative_prompt=negative_prompt,
|
44 |
-
guidance_scale=guidance_scale,
|
45 |
-
num_inference_steps=num_inference_steps,
|
46 |
-
width=width,
|
47 |
-
height=height,
|
48 |
-
generator=generator,
|
49 |
-
).images[0]
|
50 |
-
|
51 |
-
return image, seed
|
52 |
-
|
53 |
-
|
54 |
-
examples = [
|
55 |
-
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
|
56 |
-
"An astronaut riding a green horse",
|
57 |
-
"A delicious ceviche cheesecake slice",
|
58 |
-
]
|
59 |
-
|
60 |
-
css = """
|
61 |
-
#col-container {
|
62 |
-
margin: 0 auto;
|
63 |
-
max-width: 640px;
|
64 |
-
}
|
65 |
"""
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
step=32,
|
116 |
-
value=1024, # Replace with defaults that work for your model
|
117 |
-
)
|
118 |
-
|
119 |
-
with gr.Row():
|
120 |
-
guidance_scale = gr.Slider(
|
121 |
-
label="Guidance scale",
|
122 |
-
minimum=0.0,
|
123 |
-
maximum=10.0,
|
124 |
-
step=0.1,
|
125 |
-
value=0.0, # Replace with defaults that work for your model
|
126 |
-
)
|
127 |
-
|
128 |
-
num_inference_steps = gr.Slider(
|
129 |
-
label="Number of inference steps",
|
130 |
-
minimum=1,
|
131 |
-
maximum=50,
|
132 |
-
step=1,
|
133 |
-
value=2, # Replace with defaults that work for your model
|
134 |
-
)
|
135 |
-
|
136 |
-
gr.Examples(examples=examples, inputs=[prompt])
|
137 |
-
gr.on(
|
138 |
-
triggers=[run_button.click, prompt.submit],
|
139 |
-
fn=infer,
|
140 |
-
inputs=[
|
141 |
-
prompt,
|
142 |
-
negative_prompt,
|
143 |
-
seed,
|
144 |
-
randomize_seed,
|
145 |
-
width,
|
146 |
-
height,
|
147 |
-
guidance_scale,
|
148 |
-
num_inference_steps,
|
149 |
-
],
|
150 |
-
outputs=[result, seed],
|
151 |
-
)
|
152 |
|
153 |
if __name__ == "__main__":
|
154 |
-
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Test script to verify that VERSA is installed correctly.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
"""
|
5 |
|
6 |
+
import os
|
7 |
+
import sys
|
8 |
+
import subprocess
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
# Check if VERSA is installed
|
12 |
+
VERSA_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "versa")
|
13 |
+
|
14 |
+
def check_versa(versa_root=None):
    """Check that a VERSA checkout exists and has the expected layout.

    The check is purely structural: it verifies that the checkout
    directory, the ``versa/bin/scorer.py`` script, and the ``egs``
    configuration directory exist, and that at least one ``.yaml`` file
    is present somewhere under ``egs``. Progress and the reason for any
    failure are printed to stdout.

    Args:
        versa_root: Path of the VERSA checkout to inspect. When ``None``
            (the default) the module-level ``VERSA_ROOT`` is used.

    Returns:
        ``True`` if every check passes, ``False`` otherwise.
    """
    root = VERSA_ROOT if versa_root is None else versa_root
    print("Testing VERSA installation...")

    # The checkout itself must be a directory, not merely an existing path.
    if not os.path.isdir(root):
        print("VERSA not found.")
        return False

    # Check if the scorer.py exists
    scorer_path = os.path.join(root, "versa", "bin", "scorer.py")
    if not os.path.isfile(scorer_path):
        print(f"VERSA scorer not found at {scorer_path}")
        return False

    # Check if the config directory exists
    config_dir = os.path.join(root, "egs")
    if not os.path.isdir(config_dir):
        print(f"VERSA config directory not found at {config_dir}")
        return False

    # Collect every YAML file below the config directory. os.walk yields
    # entries in an unspecified order, so sort to keep the preview
    # printed below stable between runs and machines.
    metrics = sorted(
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(config_dir)
        for filename in filenames
        if filename.endswith('.yaml')
    )

    if not metrics:
        print("No metric configurations found in VERSA.")
        return False

    print(f"Found {len(metrics)} metric configurations.")
    for metric in metrics[:5]:  # Print first 5 metrics
        print(f"- {os.path.relpath(metric, config_dir)}")

    if len(metrics) > 5:
        print(f"... and {len(metrics) - 5} more.")

    print("VERSA installation looks good!")
    return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
if __name__ == "__main__":
    # Propagate the outcome as the process exit status so that callers
    # (shell scripts, CI) can detect a failed check: 0 on success, 1 on failure.
    sys.exit(0 if check_versa() else 1)
|
packages.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
git
|
2 |
+
build-essential
|
3 |
+
libsndfile1
|
4 |
+
ffmpeg
|
requirements.txt
CHANGED
@@ -1,6 +1,11 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio>=4.0.0
|
2 |
+
pyyaml>=6.0
|
3 |
+
pandas>=1.5.0
|
4 |
+
numpy>=1.20.0
|
5 |
+
matplotlib>=3.5.0
|
6 |
+
soundfile>=0.12.1
|
7 |
+
scipy>=1.7.0
|
8 |
+
torch>=1.10.0
|
9 |
+
torchaudio>=0.10.0
|
10 |
+
librosa>=0.9.2
|
11 |
+
GitPython>=3.1.30
|
test_versa.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Test script to verify that VERSA is installed correctly.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import os
|
7 |
+
import sys
|
8 |
+
import subprocess
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
# Check if VERSA is installed
|
12 |
+
VERSA_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "versa")
|
13 |
+
|
14 |
+
def check_versa(versa_root=None):
    """Check that a VERSA checkout exists and has the expected layout.

    The check is purely structural: it verifies that the checkout
    directory, the ``versa/bin/scorer.py`` script, and the ``egs``
    configuration directory exist, and that at least one ``.yaml`` file
    is present somewhere under ``egs``. Progress and the reason for any
    failure are printed to stdout.

    Args:
        versa_root: Path of the VERSA checkout to inspect. When ``None``
            (the default) the module-level ``VERSA_ROOT`` is used.

    Returns:
        ``True`` if every check passes, ``False`` otherwise.
    """
    root = VERSA_ROOT if versa_root is None else versa_root
    print("Testing VERSA installation...")

    # The checkout itself must be a directory, not merely an existing path.
    if not os.path.isdir(root):
        print("VERSA not found.")
        return False

    # Check if the scorer.py exists
    scorer_path = os.path.join(root, "versa", "bin", "scorer.py")
    if not os.path.isfile(scorer_path):
        print(f"VERSA scorer not found at {scorer_path}")
        return False

    # Check if the config directory exists
    config_dir = os.path.join(root, "egs")
    if not os.path.isdir(config_dir):
        print(f"VERSA config directory not found at {config_dir}")
        return False

    # Collect every YAML file below the config directory. os.walk yields
    # entries in an unspecified order, so sort to keep the preview
    # printed below stable between runs and machines.
    metrics = sorted(
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(config_dir)
        for filename in filenames
        if filename.endswith('.yaml')
    )

    if not metrics:
        print("No metric configurations found in VERSA.")
        return False

    print(f"Found {len(metrics)} metric configurations.")
    for metric in metrics[:5]:  # Print first 5 metrics
        print(f"- {os.path.relpath(metric, config_dir)}")

    if len(metrics) > 5:
        print(f"... and {len(metrics) - 5} more.")

    print("VERSA installation looks good!")
    return True
|
54 |
+
|
55 |
+
if __name__ == "__main__":
    # Propagate the outcome as the process exit status so that callers
    # (shell scripts, CI) can detect a failed check: 0 on success, 1 on failure.
    sys.exit(0 if check_versa() else 1)
|