ftshijt committed
Commit be31546 · 1 Parent(s): e0b2439

Initial update for versa demo

Files changed (6)
  1. .gitignore +55 -0
  2. README.md +47 -6
  3. app.py +52 -150
  4. packages.txt +4 -0
  5. requirements.txt +11 -6
  6. test_versa.py +56 -0
.gitignore ADDED
@@ -0,0 +1,55 @@
+ # Python cache files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ env/
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual environment
+ venv/
+ ENV/
+
+ # Local development settings
+ .env
+ .venv
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+
+ # Data directories
+ data/
+ uploads/
+ results/
+
+ # Logs
+ logs/
+ *.log
+
+ # VERSA installation (will be cloned at runtime)
+ versa/
+
+ # OS generated files
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
README.md CHANGED
@@ -1,13 +1,54 @@
  ---
- title: Versa
- emoji: 🖼
- colorFrom: purple
- colorTo: red
+ title: VERSA Speech & Audio Evaluation Demo
+ emoji: 🎙️
+ colorFrom: blue
+ colorTo: indigo
  sdk: gradio
- sdk_version: 5.0.1
+ sdk_version: 4.7.1
  app_file: app.py
  pinned: false
  license: apache-2.0
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # VERSA Speech & Audio Evaluation Demo
+
+ This demo allows you to evaluate speech and audio files using the VERSA (Versatile Evaluation of Speech and Audio) toolkit.
+
+ ## How to Use
+
+ 1. Upload a ground truth audio file (the reference audio)
+ 2. Upload a prediction audio file (the audio to be evaluated)
+ 3. Select an evaluation metric from the dropdown menu
+ 4. Click the "Evaluate" button
+ 5. View the results in the table and raw JSON format
+
+ ## About VERSA
+
+ VERSA is a toolkit dedicated to collecting evaluation metrics in speech and audio quality. It provides a comprehensive connection to cutting-edge evaluation techniques and is tightly integrated with ESPnet.
+
+ With full installation, VERSA offers over 80 metrics with 700+ metric variations based on different configurations. These metrics encompass evaluations utilizing diverse external resources, including matching and non-matching reference audio, text transcriptions, and text captions.
+
+ Learn more at the [VERSA GitHub Repository](https://github.com/wavlab-speech/versa).
+
+ ## Features
+
+ - Easy-to-use interface for audio evaluation
+ - Support for various evaluation metrics
+ - Detailed results displayed in table format
+ - Raw JSON output for further analysis
+
+ ## Citation
+
+ If you use VERSA in your research, please cite:
+
+ ```
+ @misc{shi2024versaversatileevaluationtoolkit,
+   title={VERSA: A Versatile Evaluation Toolkit for Speech, Audio, and Music},
+   author={Jiatong Shi and Hye-jin Shim and Jinchuan Tian and Siddhant Arora and Haibin Wu and Darius Petermann and Jia Qi Yip and You Zhang and Yuxun Tang and Wangyou Zhang and Dareen Safar Alharthi and Yichen Huang and Koichi Saito and Jionghao Han and Yiwen Zhao and Chris Donahue and Shinji Watanabe},
+   year={2024},
+   eprint={2412.17667},
+   archivePrefix={arXiv},
+   primaryClass={cs.SD},
+   url={https://arxiv.org/abs/2412.17667},
+ }
+ ```
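Editor's note: the "How to Use" steps in the new README (upload ground truth, upload prediction, pick a metric, evaluate) ultimately have to hand the two audio files and a metric configuration to VERSA's scorer script, whose expected location both app.py and test_versa.py compute as versa/versa/bin/scorer.py. A minimal sketch of such a call is below; the --score_config, --gt, --pred, and --output_file flag names are assumptions about the scorer's CLI rather than something this commit shows.

```python
import os
import subprocess
import tempfile

# Path layout mirrors test_versa.py: the VERSA repo is expected under ./versa
VERSA_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "versa")
SCORER = os.path.join(VERSA_ROOT, "versa", "bin", "scorer.py")


def run_versa(gt_path: str, pred_path: str, metric_config: str) -> str:
    """Score one ground-truth/prediction pair with a YAML config from versa/egs/."""
    with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp:
        output_file = tmp.name

    subprocess.run(
        [
            "python", SCORER,
            "--score_config", metric_config,  # assumed flag name
            "--gt", gt_path,                  # assumed flag name
            "--pred", pred_path,              # assumed flag name
            "--output_file", output_file,     # assumed flag name
        ],
        check=True,
    )

    with open(output_file) as f:
        return f.read()  # raw scorer output, shown as "raw JSON" in the UI
```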
app.py CHANGED
@@ -1,154 +1,56 @@
- import gradio as gr
- import numpy as np
- import random
-
- # import spaces #[uncomment to use ZeroGPU]
- from diffusers import DiffusionPipeline
- import torch
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- model_repo_id = "stabilityai/sdxl-turbo"  # Replace to the model you would like to use
-
- if torch.cuda.is_available():
-     torch_dtype = torch.float16
- else:
-     torch_dtype = torch.float32
-
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
- pipe = pipe.to(device)
-
- MAX_SEED = np.iinfo(np.int32).max
- MAX_IMAGE_SIZE = 1024
-
-
- # @spaces.GPU #[uncomment to use ZeroGPU]
- def infer(
-     prompt,
-     negative_prompt,
-     seed,
-     randomize_seed,
-     width,
-     height,
-     guidance_scale,
-     num_inference_steps,
-     progress=gr.Progress(track_tqdm=True),
- ):
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-
-     generator = torch.Generator().manual_seed(seed)
-
-     image = pipe(
-         prompt=prompt,
-         negative_prompt=negative_prompt,
-         guidance_scale=guidance_scale,
-         num_inference_steps=num_inference_steps,
-         width=width,
-         height=height,
-         generator=generator,
-     ).images[0]
-
-     return image, seed
-
-
- examples = [
-     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-     "An astronaut riding a green horse",
-     "A delicious ceviche cheesecake slice",
- ]
-
- css = """
- #col-container {
-     margin: 0 auto;
-     max-width: 640px;
- }
+ #!/usr/bin/env python3
+ """
+ Test script to verify that VERSA is installed correctly.
  """

- with gr.Blocks(css=css) as demo:
-     with gr.Column(elem_id="col-container"):
-         gr.Markdown(" # Text-to-Image Gradio Template")
-
-         with gr.Row():
-             prompt = gr.Text(
-                 label="Prompt",
-                 show_label=False,
-                 max_lines=1,
-                 placeholder="Enter your prompt",
-                 container=False,
-             )
-
-             run_button = gr.Button("Run", scale=0, variant="primary")
-
-         result = gr.Image(label="Result", show_label=False)
-
-         with gr.Accordion("Advanced Settings", open=False):
-             negative_prompt = gr.Text(
-                 label="Negative prompt",
-                 max_lines=1,
-                 placeholder="Enter a negative prompt",
-                 visible=False,
-             )
-
-             seed = gr.Slider(
-                 label="Seed",
-                 minimum=0,
-                 maximum=MAX_SEED,
-                 step=1,
-                 value=0,
-             )
-
-             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-             with gr.Row():
-                 width = gr.Slider(
-                     label="Width",
-                     minimum=256,
-                     maximum=MAX_IMAGE_SIZE,
-                     step=32,
-                     value=1024,  # Replace with defaults that work for your model
-                 )
-
-                 height = gr.Slider(
-                     label="Height",
-                     minimum=256,
-                     maximum=MAX_IMAGE_SIZE,
-                     step=32,
-                     value=1024,  # Replace with defaults that work for your model
-                 )
-
-             with gr.Row():
-                 guidance_scale = gr.Slider(
-                     label="Guidance scale",
-                     minimum=0.0,
-                     maximum=10.0,
-                     step=0.1,
-                     value=0.0,  # Replace with defaults that work for your model
-                 )
-
-                 num_inference_steps = gr.Slider(
-                     label="Number of inference steps",
-                     minimum=1,
-                     maximum=50,
-                     step=1,
-                     value=2,  # Replace with defaults that work for your model
-                 )
-
-     gr.Examples(examples=examples, inputs=[prompt])
-     gr.on(
-         triggers=[run_button.click, prompt.submit],
-         fn=infer,
-         inputs=[
-             prompt,
-             negative_prompt,
-             seed,
-             randomize_seed,
-             width,
-             height,
-             guidance_scale,
-             num_inference_steps,
-         ],
-         outputs=[result, seed],
-     )
+ import os
+ import sys
+ import subprocess
+ from pathlib import Path
+
+ # Check if VERSA is installed
+ VERSA_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "versa")
+
+ def check_versa():
+     """Check if VERSA is installed and working"""
+     print("Testing VERSA installation...")
+
+     if not os.path.exists(VERSA_ROOT):
+         print("VERSA not found.")
+         return False
+
+     # Check if the scorer.py exists
+     scorer_path = os.path.join(VERSA_ROOT, "versa", "bin", "scorer.py")
+     if not os.path.exists(scorer_path):
+         print(f"VERSA scorer not found at {scorer_path}")
+         return False
+
+     # Check if the config directory exists
+     config_dir = os.path.join(VERSA_ROOT, "egs")
+     if not os.path.exists(config_dir):
+         print(f"VERSA config directory not found at {config_dir}")
+         return False
+
+     # Check for available metrics
+     metrics = []
+     for root, _, files in os.walk(config_dir):
+         for file in files:
+             if file.endswith('.yaml'):
+                 metrics.append(os.path.join(root, file))
+
+     if not metrics:
+         print("No metric configurations found in VERSA.")
+         return False
+
+     print(f"Found {len(metrics)} metric configurations.")
+     for metric in metrics[:5]:  # Print first 5 metrics
+         print(f"- {os.path.relpath(metric, config_dir)}")
+
+     if len(metrics) > 5:
+         print(f"... and {len(metrics) - 5} more.")
+
+     print("VERSA installation looks good!")
+     return True

  if __name__ == "__main__":
-     demo.launch()
+     check_versa()
packages.txt ADDED
@@ -0,0 +1,4 @@
+ git
+ build-essential
+ libsndfile1
+ ffmpeg
requirements.txt CHANGED
@@ -1,6 +1,11 @@
- accelerate
- diffusers
- invisible_watermark
- torch
- transformers
- xformers
+ gradio>=4.0.0
+ pyyaml>=6.0
+ pandas>=1.5.0
+ numpy>=1.20.0
+ matplotlib>=3.5.0
+ soundfile>=0.12.1
+ scipy>=1.7.0
+ torch>=1.10.0
+ torchaudio>=0.10.0
+ librosa>=0.9.2
+ GitPython>=3.1.30
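Editor's note: requirements.txt now pulls in GitPython, which lines up with the .gitignore comment that versa/ "will be cloned at runtime" rather than committed. A hedged sketch of that startup step, assuming the Space clones the repository named in the README and then pip-installs it in editable mode (the install step is an assumption, not shown in this commit).

```python
import os
import subprocess

from git import Repo  # GitPython, listed in requirements.txt

VERSA_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "versa")


def ensure_versa() -> None:
    """Clone VERSA on first launch so the toolkit does not live in this repo."""
    if not os.path.exists(VERSA_ROOT):
        Repo.clone_from("https://github.com/wavlab-speech/versa.git", VERSA_ROOT)
        # Assumed follow-up: install the cloned package so scorer.py imports work.
        subprocess.run(["pip", "install", "-e", VERSA_ROOT], check=True)
```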
test_versa.py ADDED
@@ -0,0 +1,56 @@
+ #!/usr/bin/env python3
+ """
+ Test script to verify that VERSA is installed correctly.
+ """
+
+ import os
+ import sys
+ import subprocess
+ from pathlib import Path
+
+ # Check if VERSA is installed
+ VERSA_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "versa")
+
+ def check_versa():
+     """Check if VERSA is installed and working"""
+     print("Testing VERSA installation...")
+
+     if not os.path.exists(VERSA_ROOT):
+         print("VERSA not found.")
+         return False
+
+     # Check if the scorer.py exists
+     scorer_path = os.path.join(VERSA_ROOT, "versa", "bin", "scorer.py")
+     if not os.path.exists(scorer_path):
+         print(f"VERSA scorer not found at {scorer_path}")
+         return False
+
+     # Check if the config directory exists
+     config_dir = os.path.join(VERSA_ROOT, "egs")
+     if not os.path.exists(config_dir):
+         print(f"VERSA config directory not found at {config_dir}")
+         return False
+
+     # Check for available metrics
+     metrics = []
+     for root, _, files in os.walk(config_dir):
+         for file in files:
+             if file.endswith('.yaml'):
+                 metrics.append(os.path.join(root, file))
+
+     if not metrics:
+         print("No metric configurations found in VERSA.")
+         return False
+
+     print(f"Found {len(metrics)} metric configurations.")
+     for metric in metrics[:5]:  # Print first 5 metrics
+         print(f"- {os.path.relpath(metric, config_dir)}")
+
+     if len(metrics) > 5:
+         print(f"... and {len(metrics) - 5} more.")
+
+     print("VERSA installation looks good!")
+     return True
+
+ if __name__ == "__main__":
+     check_versa()
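Editor's note: the README promises results "in the table and raw JSON format", and requirements.txt adds pandas, presumably for the table view. A small sketch of that conversion, assuming (not shown in this commit) that the scorer writes one JSON object per scored utterance to its output file.

```python
import json

import pandas as pd


def results_to_table(output_file: str) -> pd.DataFrame:
    """Parse a scorer output file (assumed JSON-lines) into a metrics table."""
    records = []
    with open(output_file) as f:
        for line in f:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    # One row per utterance, one column per reported metric value.
    return pd.DataFrame(records)
```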