Spaces:
Running
Running
admin
committed on
Commit
·
d08ce40
1
Parent(s):
2f89051
fix frame display style
Browse files
app.py
CHANGED
@@ -105,6 +105,14 @@ def load(audio_path: str, converto="mel"):
|
|
105 |
return list(norm(Xtr_spec))
|
106 |
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
def infer(audio_path: str, log_name: str):
|
109 |
if not audio_path:
|
110 |
return None, "Please input an audio!"
|
@@ -113,6 +121,8 @@ def infer(audio_path: str, log_name: str):
|
|
113 |
spec = log_name.split("_")[-1]
|
114 |
try:
|
115 |
input = load(audio_path, converto=spec)
|
|
|
|
|
116 |
if "vit" in backbone or "swin" in backbone:
|
117 |
eval_net = t_EvalNet(
|
118 |
backbone,
|
@@ -140,9 +150,14 @@ def infer(audio_path: str, log_name: str):
|
|
140 |
for y in output:
|
141 |
preds = list(y.T)
|
142 |
for pred in preds:
|
|
|
|
|
|
|
|
|
|
|
143 |
outputs.append(
|
144 |
{
|
145 |
-
"Frame":
|
146 |
"Tech": TRANSLATE[CLASSES[torch.argmax(pred).item()]],
|
147 |
}
|
148 |
)
|
|
|
105 |
return list(norm(Xtr_spec))
|
106 |
|
107 |
|
108 |
+
def format_second(seconds):
    """Format a duration given in seconds as ``HH:MM:SS.mmm``.

    The value is rounded to the nearest millisecond first and then split
    into fields, so a fraction that rounds up (e.g. ``59.9996``) carries
    correctly into the seconds/minutes/hours fields.

    Args:
        seconds: Duration in seconds (int or float). Negative values are
            rendered with a leading ``-``.

    Returns:
        A string such as ``"00:01:23.450"``. Hours are zero-padded to at
        least two digits and are not wrapped at 24.
    """
    # Work in integer milliseconds. The previous version formatted the
    # fractional part with ``:.3f`` after a literal ".", which produced
    # strings like "00:00:01.0.500".
    total_ms = int(round(seconds * 1000))
    sign = "-" if total_ms < 0 else ""
    total_ms = abs(total_ms)

    whole_seconds, millis = divmod(total_ms, 1000)
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
|
114 |
+
|
115 |
+
|
116 |
def infer(audio_path: str, log_name: str):
|
117 |
if not audio_path:
|
118 |
return None, "Please input an audio!"
|
|
|
121 |
spec = log_name.split("_")[-1]
|
122 |
try:
|
123 |
input = load(audio_path, converto=spec)
|
124 |
+
dur = librosa.get_duration(path=audio_path)
|
125 |
+
frames_per_3s = input[0].shape[1]
|
126 |
if "vit" in backbone or "swin" in backbone:
|
127 |
eval_net = t_EvalNet(
|
128 |
backbone,
|
|
|
150 |
for y in output:
|
151 |
preds = list(y.T)
|
152 |
for pred in preds:
|
153 |
+
start = index * TIME_LENGTH / frames_per_3s
|
154 |
+
if start > dur:
|
155 |
+
break
|
156 |
+
|
157 |
+
to = (index + 1) * TIME_LENGTH / frames_per_3s
|
158 |
outputs.append(
|
159 |
{
|
160 |
+
"Frame": f"{format_second(start)} - {format_second(to)}",
|
161 |
"Tech": TRANSLATE[CLASSES[torch.argmax(pred).item()]],
|
162 |
}
|
163 |
)
|