Spaces:
Runtime error
Runtime error
File size: 3,467 Bytes
1ffd672 f8d0565 ff1bae1 b3ebb49 8d7cf3a b3ebb49 f8d0565 b3ebb49 4c71707 8d7cf3a b3ebb49 8112d7a f39c7a2 4c71707 b3ebb49 f8d0565 55c2b20 663ac44 fa6f9dd 5caf6ad 0f7a10e df07559 f8d0565 4fbd840 1ec80fa f8d0565 0c3b1db 464e88d aae5aef 4fbd840 464e88d f8d0565 464e88d b3ebb49 1ec80fa 464e88d f8d0565 b3ebb49 4fbd840 f8d0565 1ffd672 b3ebb49 3db5979 4fbd840 3db5979 f8d0565 1ec80fa 464e88d f8d0565 5d2138a f9db145 f083023 f9db145 f083023 f9db145 f083023 f8d0565 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import streamlit as st
import langcodes
from allosaurus.app import read_recognizer
from pathlib import Path
import string
from itertools import permutations
from collections import defaultdict
@st.cache
def get_supported_codes():
    """Return the language codes the Allosaurus recognizer reports as available.

    The list starts with ``"ipa"`` (the default option) followed by every
    three-letter lowercase ASCII code for which ``model.is_available(code)``
    is true, in lexicographic order.
    """
    # Imported locally because the module header only imports ``permutations``.
    from itertools import product

    model = read_recognizer()
    supported_codes = ["ipa"]  # default option
    # product(), not permutations(): permutations never repeats a letter, so
    # codes containing a repeated letter (e.g. "ell", "aaa") were never tried.
    candidates = map("".join, product(string.ascii_lowercase, repeat=3))
    supported_codes.extend(code for code in candidates if model.is_available(code))
    return supported_codes
def get_path_to_wav_format(uploaded_file):
    """Write an uploaded audio file to disk and return the path of its WAV form.

    Args:
        uploaded_file: Object exposing ``name`` (the original file name) and
            ``getvalue()`` (the raw bytes), e.g. a Streamlit upload.

    Returns:
        A relative ``Path`` to a ``.wav`` file in the current working
        directory: the saved file itself for ``.wav`` uploads, or a converted
        copy for ``.mp3`` uploads. ``None`` for any other extension (the
        uploaded bytes are still written to disk in that case).
    """
    # Keep only the final path component so a crafted upload name cannot
    # write outside the working directory.
    actual_file_path = Path(Path(uploaded_file.name).name)
    actual_file_path.write_bytes(uploaded_file.getvalue())

    # Compare the real extension (case-insensitively) rather than searching
    # for ".wav" anywhere in the name, which misfires on e.g. "x.wav.txt".
    suffix = actual_file_path.suffix.lower()
    if suffix == ".wav":
        return actual_file_path
    if suffix == ".mp3":
        # torchaudio is only needed for conversion and is not imported at
        # module level, so bring it into scope here.
        import torchaudio

        new_desired_path = actual_file_path.with_suffix(".wav")
        waveform, sample_rate = torchaudio.load(actual_file_path)
        # st.info takes a single message body; format the details into it.
        st.info(f"Converting {actual_file_path.name} to WAV (sample rate: {sample_rate} Hz)")
        torchaudio.save(new_desired_path, waveform, sample_rate)
        return new_desired_path
    # Unsupported extension: nothing usable to hand to the recognizer.
    return None
@st.cache
def get_langcode_description(input_code, url=False):
    """Return a human-readable description of a language code.

    Args:
        input_code: Language tag to describe. An empty value or ``"ipa"``
            yields the universal-setting description.
        url: When true, return the display name as a Markdown link to the
            code's page on iso639-3.sil.org instead of the bare name.

    Returns:
        The language's display name (or Markdown link). Falls back to the
        universal-setting description when ``input_code`` is empty, is
        ``"ipa"``, or cannot be resolved by ``langcodes``.
    """
    description = "the default universal setting, not specific to any language"
    # "ipa" is the default allosaurus recognizer, not a real language tag.
    if not input_code or input_code == "ipa":
        return description

    try:
        lang = langcodes.get(input_code)
        alpha3 = lang.to_alpha3()
        display_name = lang.display_name()
    except (langcodes.LanguageTagError, LookupError):
        # Malformed tag, or (per the langcodes docs) a language with no known
        # three-letter code: deliberately fall back to the generic text
        # rather than failing the whole page.
        return description

    if url:
        return f"[{display_name}](https://iso639-3.sil.org/code/{alpha3})"
    return display_name
@st.cache
def get_langcode_with_description(input_code):
    """Format ``input_code`` as ``"<code>: <description>"`` for display."""
    description = get_langcode_description(input_code)
    return f"{input_code}: {description}"
if __name__ == "__main__":
    # Page flow: pick a language code, report the choice, then run the
    # recognizer over every uploaded file.
    # The language is chosen from a selectbox; a free-text ISO-code input
    # was an earlier alternative and is no longer used.
    supported_codes = get_supported_codes()
    langcode = st.selectbox(
        "ISO code for input language. Allosaurus doesn't need this, but it can improve accuracy",
        options=supported_codes,
        index=supported_codes.index("ipa"),  # preselect the default option
        format_func=get_langcode_with_description,
    )

    model = read_recognizer()
    description = get_langcode_description(langcode, url=True)
    st.write(f"Instructing Allosaurus to recognize using language {langcode}. That is, {description}")

    accepted_types = [
        ".wav",
        # TODO: convert .mp3 to .wav and save, then accept ".mp3" here
    ]
    uploaded_files = st.file_uploader(
        "Choose a file",
        type=accepted_types,
        accept_multiple_files=True,
    )

    for uploaded_file in uploaded_files:
        if uploaded_file is None:
            continue
        # Show a player, then the on-disk path, then the recognizer output.
        st.audio(uploaded_file, format='audio/wav')
        wav_file = get_path_to_wav_format(uploaded_file)
        st.write(wav_file)
        result = model.recognize(wav_file, langcode)
        st.write(result)
|