phonemize-audio / app.py
cdleong's picture
Update app.py
b77d75e
raw
history blame
3.62 kB
import string
from collections import defaultdict
from itertools import permutations
from itertools import product
from pathlib import Path

import langcodes
import streamlit as st
import torchaudio
from allosaurus.app import read_recognizer
@st.cache
def get_supported_codes():
    """Return the language codes that can be offered to the user.

    The list starts with ``"ipa"`` (the default option) and is followed by
    every three-letter lowercase code for which the recognizer's
    ``is_available`` returns true, in alphabetical order.

    Returns:
        list[str]: ``"ipa"`` followed by the available three-letter codes.
    """
    model = read_recognizer()
    supported_codes = ["ipa"]  # default option

    # product(), not permutations(): permutations never repeats a letter, so
    # codes such as "ell" or "nno" would silently be left out of the list.
    for letters in product(string.ascii_lowercase, repeat=3):
        code = "".join(letters)
        # "ipa" is already listed as the default; do not offer it twice.
        if code != "ipa" and model.is_available(code):
            supported_codes.append(code)
    return supported_codes
def get_path_to_wav_format(uploaded_file):
    """Store an uploaded audio file locally and return the path of a WAV version.

    Args:
        uploaded_file: Object exposing ``name`` (the original file name) and
            ``getvalue()`` (the raw bytes), as passed in by the upload loop.

    Returns:
        pathlib.Path | None: Path of a ``.wav`` file in the current working
        directory. ``.wav`` uploads are returned as saved; ``.mp3`` uploads
        are converted with torchaudio and the converted path is returned.
        ``None`` when the extension is neither ``.wav`` nor ``.mp3``.
    """
    # Keep only the last path component so an upload name containing
    # directories cannot make us write outside the working directory.
    local_path = Path(Path(uploaded_file.name).name)

    # Decide on the real (final) extension, case-insensitively. A substring
    # test would treat "talk.wav.mp3" as WAV and would miss "TALK.WAV".
    suffix = local_path.suffix.lower()
    if suffix not in (".wav", ".mp3"):
        return None

    local_path.write_bytes(uploaded_file.getvalue())
    if suffix == ".wav":
        return local_path

    # MP3: decode and re-save next to the upload with a .wav extension.
    new_desired_path = local_path.with_suffix(".wav")
    st.info(new_desired_path)
    waveform, sample_rate = torchaudio.load(local_path)
    st.info(f"waveform, sample_rate: {waveform}, {sample_rate}")
    torchaudio.save(new_desired_path, waveform, sample_rate)
    return new_desired_path
@st.cache
def get_langcode_description(input_code, url=False):
    """Return a human-readable description of a language code.

    Args:
        input_code: Language code chosen by the user; may be empty.
        url: When true, the description is a Markdown link to the language's
            ISO 639-3 page instead of the plain display name.

    Returns:
        str: The language's display name (or a Markdown link to it). The
        generic default-setting text is returned when ``input_code`` is
        empty, is ``"ipa"``, or cannot be resolved by ``langcodes``.
    """
    description = "the default universal setting, not specific to any language"
    # "ipa" is the default allosaurus recognizer, not a real language code.
    if not input_code or input_code == "ipa":
        return description

    try:
        lang = langcodes.get(input_code)
        alpha3 = lang.to_alpha3()
        display_name = lang.display_name()
    except (langcodes.LanguageTagError, LookupError):
        # LanguageTagError: the tag could not be parsed.
        # LookupError: to_alpha3() found no three-letter code for the language.
        # Either way, fall back to the generic description, as before.
        return description

    if url:
        return f"[{display_name}](https://iso639-3.sil.org/code/{alpha3})"
    return display_name
@st.cache
def get_langcode_with_description(input_code):
    """Build the ``"<code>: <description>"`` label for a language code."""
    description = get_langcode_description(input_code)
    return f"{input_code}: {description}"
if __name__ == "__main__":
    # Language picker: every supported code is offered, "ipa" is preselected.
    supported_codes = get_supported_codes()
    langcode = st.selectbox(
        "ISO code for input language. Allosaurus doesn't need this, but it can improve accuracy",
        options=supported_codes,
        index=supported_codes.index("ipa"),
        format_func=get_langcode_with_description,
    )

    model = read_recognizer()

    description = get_langcode_description(langcode, url=True)
    st.write(f"Instructing Allosaurus to recognize using language {langcode}. That is, {description}")

    uploaded_files = st.file_uploader(
        "Choose a file",
        type=[
            ".wav",
            ".mp3",
        ],
        accept_multiple_files=True,
    )

    results = {}  # for better download/display
    for uploaded_file in uploaded_files:
        if uploaded_file is None:
            continue

        # Tell the audio player the real container type; MP3 uploads were
        # previously announced as WAV.
        is_mp3 = uploaded_file.name.lower().endswith(".mp3")
        st.audio(uploaded_file, format="audio/mpeg" if is_mp3 else "audio/wav")

        wav_file = get_path_to_wav_format(uploaded_file)
        if wav_file is None:
            # No WAV could be produced for this upload; skip it rather than
            # handing None to the recognizer.
            st.warning(f"Skipping {uploaded_file.name}: could not get a WAV file for it.")
            continue

        results[uploaded_file.name] = model.recognize(wav_file, langcode)

    st.write(results)