Spaces:

cdleong
/

phonemize-audio

Runtime error

App Files Files Community

phonemize-audio / app.py

cdleong

Update app.py

b77d75e about 3 years ago

raw

history blame

3.62 kB

	import streamlit as st
	import langcodes
	from allosaurus.app import read_recognizer
	from pathlib import Path
	import string
	from itertools import permutations
	from collections import defaultdict
	import torchaudio

	@st.cache
	def get_supported_codes():
	    """Return the language codes that the Allosaurus recognizer reports as available.

	    The list always starts with ``"ipa"`` (the default option), followed by
	    every three-letter lowercase code for which ``model.is_available(code)``
	    is true, in alphabetical generation order.

	    Returns:
	        list[str]: ``"ipa"`` followed by the available three-letter codes.
	    """
	    # Function-scope import: the file header only imports `permutations`,
	    # which never repeats a letter and therefore skips codes such as "ell".
	    from itertools import product

	    model = read_recognizer()
	    default_code = "ipa"  # default option, always listed first
	    supported_codes = [default_code]
	    for letters in product(string.ascii_lowercase, repeat=3):
	        code = "".join(letters)
	        # Skip the default so it can never appear twice in the list.
	        if code != default_code and model.is_available(code):
	            supported_codes.append(code)
	    return supported_codes


	def get_path_to_wav_format(uploaded_file):
	    """Write an uploaded audio file to disk and return the path of its WAV form.

	    The upload is saved in the current working directory under the final
	    component of its own file name. A WAV upload is returned as saved; an
	    MP3 upload is loaded with torchaudio and re-saved beside it with a
	    ``.wav`` suffix.

	    Args:
	        uploaded_file: Object exposing ``name`` (str) and ``getvalue()``
	            (bytes), which are the only members used here.

	    Returns:
	        pathlib.Path: Path of the WAV file on disk.

	    Raises:
	        ValueError: If the file extension is neither ``.wav`` nor ``.mp3``
	            (compared case-insensitively).
	    """
	    # Keep only the last path component so the upload's name cannot point
	    # outside the working directory.
	    actual_file_path = Path(Path(uploaded_file.name).name)

	    # Decide on the real extension, not on a substring of the name, so
	    # "a.wav.mp3" is treated as MP3 and "CLIP.WAV" is treated as WAV.
	    suffix = actual_file_path.suffix.lower()
	    if suffix not in (".wav", ".mp3"):
	        raise ValueError(
	            f"Unsupported audio file type for {uploaded_file.name!r}: expected .wav or .mp3"
	        )

	    actual_file_path.write_bytes(uploaded_file.getvalue())

	    if suffix == ".wav":
	        return actual_file_path

	    # MP3: decode and re-encode as WAV next to the saved upload.
	    new_desired_path = actual_file_path.with_suffix(".wav")
	    st.info(new_desired_path)
	    waveform, sample_rate = torchaudio.load(actual_file_path)
	    st.info(f"waveform, sample_rate: {waveform}, {sample_rate}")
	    torchaudio.save(new_desired_path, waveform, sample_rate)
	    return new_desired_path

	@st.cache
	def get_langcode_description(input_code, url=False):
	    """Describe a language code in human-readable form.

	    Args:
	        input_code: Language code to describe; an empty value or ``"ipa"``
	            selects the default description.
	        url: When true, return a Markdown link to the code's page on
	            iso639-3.sil.org instead of the bare display name.

	    Returns:
	        str: The language's display name (or Markdown link). The default
	        description is returned for an empty code, for ``"ipa"``, and when
	        ``langcodes`` raises ``LanguageTagError`` for the code.
	    """
	    fallback = "the default universal setting, not specific to any language"

	    # "ipa" is the default allosaurus recognizer, not a real language code.
	    if not input_code or input_code == "ipa":
	        return fallback

	    try:
	        language = langcodes.get(input_code)
	        iso_code = language.to_alpha3()
	        name = language.display_name()
	    except langcodes.LanguageTagError:
	        return fallback

	    if url:
	        return f"[{name}](https://iso639-3.sil.org/code/{iso_code})"
	    return name

	@st.cache
	def get_langcode_with_description(input_code):
	    """Return ``"<code>: <description>"`` for use as a select-box label."""
	    description = get_langcode_description(input_code)
	    return f"{input_code}: {description}"


	if __name__ == "__main__":
	    # Page flow: choose a language code, upload audio files, then show the
	    # recognizer output for every uploaded file.
	    supported_codes = get_supported_codes()
	    langcode = st.selectbox(
	        "ISO code for input language. Allosaurus doesn't need this, but it can improve accuracy",
	        options=supported_codes,
	        index=supported_codes.index("ipa"),  # preselect the default option
	        format_func=get_langcode_with_description,
	    )

	    model = read_recognizer()
	    description = get_langcode_description(langcode, url=True)

	    st.write(f"Instructing Allosaurus to recognize using language {langcode}. That is, {description}")

	    uploaded_files = st.file_uploader(
	        "Choose a file",
	        type=[".wav", ".mp3"],
	        accept_multiple_files=True,
	    )

	    results = {}  # file name -> recognizer output, displayed together below
	    # `or []` keeps the loop safe if the uploader yields None instead of a list.
	    for uploaded_file in uploaded_files or []:
	        if uploaded_file is None:
	            continue

	        # Announce the upload's own MIME type so MP3 files are not labelled
	        # as WAV; fall back to the previous hard-coded value if it is absent.
	        audio_format = getattr(uploaded_file, "type", None) or "audio/wav"
	        st.audio(uploaded_file, format=audio_format)

	        wav_file = get_path_to_wav_format(uploaded_file)
	        results[uploaded_file.name] = model.recognize(wav_file, langcode)

	    st.write(results)