from pathlib import Path

import langcodes
import streamlit as st
from allosaurus.app import read_recognizer


def get_path_to_wav_format(uploaded_file):
    """Save an uploaded audio file to the working directory and return a WAV path.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``; only its
            ``name`` attribute and ``getvalue()`` method are used.

    Returns:
        A ``pathlib.Path`` (relative to the current working directory) that
        ends in a lowercase ``.wav`` suffix.

    Raises:
        ValueError: If the file name has neither a ``.wav`` nor a ``.mp3``
            extension (compared case-insensitively).
    """
    # Keep only the final path component so a crafted upload name cannot
    # place the file outside the working directory.
    local_path = Path(Path(uploaded_file.name).name)
    suffix = local_path.suffix.lower()
    uploaded_bytes = uploaded_file.getvalue()

    if suffix == ".wav":
        # Normalise the extension so the returned path always ends in ".wav".
        wav_path = local_path.with_suffix(".wav")
        wav_path.write_bytes(uploaded_bytes)
        return wav_path

    if suffix == ".mp3":
        # Imported lazily: only this branch needs torchaudio, so the WAV-only
        # path keeps working when it is not installed.
        import torchaudio

        local_path.write_bytes(uploaded_bytes)
        wav_path = local_path.with_suffix(".wav")
        waveform, sample_rate = torchaudio.load(str(local_path))
        st.info(
            f"Converting {local_path.name} to WAV "
            f"(waveform shape {tuple(waveform.shape)}, sample rate {sample_rate})"
        )
        torchaudio.save(str(wav_path), waveform, sample_rate)
        return wav_path

    raise ValueError(
        f"Unsupported audio type {suffix!r} for {uploaded_file.name!r}; "
        "expected .wav or .mp3"
    )


def get_langcode_for_allosaurus(input_code):
    """Translate a user-supplied language code into a code and a description.

    Args:
        input_code: Text typed by the user; may be empty or ``None``.

    Returns:
        A ``(langcode, description)`` tuple. When ``input_code`` is empty or
        cannot be resolved, the tuple is ``("ipa", <default description>)``.
    """
    default_langcode = "ipa"  # the default allosaurus recognizer
    default_description = (
        "the default universal setting, not specific to any language"
    )

    cleaned_code = (input_code or "").strip()
    if not cleaned_code:
        return default_langcode, default_description

    try:
        lang = langcodes.get(cleaned_code)
        # Resolve both values before returning so a failure in either one
        # falls back to the defaults as a pair.
        langcode = lang.to_alpha3()
        description = lang.display_name()
    except (langcodes.LanguageTagError, LookupError):
        # LanguageTagError: the text is not a well-formed language tag.
        # LookupError: believed to be what to_alpha3() raises when no
        # three-letter code exists -- TODO confirm against langcodes docs.
        return default_langcode, default_description

    return langcode, description


def main():
    """Run the Streamlit page: pick a language, upload audio, show results."""
    input_code = st.text_input(
        "(optional) 2 or 3-letter ISO code for input language",
        max_chars=3,
    )
    langcode, description = get_langcode_for_allosaurus(input_code)
    st.write(
        f"Instructing Allosaurus to recognize using language {langcode}. That is, {description}"
    )

    model = read_recognizer()

    uploaded_files = st.file_uploader(
        "Choose a file",
        type=[
            ".wav",
            # ".mp3",  # TODO: convert .mp3 to .wav and save
        ],
        accept_multiple_files=True,
    )

    # `or []` keeps the loop safe if the uploader hands back None.
    for uploaded_file in uploaded_files or []:
        if uploaded_file is None:
            continue
        st.audio(uploaded_file, format='audio/wav')
        wav_file = get_path_to_wav_format(uploaded_file)
        st.write(wav_file)
        result = model.recognize(wav_file, langcode)
        st.write(result)


if __name__ == "__main__":
    main()