Spaces:

cdleong
/

phonemize-audio

Runtime error

File size: 2,205 Bytes

1ffd672
f8d0565
 
ff1bae1
f8d0565
 
55c2b20
663ac44
 
 
 
 
 
 
fa6f9dd
5caf6ad
0f7a10e
663ac44
 
 
 
 
 
f8d0565
 
 
 
0c3b1db
f8d0565
 
 
0c3b1db
f8d0565
 
0c3b1db
f8d0565
 
 
 
1ffd672
 
f8d0565
0c3b1db
c198ba3
f8d0565
 
 
5d2138a
f9db145
f083023
 
 
 
f9db145
f083023
f9db145
f083023
 
 
 
 
 
 
 
 
 
f8d0565

import streamlit as st
import langcodes
from allosaurus.app import read_recognizer
from pathlib import Path

  
def get_path_to_wav_format(uploaded_file):
  """Save an uploaded audio file to disk and return the path of a WAV version.

  The upload's bytes are written into the current working directory under
  the upload's own file name. A ``.wav`` upload is returned as-is; an
  ``.mp3`` upload is additionally decoded with torchaudio and re-saved
  alongside it with a ``.wav`` suffix.

  Args:
    uploaded_file: Object exposing ``name`` (the file name as a string) and
      ``getvalue()`` (the file content as bytes).

  Returns:
    A ``pathlib.Path`` pointing at a ``.wav`` file on disk.

  Raises:
    ValueError: If the file name ends in neither ``.wav`` nor ``.mp3``
      (compared case-insensitively).
  """
  # Keep only the final path component so an upload name that contains
  # directory parts cannot make us write outside the working directory.
  saved_path = Path(Path(uploaded_file.name).name)

  # Decide by the real suffix, not by substring: "a.wav.mp3" is an mp3,
  # and "A.WAV" is a wav.
  suffix = saved_path.suffix.lower()
  if suffix not in (".wav", ".mp3"):
    raise ValueError(
      f"Unsupported audio file {uploaded_file.name!r}: expected .wav or .mp3"
    )

  saved_path.write_bytes(uploaded_file.getvalue())

  if suffix == ".wav":
    return saved_path

  # The .mp3 branch is the only user of torchaudio, so import it lazily:
  # .wav uploads keep working even where torchaudio is not installed.
  import torchaudio

  wav_path = saved_path.with_suffix(".wav")
  waveform, sample_rate = torchaudio.load(str(saved_path))
  st.info(f"Converting {saved_path.name} to WAV (sample rate: {sample_rate} Hz)")
  torchaudio.save(str(wav_path), waveform, sample_rate)
  return wav_path


def get_langcode_for_allosaurus(input_code):
  """Translate a user-entered language code into an Allosaurus language id.

  Args:
    input_code: Language code typed by the user (e.g. a 2- or 3-letter ISO
      code). May be empty, blank, or ``None``.

  Returns:
    A ``(langcode, description)`` tuple. ``langcode`` is the 3-letter code
    produced by ``langcodes`` for the input and ``description`` is its
    display name. If the input is blank, cannot be parsed, or has no
    3-letter code, the pair falls back to ``"ipa"`` and a description of
    that default.
  """
  default_langcode = "ipa"  # the default allosaurus recognizer
  default_description = "the default universal setting, not specific to any language"

  # Nothing typed: skip the parser entirely and use the default.
  cleaned_code = (input_code or "").strip()
  if not cleaned_code:
    return default_langcode, default_description

  try:
    lang = langcodes.get(cleaned_code)
    langcode = lang.to_alpha3()
    description = lang.display_name()
  except (langcodes.LanguageTagError, LookupError):
    # LanguageTagError: the text is not a well-formed language tag.
    # LookupError: the tag parsed but has no known 3-letter code.
    # Either way return the default pair as a whole, so the code and its
    # description can never disagree.
    return default_langcode, default_description
  return langcode, description
  
  
  


if __name__ == "__main__":
  # Ask for an optional language code and report which recognizer
  # language it was mapped to.
  iso_code = st.text_input(
    "(optional) 2 or 3-letter ISO code for input language",
    max_chars=3,
  )
  allosaurus_lang, lang_description = get_langcode_for_allosaurus(iso_code)
  st.write(f"Instructing Allosaurus to recognize using language {allosaurus_lang}. That is, {lang_description}")

  recognizer = read_recognizer()

  # Only .wav is accepted for now.
  # TODO: convert .mp3 to .wav and save, then add ".mp3" to this list.
  accepted_types = [".wav"]
  uploads = st.file_uploader(
    "Choose a file",
    type=accepted_types,
    accept_multiple_files=True,
  )

  # For each upload: play it back, save it to disk as WAV, show the saved
  # path, then run recognition on it and show the result.
  for upload in uploads:
    if upload is None:
      continue

    st.audio(upload, format="audio/wav")

    wav_path = get_path_to_wav_format(upload)
    st.write(wav_path)
    st.write(recognizer.recognize(wav_path, allosaurus_lang))