Spaces:
Runtime error
Runtime error
File size: 5,436 Bytes
1ffd672 f8d0565 ff1bae1 b3ebb49 8d7cf3a b3ebb49 dc83dff f8d0565 b3ebb49 4c71707 8d7cf3a b3ebb49 8112d7a f39c7a2 4c71707 b3ebb49 f8d0565 73363ec 663ac44 fa6f9dd 5caf6ad 0f7a10e baf12a8 df07559 74e2a8d df07559 73363ec 676b3fa 74e2a8d 676b3fa b77d75e df07559 f8d0565 4fbd840 1ec80fa f8d0565 0c3b1db 464e88d aae5aef 4fbd840 464e88d f8d0565 464e88d b3ebb49 1ec80fa 464e88d f8d0565 b3ebb49 4fbd840 f8d0565 1ffd672 71f3e01 f655e07 71f3e01 b3ebb49 cc4299c f8d0565 cc4299c f9db145 cc4299c 73363ec 71f3e01 cc4299c baf12a8 cc4299c 71f3e01 cc4299c 6e454d6 978694b cc4299c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import streamlit as st
import langcodes
from allosaurus.app import read_recognizer
from pathlib import Path
import string
from itertools import permutations
from collections import defaultdict
import torchaudio
@st.cache
def get_supported_codes():
    """Return every language code the Allosaurus recognizer reports as available.

    The list starts with ``"ipa"`` (the default, language-independent option)
    followed by each three-letter lowercase code for which
    ``model.is_available(code)`` is true, in lexicographic order.

    Returns:
        list[str]: ``"ipa"`` plus the available three-letter codes.
    """
    # Function-scope import: the module only imports ``permutations``.
    from itertools import product

    model = read_recognizer()
    supported_codes = ["ipa"]  # default option

    # Use the Cartesian product, not permutations: permutations never reuse a
    # letter, so codes with a repeated letter (e.g. "ewe", "aaa") were skipped.
    for letters in product(string.ascii_lowercase, repeat=3):
        code = "".join(letters)
        # "ipa" is already listed as the default; do not list it twice.
        if code != "ipa" and model.is_available(code):
            supported_codes.append(code)
    return supported_codes
def get_path_to_wav_format(uploaded_file, suppress_outputs=False):
    """Save an uploaded audio file to disk and return the path of a .wav version.

    Args:
        uploaded_file: Object exposing ``name`` (the file name) and
            ``getvalue()`` (the raw bytes of the upload).
        suppress_outputs: When true, skip the ``st.info`` messages that are
            otherwise shown while converting a non-wav file.

    Returns:
        pathlib.Path: The saved file itself for ``.wav`` uploads, or a newly
        written ``.wav`` file next to it for ``.mp3`` / ``.ogg`` uploads.

    Raises:
        ValueError: If the file extension is not ``.wav``, ``.mp3`` or ``.ogg``
            (previously this case silently returned ``None``).
    """
    # Keep only the final path component so a crafted upload name cannot make
    # us write outside the working directory.
    actual_file_path = Path(Path(uploaded_file.name).name)

    # Decide on the real extension, case-insensitively. A substring test would
    # treat "clip.wav.mp3" as wav and reject "CLIP.WAV".
    extension = actual_file_path.suffix.lower()
    if extension not in (".wav", ".mp3", ".ogg"):
        raise ValueError(
            f"Unsupported audio format for {uploaded_file.name!r}: "
            "expected a .wav, .mp3 or .ogg file"
        )

    actual_file_path.write_bytes(uploaded_file.getvalue())

    if extension == ".wav":
        return actual_file_path

    # .mp3 / .ogg: re-encode to wav with torchaudio.
    new_desired_path = actual_file_path.with_suffix(".wav")
    encoding = "PCM_S"  # Prevent encoding errors. https://stackoverflow.com/questions/60352850/wave-error-unknown-format-3-arises-when-trying-to-convert-a-wav-file-into-text
    bits_per_sample = 16
    waveform, sample_rate = torchaudio.load(actual_file_path)
    if not suppress_outputs:
        st.info(f"Allosaurus requires .wav files. Converting with torchaudio, encoding={encoding}, bits_per_sample={bits_per_sample}")
        st.info(f"Uploaded file sample_rate: {sample_rate}")
    torchaudio.save(
        new_desired_path,
        waveform,
        sample_rate,
        encoding=encoding,
        bits_per_sample=bits_per_sample,
    )
    return new_desired_path
@st.cache
def get_langcode_description(input_code, url=False):
    """Describe a language code in human-readable form.

    Args:
        input_code: Language code to describe. An empty value or ``"ipa"``
            selects the description of the default recognizer setting.
        url: When true, the language's display name is returned as a Markdown
            link to its iso639-3.sil.org page instead of plain text.

    Returns:
        str: The default-setting description, the language's display name
        (optionally as a Markdown link), or a note that the code was not
        recognized when ``langcodes`` cannot resolve it.
    """
    default_code = "ipa"  # the default allosaurus recognizer
    if not input_code or input_code == default_code:
        return "the default universal setting, not specific to any language"

    try:
        lang = langcodes.get(input_code)
        # to_alpha3() raises LookupError when no three-letter code is known.
        alpha3 = lang.to_alpha3()
        display_name = lang.display_name()
    except (langcodes.LanguageTagError, LookupError):
        # Do not fall back to the default description here: that would label
        # an unknown code as "the default universal setting".
        return f"unrecognized language code '{input_code}'"

    if url:
        return f"[{display_name}](https://iso639-3.sil.org/code/{alpha3})"
    return display_name
@st.cache
def get_langcode_with_description(input_code):
    """Return ``"<code>: <description>"`` for use as a selectbox label.

    Args:
        input_code: Language code to label.

    Returns:
        str: The code, a colon and a space, then the plain-text description
        produced by ``get_langcode_description``.
    """
    description = get_langcode_description(input_code)
    return f"{input_code}: {description}"
if __name__ == "__main__":
    # Streamlit page: pick a language code, upload audio files, and show the
    # phones Allosaurus recognizes in each file.
    # NOTE(review): the source had lost its indentation; the nesting below
    # (submit handling inside the form, results written once after the loop)
    # is reconstructed and should be confirmed against the deployed app.
    st.header("Phonemize Audio files with [Allosaurus](https://github.com/xinjli/allosaurus)")
    st.write("Allosaurus is a pretrained universal phone recognizer. It can be used to recognize phones in more than 2000 languages. It is written by Li, Xinjian and Dalmia, Siddharth and Li, Juncheng and Lee, Matthew and Littell, Patrick and Yao, Jiali and Anastasopoulos, Antonios and Mortensen, David R and Neubig, Graham and Black, Alan W and Florian, Metze. [Click here to visit their repository](https://github.com/xinjli/allosaurus)")
    # The link needs an explicit scheme; without it Markdown treats the target
    # as a path relative to the app's own URL.
    st.write("I, [Colin Leong](https://cdleong.github.io) did not create Allosaurus, but I have created this web app (kindly hosted by Hugging Face) to make it convenient to use: simply upload your files below, and they will be transcribed to phonetic IPA symbols!")
    st.write("**Feedback:** Provide feedback regarding this web app at https://twitter.com/cleong110, or via slack: https://masakhane-nlp.slack.com/archives/D01DU3MHP7A")

    supported_codes = get_supported_codes()
    index_of_desired_default = supported_codes.index("ipa")

    with st.form("Allosaurus form"):
        langcode = st.selectbox(
            "ISO code for input language. Allosaurus doesn't need this, but it can improve accuracy",
            options=supported_codes,
            index=index_of_desired_default,
            format_func=get_langcode_with_description,
        )

        model = read_recognizer()
        description = get_langcode_description(langcode, url=True)
        st.write(f"Instructing Allosaurus to recognize using language {langcode}. That is, {description}")

        st.subheader("Upload your files here")
        uploaded_files = st.file_uploader(
            "Choose a file",
            type=[
                ".wav",
                ".mp3",
                ".ogg",
            ],
            accept_multiple_files=True,
        )

        submitted = st.form_submit_button("Run phone recognition!")

        if submitted:
            if not uploaded_files:
                # Nothing to transcribe; say so instead of showing an empty result.
                st.warning("No files were uploaded. Please choose at least one audio file.")
            else:
                results = {}  # for better download/display
                uploaded_files_count = len(uploaded_files)
                # With more than this many files, per-file conversion messages
                # are suppressed.
                suppress_output_threshold = 2
                my_bar = st.progress(0)

                for i, uploaded_file in enumerate(uploaded_files):
                    if uploaded_file is not None:
                        wav_file = get_path_to_wav_format(
                            uploaded_file,
                            uploaded_files_count > suppress_output_threshold,
                        )
                        with st.spinner(f"transcribing {uploaded_file.name}..."):
                            result = model.recognize(wav_file, langcode)
                        results[uploaded_file.name] = result

                    files_done = i + 1
                    my_bar.progress(files_done / uploaded_files_count)

                st.write(results)