cdleong committed on
Commit
b3ebb49
·
1 Parent(s): 464e88d

Trying dropdown method

Browse files
Files changed (1) hide show
  1. app.py +22 -18
app.py CHANGED
@@ -2,7 +2,20 @@ import streamlit as st
2
  import langcodes
3
  from allosaurus.app import read_recognizer
4
  from pathlib import Path
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  def get_path_to_wav_format(uploaded_file):
8
  # st.write(dir(uploaded_file))
@@ -23,40 +36,31 @@ def get_path_to_wav_format(uploaded_file):
23
  return new_desired_path
24
 
25
 
26
- def get_langcode_for_allosaurus(input_code, model):
27
  langcode = "ipa" # the default allosaurus recognizer
28
  description = "the default universal setting, not specific to any language"
29
 
30
  if not input_code:
31
- return langcode, description
32
 
33
 
34
 
35
  try:
36
  lang = langcodes.get(input_code)
37
  alpha3 = lang.to_alpha3()
38
- if model.is_available(alpha3 ): # https://github.com/xinjli/allosaurus/blob/f5aa296dc97a90e3035f6cc0fd281cfb64536228/allosaurus/app.py#L57
39
- langcode = alpha3
40
- description = lang.display_name()
41
- st.info(f"{langcode} is supported by Allosaurus}")
42
- else:
43
- # st.info(f"Could not find supported language for {input_code}")
44
- st.error(f"Allosaurus doesn't recognize the language code {langcode}}. Perhaps try looking for [alternate codes](https://huggingface.co/spaces/cdleong/langcode-search). Or possibly it's just that there's no support for that particular language code. Proceeding with default setting")
45
 
46
  except langcodes.LanguageTagError as e:
47
- st.error(f"langcodes library could not find a 3-letter ISO code for {input_code}. It may not be a valid code, try searching for [alternate codes](https://huggingface.co/spaces/cdleong/langcode-search). Proceeding with default setting"
48
-
49
- return langcode, description
50
-
51
-
52
 
53
 
54
 
55
  if __name__ == "__main__":
56
- input_code = st.text_input("(optional) 2 or 3-letter ISO code for input language. 2-letter codes will be converted to 3-letter codes", max_chars=3)
57
-
58
-
59
-
60
 
61
  model = read_recognizer()
62
  langcode, description = get_langcode_for_allosaurus(input_code)
 
2
  import langcodes
3
  from allosaurus.app import read_recognizer
4
  from pathlib import Path
5
+ import string
6
+ from itertools import combinations
7
+ from collections import defaultdict
8
 
9
+ @st.cache
10
+ def get_supported_codes():
11
+ model = read_recognizer()
12
+ supported_codes = []
13
+ for combo in combinations(string.ascii_lowercase, 3):
14
+ code = "".join(combo)
15
+ if model.is_available(code):
16
+ supported_codes.append(combo)
17
+ return supported_codes
18
+
19
 
20
  def get_path_to_wav_format(uploaded_file):
21
  # st.write(dir(uploaded_file))
 
36
  return new_desired_path
37
 
38
 
39
+ def get_langcode_description(input_code):
40
  langcode = "ipa" # the default allosaurus recognizer
41
  description = "the default universal setting, not specific to any language"
42
 
43
  if not input_code:
44
+ return description
45
 
46
 
47
 
48
  try:
49
  lang = langcodes.get(input_code)
50
  alpha3 = lang.to_alpha3()
51
+ langcode = alpha3
52
+ description = lang.display_name()
 
 
 
 
 
53
 
54
  except langcodes.LanguageTagError as e:
55
+ pass
56
+ return description
 
 
 
57
 
58
 
59
 
60
  if __name__ == "__main__":
61
+ # input_code = st.text_input("(optional) 2 or 3-letter ISO code for input language. 2-letter codes will be converted to 3-letter codes", max_chars=3)
62
+ supported_codes = get_supported_codes()
63
+ input_code = st.selectbox("ISO code for input language. Allosaurus doesn't need this, but it can improve accuracy", options=supported_codes, format_func=get_langcode_description)
 
64
 
65
  model = read_recognizer()
66
  langcode, description = get_langcode_for_allosaurus(input_code)