cdleong committed on
Commit
464e88d
·
1 Parent(s): df07559

Updating the langcode search and checking whether the code is valid.

Browse files
Files changed (1) hide show
  1. app.py +26 -7
app.py CHANGED
@@ -23,15 +23,29 @@ def get_path_to_wav_format(uploaded_file):
23
  return new_desired_path
24
 
25
 
26
def get_langcode_for_allosaurus(input_code):
    """Map a user-supplied language code to an Allosaurus language id.

    Args:
        input_code: 2- or 3-letter ISO language code typed by the user.
            May be empty or None because the input field is optional.

    Returns:
        A ``(langcode, description)`` tuple. When ``input_code`` is empty
        or cannot be resolved to a 3-letter code, the defaults
        ``("ipa", "the default universal setting, ...")`` are returned.
    """
    langcode = "ipa"  # the default allosaurus recognizer
    description = "the default universal setting, not specific to any language"

    # The text box is optional; an empty value means "use the default".
    if not input_code:
        return langcode, description

    try:
        lang = langcodes.get(input_code)
        alpha3 = lang.to_alpha3()
    except (langcodes.LanguageTagError, LookupError):
        # Deliberate best effort: an unparseable tag, or a language with
        # no 3-letter code, falls back to the universal recognizer.
        # NOTE(review): LookupError is what langcodes documents for
        # to_alpha3() when no alpha-3 code exists - confirm against the
        # installed version.
        return langcode, description

    return alpha3, lang.display_name()
36
 
37
 
@@ -39,11 +53,16 @@ def get_langcode_for_allosaurus(input_code):
39
 
40
 
41
  if __name__ == "__main__":
42
- input_code = st.text_input("(optional) 2 or 3-letter ISO code for input language", max_chars=3)
43
- langcode, description = get_langcode_for_allosaurus(input_code)
44
- st.write(f"Instructing Allosaurus to recognize using language {langcode}. That is, {description}")
 
45
 
46
  model = read_recognizer()
 
 
 
 
47
 
48
  uploaded_files = st.file_uploader("Choose a file", type=[
49
  ".wav",
 
23
  return new_desired_path
24
 
25
 
26
def get_langcode_for_allosaurus(input_code, model=None):
    """Resolve a user-supplied language code to one Allosaurus can use.

    The code is normalised to a 3-letter code with ``langcodes`` and, when
    a recognizer is supplied, checked with ``model.is_available``. Any
    failure is reported in the Streamlit UI and the default universal
    setting is used instead.

    Args:
        input_code: 2- or 3-letter ISO language code typed by the user.
            May be empty or None because the input field is optional.
        model: Allosaurus recognizer exposing ``is_available(lang_id)``.
            When None, the availability check is skipped and the
            3-letter code is accepted as-is.

    Returns:
        A ``(langcode, description)`` tuple. Falls back to
        ``("ipa", "the default universal setting, ...")`` when no usable
        code can be determined.
    """
    langcode = "ipa"  # the default allosaurus recognizer
    description = "the default universal setting, not specific to any language"

    # The text box is optional; an empty value means "use the default".
    if not input_code:
        return langcode, description

    try:
        lang = langcodes.get(input_code)
        alpha3 = lang.to_alpha3()
    except (langcodes.LanguageTagError, LookupError):
        # LanguageTagError: the tag could not be parsed.
        # LookupError: NOTE(review) langcodes documents this for
        # to_alpha3() when a language has no 3-letter code - confirm
        # against the installed version.
        st.error(
            f"langcodes library could not find a 3-letter ISO code for {input_code}. "
            "It may not be a valid code, try searching for "
            "[alternate codes](https://huggingface.co/spaces/cdleong/langcode-search). "
            "Proceeding with default setting"
        )
        return langcode, description

    if model is not None:
        # https://github.com/xinjli/allosaurus/blob/f5aa296dc97a90e3035f6cc0fd281cfb64536228/allosaurus/app.py#L57
        if not model.is_available(alpha3):
            # Report the code that was rejected, not the "ipa" default.
            st.error(
                f"Allosaurus doesn't recognize the language code {alpha3}. "
                "Perhaps try looking for "
                "[alternate codes](https://huggingface.co/spaces/cdleong/langcode-search). "
                "Or possibly it's just that there's no support for that particular "
                "language code. Proceeding with default setting"
            )
            return langcode, description
        st.info(f"{alpha3} is supported by Allosaurus")

    return alpha3, lang.display_name()
50
 
51
 
 
53
 
54
 
55
  if __name__ == "__main__":
56
+ input_code = st.text_input("(optional) 2 or 3-letter ISO code for input language. 2-letter codes will be converted to 3-letter codes", max_chars=3)
57
+
58
+
59
+
60
 
61
  model = read_recognizer()
62
+ langcode, description = get_langcode_for_allosaurus(input_code)
63
+
64
+ st.write(f"Instructing Allosaurus to recognize using language {langcode}. That is, {description}")
65
+
66
 
67
  uploaded_files = st.file_uploader("Choose a file", type=[
68
  ".wav",