Flux9665 committed on
Commit
97bcef9
1 Parent(s): 4e10cff

try to figure out how ZeroGPU works

Browse files
InferenceInterfaces/ToucanTTSInterface.py CHANGED
@@ -107,7 +107,7 @@ class ToucanTTSInterface(torch.nn.Module):
107
  self.set_accent_language(lang_id=lang_id)
108
 
109
  # Removed side of this diff: switching the phonemizer language replaced self.text2phone
  # with a newly constructed ArticulatoryCombinedTextFrontend for lang_id.
  def set_phonemizer_language(self, lang_id):
110
- self.text2phone = ArticulatoryCombinedTextFrontend(language=lang_id, add_silence_to_end=True)
111
 
112
  def set_accent_language(self, lang_id):
113
  if lang_id in ['ajp', 'ajt', 'lak', 'lno', 'nul', 'pii', 'plj', 'slq', 'smd', 'snb', 'tpw', 'wya', 'zua', 'en-us', 'en-sc', 'fr-be', 'fr-sw', 'pt-br', 'spa-lat', 'vi-ctr', 'vi-so']:
 
107
  self.set_accent_language(lang_id=lang_id)
108
 
109
  # Added side of this diff: switching the phonemizer language keeps the existing
  # self.text2phone object and reconfigures it through its change_lang method.
  def set_phonemizer_language(self, lang_id):
110
+ self.text2phone.change_lang(language=lang_id, add_silence_to_end=True)
111
 
112
  def set_accent_language(self, lang_id):
113
  if lang_id in ['ajp', 'ajt', 'lak', 'lno', 'nul', 'pii', 'plj', 'slq', 'smd', 'snb', 'tpw', 'wya', 'zua', 'en-us', 'en-sc', 'fr-be', 'fr-sw', 'pt-br', 'spa-lat', 'vi-ctr', 'vi-so']:
Preprocessing/TextFrontend.py CHANGED
@@ -567,6 +567,543 @@ class ArticulatoryCombinedTextFrontend:
567
  self.id_to_phone = {v: k for k, v in self.phone_to_id.items()}
568
  self.text_vector_to_phone_cache = dict()
569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  @staticmethod
571
  def get_example_sentence(lang):
572
  if lang == "eng":
 
567
  self.id_to_phone = {v: k for k, v in self.phone_to_id.items()}
568
  self.text_vector_to_phone_cache = dict()
569
 
570
def change_lang(self, language,
                use_explicit_eos=True,
                use_lexical_stress=True,
                silent=True,
                add_silence_to_end=True,
                use_word_boundaries=True):
    """
    Re-configure this frontend for another language without building a new object.

    The method stores the configuration flags on the instance, rebuilds the
    tone-contour permutation lists, selects a phonemizer ("espeak",
    "dragonmapper" or "transphone") together with its language code and
    text-expansion callable, and finally regenerates the phone lookup tables
    and empties the text-vector cache.

    Args:
        language: language code such as "eng" or "deu". Codes that are not
            listed in the table below are passed to transphone unchanged.
        use_explicit_eos: stored as ``self.use_explicit_eos``.
        use_lexical_stress: stored as ``self.use_stress`` and forwarded to the
            espeak backend as ``with_stress``.
        silent: accepted for signature compatibility; not read in this method.
        add_silence_to_end: stored as ``self.add_silence_to_end``.
        use_word_boundaries: stored as ``self.use_word_boundaries``.
    """
    self.language = language
    self.use_explicit_eos = use_explicit_eos
    self.use_stress = use_lexical_stress
    self.add_silence_to_end = add_silence_to_end
    self.use_word_boundaries = use_word_boundaries
    from transphone.g2p import read_g2p

    # ---- tone contours -------------------------------------------------
    # Every two- and three-letter combination of tone letters is sorted into
    # a contour class by comparing the register heights of its members.
    tone_letters = ["˥", "˦", "˧", "˨", "˩"]
    height_of = {"˥": 5, "˦": 4, "˧": 3, "˨": 2, "˩": 1}
    rising, falling, peaking, dipping = [], [], [], []
    for first in tone_letters:
        for second in tone_letters:
            pair = first + second
            if height_of[first] > height_of[second]:
                falling.append(pair)
            else:
                # pairs with equal heights are collected here as well
                rising.append(pair)
            for third in tone_letters:
                if height_of[first] > height_of[second] < height_of[third]:
                    dipping.append(pair + third)
                elif height_of[first] < height_of[second] > height_of[third]:
                    peaking.append(pair + third)
    self.rising_perms = rising
    self.falling_perms = falling
    self.peaking_perms = peaking
    self.dipping_perms = dipping

    # ---- phonemizer selection -------------------------------------------
    # language code -> espeak voice identifier
    espeak_voice_for = {
        "eng": "en-us",  # English as spoken in USA
        "deu": "de",  # German
        "ell": "el",  # Greek
        "spa": "es",  # Spanish
        "fin": "fi",  # Finnish
        "rus": "ru",  # Russian
        "hun": "hu",  # Hungarian
        "nld": "nl",  # Dutch
        "fra": "fr-fr",  # French
        "ita": "it",  # Italian
        "por": "pt",  # Portuguese
        "pol": "pl",  # Polish
        "vie": "vi",  # Northern Vietnamese
        "ukr": "uk",  # Ukrainian
        "pes": "fa",  # Western Farsi
        "afr": "af",  # Afrikaans
        "aln": "sq",  # Albanian
        "amh": "am",  # Amharic
        "arb": "ar",  # Arabic
        "arg": "an",  # Aragonese
        "hye": "hy",  # East Armenian
        "hyw": "hyw",  # West Armenian
        "azj": "az",  # Azerbaijani
        "bak": "ba",  # Bashkir
        "eus": "eu",  # Basque
        "bel": "be",  # Belarusian
        "ben": "bn",  # Bengali
        "bpy": "bpy",  # Bishnupriya Manipuri
        "bos": "bs",  # Bosnian
        "bul": "bg",  # Bulgarian
        "mya": "my",  # Burmese
        "chr": "chr",  # Cherokee
        "yue": "yue",  # Chinese Cantonese
        "hak": "hak",  # Chinese Hakka
        "haw": "haw",  # Hawaiian
        "hrv": "hr",  # Croatian
        "ces": "cs",  # Czech
        "dan": "da",  # Danish
        "ekk": "et",  # Estonian
        "gle": "ga",  # Gaelic Irish
        "gla": "gd",  # Gaelic Scottish
        "kat": "ka",  # Georgian
        "kal": "kl",  # Greenlandic
        "guj": "gu",  # Gujarati
        "heb": "he",  # Hebrew
        "hin": "hi",  # Hindi
        "isl": "is",  # Icelandic
        "ind": "id",  # Indonesian
        "jpn": "ja",  # Japanese
        "kan": "kn",  # Kannada
        "knn": "kok",  # Konkani
        "kor": "ko",  # Korean
        "ckb": "ku",  # Kurdish
        "kaz": "kk",  # Kazakh
        "kir": "ky",  # Kyrgyz
        "lat": "la",  # Latin
        "ltz": "lb",  # Luxembourgish
        "lvs": "lv",  # Latvian
        "lit": "lt",  # Lithuanian
        "mri": "mi",  # Māori
        "mkd": "mk",  # Macedonian
        "zlm": "ms",  # Malay
        "mal": "ml",  # Malayalam
        "mlt": "mt",  # Maltese
        "mar": "mr",  # Marathi
        "nci": "nci",  # Nahuatl
        "npi": "ne",  # Nepali
        "nob": "nb",  # Norwegian Bokmål
        "nog": "nog",  # Nogai
        "ory": "or",  # Oriya
        "gaz": "om",  # Oromo
        "pap": "pap",  # Papiamento
        "pan": "pa",  # Punjabi
        "ron": "ro",  # Romanian
        "lav": "ru-lv",  # Russian Latvia
        "srp": "sr",  # Serbian
        "tsn": "tn",  # Setswana
        "snd": "sd",  # Sindhi
        "slk": "sk",  # Slovak
        "slv": "sl",  # Slovenian
        "smj": "smj",  # Lule Saami
        "swh": "sw",  # Swahili
        "swe": "sv",  # Swedish
        "tam": "ta",  # Tamil
        "tha": "th",  # Thai
        "tuk": "tk",  # Turkmen
        "tat": "tt",  # Tatar
        "tel": "te",  # Telugu
        "tur": "tr",  # Turkish
        "uig": "ug",  # Uyghur
        "urd": "ur",  # Urdu
        "uzn": "uz",  # Uzbek
        "cym": "cy",  # Welsh
    }
    # remember to also update get_language_id() below when adding something here, as well as the get_example_sentence function

    if language == "cmn":
        # Mandarin is the only language routed through dragonmapper
        self.g2p_lang = "cmn"
        self.expand_abbreviations = convert_kanji_to_pinyin_mandarin
        self.phonemizer = "dragonmapper"
    elif language in espeak_voice_for:
        self.g2p_lang = espeak_voice_for[language]
        # only English and French have a dedicated text-expansion step
        if language == "eng":
            self.expand_abbreviations = english_text_expansion
        elif language == "fra":
            self.expand_abbreviations = remove_french_spacing
        else:
            self.expand_abbreviations = lambda x: x
        self.phonemizer = "espeak"
    else:
        # blanket solution for the rest
        self.g2p_lang = language
        self.phonemizer = "transphone"
        self.expand_abbreviations = lambda x: x
        self.transphone = read_g2p()

    # ---- backend construction -------------------------------------------
    if self.phonemizer == "espeak":
        try:
            self.phonemizer_backend = EspeakBackend(
                language=self.g2p_lang,
                punctuation_marks=';:,.!?¡¿—…"«»“”~/。【】、‥،؟“”؛',
                preserve_punctuation=True,
                language_switch='remove-flags',
                with_stress=self.use_stress,
            )
        except RuntimeError:
            # the espeak backend could not be created: use transphone with
            # the unmapped language code instead
            print("Error in loading espeak! \n"
                  "Maybe espeak is not installed on your system? \n"
                  "Falling back to transphone.")
            self.g2p_lang = self.language
            self.phonemizer = "transphone"
            self.expand_abbreviations = lambda x: x
            self.transphone = read_g2p()

    # ---- lookup tables and cache ----------------------------------------
    self.phone_to_vector = generate_feature_table()
    self.phone_to_id = get_phone_to_id()
    self.id_to_phone = {index: phone for phone, index in self.phone_to_id.items()}
    self.text_vector_to_phone_cache = {}
1106
+
1107
  @staticmethod
1108
  def get_example_sentence(lang):
1109
  if lang == "eng":