utkarsh2299
committed on
Upload 55 files
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- api.py +63 -0
- app.py +179 -0
- get_phone_mapped_python.py +76 -0
- inference.py +129 -0
- ssn_parser_new/get_phone_mapped_text.py +73 -0
- ssn_parser_new/lists/alphabets +62 -0
- ssn_parser_new/lists/cons +130 -0
- ssn_parser_new/lists/dv +56 -0
- ssn_parser_new/lists/end_syl_list +404 -0
- ssn_parser_new/lists/english +53 -0
- ssn_parser_new/lists/english_tam_map +409 -0
- ssn_parser_new/lists/english_text_oald +46 -0
- ssn_parser_new/lists/f1 +21 -0
- ssn_parser_new/lists/f2 +21 -0
- ssn_parser_new/lists/gen.scp +1 -0
- ssn_parser_new/lists/language_map_cp +264 -0
- ssn_parser_new/lists/num +10 -0
- ssn_parser_new/lists/number +10 -0
- ssn_parser_new/lists/o_au_map +3 -0
- ssn_parser_new/lists/out_word +0 -0
- ssn_parser_new/lists/pb_pos_list_12hrs +9 -0
- ssn_parser_new/lists/phone_list +42 -0
- ssn_parser_new/lists/phoneset_all +419 -0
- ssn_parser_new/lists/phoneset_mei +27 -0
- ssn_parser_new/lists/phoneset_uyir +14 -0
- ssn_parser_new/lists/phoneset_uyirmei +378 -0
- ssn_parser_new/lists/spl_chr +26 -0
- ssn_parser_new/lists/spl_chr_map +27 -0
- ssn_parser_new/lists/sv +6 -0
- ssn_parser_new/lists/syl_list +24 -0
- ssn_parser_new/lists/tamil +48 -0
- ssn_parser_new/lists/tamil_map +150 -0
- ssn_parser_new/lists/u_list +4 -0
- ssn_parser_new/lists/vowel_list +13 -0
- ssn_parser_new/lists/vowels +48 -0
- ssn_parser_new/lists/vuv_list +4 -0
- ssn_parser_new/lists/word1 +1 -0
- ssn_parser_new/non_parallel-parser.py +93 -0
- ssn_parser_new/output +9 -0
- ssn_parser_new/output.cls +9 -0
- ssn_parser_new/output.err +1 -0
- ssn_parser_new/output.words +9 -0
- ssn_parser_new/phone_out_file +9 -0
- ssn_parser_new/phone_out_file.cls +9 -0
- ssn_parser_new/phone_out_file.err +1 -0
- ssn_parser_new/phone_out_file.words +9 -0
- ssn_parser_new/phonify_wrapper.py +50 -0
- ssn_parser_new/scripts/ortho_to_phonetic1.py +75 -0
- ssn_parser_new/scripts/tamil_trans_py +3 -0
.gitattributes
CHANGED
@@ -37,3 +37,4 @@ vocoder/female/aryan/hifigan/generator filter=lfs diff=lfs merge=lfs -text
|
|
37 |
vocoder/female/dravidian/hifigan/generator filter=lfs diff=lfs merge=lfs -text
|
38 |
vocoder/male/aryan/hifigan/generator filter=lfs diff=lfs merge=lfs -text
|
39 |
vocoder/male/dravidian/hifigan/generator filter=lfs diff=lfs merge=lfs -text
|
|
|
|
37 |
vocoder/female/dravidian/hifigan/generator filter=lfs diff=lfs merge=lfs -text
|
38 |
vocoder/male/aryan/hifigan/generator filter=lfs diff=lfs merge=lfs -text
|
39 |
vocoder/male/dravidian/hifigan/generator filter=lfs diff=lfs merge=lfs -text
|
40 |
+
ssn_parser_new/scripts/tamil_trans_py filter=lfs diff=lfs merge=lfs -text
|
api.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# TTS IITM SPEECH LAB
|
2 |
+
import requests
|
3 |
+
import json
|
4 |
+
import base64
|
5 |
+
|
6 |
+
text = "सुप्रभात, आप कैसे हैं?" # hindi
|
7 |
+
# text = "സുപ്രഭാതം, സുഖമാ?" # malayalam
|
8 |
+
# text = "সুপ্ৰভাত, তুমি কেনে?" # manipuri
|
9 |
+
# text = "सुप्रभात, तुम्ही कसे आहात?" # marathi
|
10 |
+
# text = "ಶುಭೋದಯ, ನೀವು ಹೇಗಿದ್ದೀರಿ?" # kannada
|
11 |
+
# text = "बसु म्विथ्बो, बरि दिबाबो?" # bodo male not working <---
|
12 |
+
# text = "Good morning, how are you?" # english
|
13 |
+
# text = "সুপ্ৰভাত, আপুনি কেমন আছে?" # assamese
|
14 |
+
# text = "காலை வணக்கம், நீங்கள் எப்படி இருக்கின்றீர்கள்?" # tamil
|
15 |
+
# text = "ସୁପ୍ରଭାତ, ଆପଣ କେମିତି ଅଛନ୍ତି?" # odia male not working <---
|
16 |
+
# text = "सुप्रभात, आप कैसे छो?" # rajasthani
|
17 |
+
# text = "శుభోదయం, మీరు ఎలా ఉన్నారు?" # telugu
|
18 |
+
# text = "সুপ্রভাত, আপনি কেমন আছেন?" # bengali male not working <---
|
19 |
+
# text = "સુપ્રભાત, તમે કેમ છો?" # gujarati
|
20 |
+
|
21 |
+
lang = 'hindi'
gender = 'female'

url = "http://localhost:4005/tts"
# url = 'http://projects.respark.iitm.ac.in:8009/tts' # proxy

# JSON body for the /tts endpoint: the text to speak, the voice to use and
# the "alpha" value forwarded to the synthesizer.
# NOTE(review): "segmentwise" is sent as the string "True"; confirm the
# server actually reads this field.
payload = json.dumps({
    "input": text,
    "gender": gender,
    "lang": lang,
    "alpha": 1,
    "segmentwise": "True"
})
headers = {'Content-Type': 'application/json'}
response = requests.request("POST", url, headers=headers, data=payload).json()

# A failed synthesis is reported as {"status": "failure", "reason": ...}
# with no "audio" key. Surface that reason instead of a bare KeyError.
if response.get('status') == 'failure':
    raise RuntimeError(f"TTS request failed: {response.get('reason')}")

audio = response['audio']
# NOTE(review): the payload is base64-encoded audio; confirm the container
# format matches the ".mp3" extension used here.
file_name = "tts.mp3"
decode_string = base64.b64decode(audio)
# The context manager closes the file even if the write raises.
with open(file_name, 'wb') as wav_file:
    wav_file.write(decode_string)
|
43 |
+
|
44 |
+
'''
|
45 |
+
Supported languages
|
46 |
+
|
47 |
+
Assamese
|
48 |
+
Bengali
|
49 |
+
Bodo
|
50 |
+
English
|
51 |
+
Gujarati
|
52 |
+
Hindi
|
53 |
+
Kannada
|
54 |
+
Malayalam
|
55 |
+
Manipuri
|
56 |
+
Marathi
|
57 |
+
Odia
|
58 |
+
Punjabi
|
59 |
+
Rajasthani
|
60 |
+
Tamil
|
61 |
+
Telugu
|
62 |
+
Urdu
|
63 |
+
'''
|
app.py
ADDED
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, render_template, request, send_file, jsonify
|
2 |
+
import requests
|
3 |
+
import json
|
4 |
+
import ssl
|
5 |
+
import logging
|
6 |
+
import sys
|
7 |
+
import os
|
8 |
+
import base64
|
9 |
+
import io
|
10 |
+
#replace the path with your hifigan path to import Generator from models.py
|
11 |
+
sys.path.append("hifigan")
|
12 |
+
# import argparse
|
13 |
+
import torch
|
14 |
+
from espnet2.bin.tts_inference import Text2Speech
|
15 |
+
from models import Generator
|
16 |
+
from scipy.io.wavfile import write
|
17 |
+
from meldataset import MAX_WAV_VALUE
|
18 |
+
from env import AttrDict
|
19 |
+
import json
|
20 |
+
import yaml
|
21 |
+
from text_preprocess_for_inference import TTSDurAlignPreprocessor
|
22 |
+
# import time
|
23 |
+
|
24 |
+
# Send log records to access.log, each with a timestamp and level.
logging.basicConfig(
    filename='access.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Sample rate handed to the WAV writer when encoding synthesized audio.
SAMPLING_RATE = 22050
# Use the GPU whenever torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# A single preprocessor instance is shared by every request.
preprocessor = TTSDurAlignPreprocessor()

app = Flask(__name__)
|
35 |
+
# app.config['SECRET_KEY'] = 'key'
|
36 |
+
# socketio = SocketIO(app)
|
37 |
+
|
38 |
+
# @socketio.on('new_user')
|
39 |
+
# def handle_new_user(data):
|
40 |
+
# client_id = data['id']
|
41 |
+
# # print('\n'+f"New user connected with ID: {client_id}")
|
42 |
+
# logging.info('\n'+f"New user connected with ID: {client_id}")
|
43 |
+
|
44 |
+
def load_hifigan_vocoder(language, gender, device):
    """Build the HiFi-GAN generator for ``gender`` and prepare it for inference.

    Args:
        language: Not used when building the paths below.
            NOTE(review): the "aryan" vocoder directory is used for every
            language; confirm this is intended.
        gender: Directory component under ``vocoder/``.
        device: Anything accepted by ``torch.device``.

    Returns:
        The generator with checkpoint weights loaded, switched to eval mode
        and with weight normalization removed.
    """
    vocoder_dir = f"vocoder/{gender}/aryan/hifigan"
    config_path = f"{vocoder_dir}/config.json"
    checkpoint_path = f"{vocoder_dir}/generator"

    # Hyper-parameters come from the JSON config shipped next to the weights.
    with open(config_path, 'r') as config_file:
        hparams = AttrDict(json.loads(config_file.read()))
    torch.manual_seed(hparams.seed)

    target = torch.device(device)
    generator = Generator(hparams).to(target)
    checkpoint = torch.load(checkpoint_path, target)
    generator.load_state_dict(checkpoint['generator'])
    generator.eval()
    generator.remove_weight_norm()

    return generator
|
64 |
+
|
65 |
+
def load_fastspeech2_model(language, gender, device):
    """Load the FastSpeech2 ``Text2Speech`` model for a language/gender pair.

    The model's ``config.yaml`` stores the locations of three normalization
    statistics files. These are pointed at
    ``<cwd>/<language>/<gender>/model/*.npz`` before the model is built.

    The config file is rewritten only when one of the stored paths differs
    from the expected one. This function is called for every synthesis, and
    an unconditional rewrite truncates the file each time, so a concurrent
    request could read it half-written.

    Args:
        language: Top-level directory name of the language.
        gender: Sub-directory name of the voice.
        device: Device passed through to ``Text2Speech``.

    Returns:
        A ``Text2Speech`` instance built from the config and ``model.pth``.

    Raises:
        KeyError: If the config lacks one of the ``*normalize_conf`` sections.
    """
    tts_config = f"{language}/{gender}/model/config.yaml"
    tts_model = f"{language}/{gender}/model/model.pth"

    with open(tts_config, "r") as file:
        config = yaml.safe_load(file)

    voice_root = os.path.join(os.getcwd(), language, gender)
    stats_files = {
        "normalize_conf": os.path.join(voice_root, "model/feats_stats.npz"),
        "pitch_normalize_conf": os.path.join(voice_root, "model/pitch_stats.npz"),
        "energy_normalize_conf": os.path.join(voice_root, "model/energy_stats.npz"),
    }

    # Update the config based on language and gender, tracking whether
    # anything actually changed.
    config_changed = False
    for section, stats_path in stats_files.items():
        if config[section].get("stats_file") != stats_path:
            config[section]["stats_file"] = stats_path
            config_changed = True

    if config_changed:
        with open(tts_config, "w") as file:
            yaml.dump(config, file)

    return Text2Speech(train_config=tts_config, model_file=tts_model, device=device)
|
93 |
+
|
94 |
+
def text_synthesis(language, gender, sample_text, vocoder, MAX_WAV_VALUE, device, alpha=1):
    """Synthesize ``sample_text`` and return the audio as int16 samples.

    Args:
        language: Language used to locate the FastSpeech2 model.
        gender: Voice used to locate the FastSpeech2 model.
        sample_text: Preprocessed text fed to the model.
        vocoder: Callable that turns the mel features into a waveform.
        MAX_WAV_VALUE: Factor the waveform is multiplied by before the
            int16 cast.
        device: Device the mel features are moved to for the vocoder.
        alpha: Passed to the model as ``decode_conf={"alpha": alpha}``.

    Returns:
        A numpy array of dtype int16.
    """
    with torch.no_grad():
        # The FastSpeech2 model is loaded anew on every call.
        tts = load_fastspeech2_model(language, gender, device)

        result = tts(sample_text, decode_conf={"alpha": alpha})
        print("TTS Done")
        # NOTE(review): 2.3262 is an undocumented scale factor applied to
        # the mel features; confirm its meaning.
        mel = (result["feat_gen_denorm"].T.unsqueeze(0) * 2.3262).to(device)

        # Vocode, scale, then convert to int16 on the CPU.
        waveform = vocoder(mel).squeeze() * MAX_WAV_VALUE
        pcm = waveform.cpu().numpy().astype('int16')

    return pcm
|
115 |
+
|
116 |
+
def setup_app():
    """Load a vocoder for every gender/language pair and warm each one up.

    For each pair a HiFi-GAN vocoder is loaded and one short dummy synthesis
    is attempted. A failing dummy call does not abort start-up: it is
    reported on stdout and logged with its traceback, and the vocoder stays
    registered.

    Returns:
        Nested dict ``vocoders[gender][language]`` holding the loaded
        vocoders.
    """
    genders = ['male','female']
    # to make dummy calls in all languages available
    languages = {'hindi': "नमस्ते",'malayalam': "ഹലോ",'manipuri': "হ্যালো",'marathi': "हॅलो",'kannada': "ಹಲೋ",'bodo': "हॅलो",'english': "Hello",'assamese': "হ্যালো",'tamil': "ஹலோ",'odia': "ହେଲୋ",'rajasthani': "हॅलो",'telugu': "హలో",'bengali': "হ্যালো",'gujarati': "હલો"}

    vocoders = {}
    for gender in genders:
        vocoders[gender] = {}
        for language, text in languages.items():
            # Load the HiFi-GAN vocoder with dynamic language and gender
            vocoder = load_hifigan_vocoder(language, gender, device)
            vocoders[gender][language] = vocoder
            # dummy calls
            print(f"making dummy calls for {language} - {gender}")
            try:
                text_synthesis(language, gender, text, vocoder, MAX_WAV_VALUE, device)
            except Exception:
                # Catch Exception rather than everything, so Ctrl-C and
                # SystemExit still stop start-up, and keep the traceback.
                message = f"cannot make dummy call for {gender} - {language} <==================="
                print(message.upper())
                logging.exception("dummy call failed for %s - %s", gender, language)

    print("Server Started...")
    return vocoders
|
138 |
+
vocoders = setup_app()
|
139 |
+
|
140 |
+
@app.route('/', methods=['GET'])
def main():
    """Answer GET / with the service identifier string."""
    service_name = "IITM_TTS_V2"
    return service_name
|
143 |
+
|
144 |
+
@app.route('/tts', methods=['GET', 'POST'], strict_slashes=False)
def tts():
    """Synthesize speech for a JSON request and return it base64-encoded.

    Expected JSON fields:
        input: Text to synthesize; must be a string.
        gender: Voice key into ``vocoders``.
        lang: Language name; lower-cased before use.
        alpha: Optional value forwarded to ``text_synthesis``; defaults to 1.

    Returns:
        A JSON response. On success ``status`` is "success" and ``audio``
        holds the base64 text of the WAV data. On any error ``status`` is
        "failure" and ``reason`` describes the problem.
    """
    try:
        json_data = request.get_json()
        text = json_data["input"]
        if not isinstance(text, str):
            input_type = type(text)
            # Return here: without the early exit, processing continues and
            # this failure response gets overwritten.
            return jsonify(status='failure', reason=f"Unsupported input type {input_type}. Input text should be in string format.")
        gender = json_data["gender"]
        language = json_data["lang"].lower()
        alpha = json_data.get("alpha", 1)

        # Reject unknown voices up front with a readable reason, instead of
        # the bare KeyError text the lookup below would produce.
        if gender not in vocoders or language not in vocoders[gender]:
            return jsonify(status="failure", reason=f"Unsupported gender/language combination: {gender}/{language}.")

        # Preprocess the sample text
        # NOTE(review): every language goes through the single module-level
        # preprocessor; confirm it is suitable for all supported languages.
        preprocessed_text, phrases = preprocessor.preprocess(text, language, gender)
        preprocessed_text = " ".join(preprocessed_text)
        vocoder = vocoders[gender][language]
        out = text_synthesis(language, gender, preprocessed_text, vocoder, MAX_WAV_VALUE, device, alpha=alpha)

        # avoid saving file on disk
        output_stream = io.BytesIO()
        write(output_stream, SAMPLING_RATE, out)
        audio_wav_bytes = base64.b64encode(output_stream.getvalue())

        ret = jsonify(status="success",audio=audio_wav_bytes.decode('utf-8'))

    except Exception as err:
        # Top-level boundary of the request: report the error to the client.
        ret = jsonify(status="failure", reason=str(err))
    return ret
|
175 |
+
|
176 |
+
if __name__ == '__main__':
    # ssl_context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
    # ssl_context.load_cert_chain('./ssl2023/iitm2022.crt','./ssl2023/iitm2022.key')

    # The server listens on all interfaces, so debug mode must be opt-in:
    # it enables an interactive debugger that can run arbitrary code.
    # Set FLASK_DEBUG=1 to turn it on for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(host='0.0.0.0', port=4005, debug=debug_enabled)
|
get_phone_mapped_python.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class TextReplacer:
    """Map multi-letter phone labels to their single-character symbols."""

    def __init__(self):
        # Phone label -> replacement symbol. Several labels are prefixes or
        # substrings of others (e.g. 'hq' inside 'khq'); apply_replacements
        # handles that by trying longer labels first.
        self.replacements = {
            'aa':'A',
            'ae':'ऍ',
            'ag':'ऽ',
            'ai':'ऐ',
            'au':'औ',
            'axx':'अ',
            'ax':'ऑ',
            'bh':'B',
            'ch':'C',
            'dh':'ध',
            'dxhq':'T',
            'dxh':'ढ',
            'dxq':'D',
            'dx':'ड',
            'ee':'E',
            'ei':'ऐ',
            'eu':'உ',
            'gh':'घ',
            'gq':'G',
            'hq':'H',
            'ii':'I',
            'jh':'J',
            'khq':'K',
            'kh':'ख',
            'kq':'क',
            'ln':'ൾ',
            'lw':'ൽ',
            'lx':'ള',
            'mq':'M',
            'nd':'ऩ',
            'ng':'ङ',
            'nj':'ञ',
            'nk':'Y',
            'nn':'N',
            'nw':'ൺ',
            'nx':'ण',
            'oo':'O',
            'ou':'औ',
            'ph':'P',
            'rqw':'ॠ',
            'rq':'R',
            'rw':'ർ',
            'rx':'ऱ',
            'sh':'श',
            'sx':'ष',
            'txh':'ठ',
            'th':'थ',
            'tx':'ट',
            'uu':'U',
            'wv':'W',
            'zh':'Z'

            # ... Add more replacements as needed
        }

    def apply_replacements(self, text):
        """Replace every known phone label in ``text`` and drop all spaces.

        Labels are applied longest first, so a short label cannot consume
        part of a longer one: with plain dictionary order 'hq' ran before
        'khq' and turned "khq" into "kH". Labels of equal length keep their
        dictionary order because the sort is stable.

        Args:
            text: String containing phone labels, optionally separated by
                spaces.

        Returns:
            The text with labels replaced and every space character removed.
        """
        for key in sorted(self.replacements, key=len, reverse=True):
            text = text.replace(key, self.replacements[key])
        return text.replace(" ", "")

    def apply_replacements_by_phonems(self, text):
        """Return the replacement symbol for exactly one phone label.

        Args:
            text: A single phone label, e.g. ``'aa'``.

        Returns:
            The symbol mapped to that label.

        Raises:
            KeyError: If ``text`` is not a key of ``self.replacements``.
        """
        return self.replacements[text]
|
inference.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import os
|
3 |
+
#replace the path with your hifigan path to import Generator from models.py
|
4 |
+
sys.path.append("hifigan")
|
5 |
+
import argparse
|
6 |
+
import torch
|
7 |
+
from espnet2.bin.tts_inference import Text2Speech
|
8 |
+
from models import Generator
|
9 |
+
from scipy.io.wavfile import write
|
10 |
+
from meldataset import MAX_WAV_VALUE
|
11 |
+
from env import AttrDict
|
12 |
+
import json
|
13 |
+
import yaml
|
14 |
+
from text_preprocess_for_inference import TTSDurAlignPreprocessor, CharTextPreprocessor, TTSPreprocessor
|
15 |
+
|
16 |
+
SAMPLING_RATE = 22050
|
17 |
+
|
18 |
+
def load_hifigan_vocoder(language, gender, device):
    """Return a HiFi-GAN generator for ``gender``, ready for inference.

    Args:
        language: Not used when building the paths below.
            NOTE(review): the "aryan" vocoder directory is used for every
            language; confirm this is intended.
        gender: Directory component under ``vocoder/``.
        device: Anything accepted by ``torch.device``.

    Returns:
        The generator with checkpoint weights loaded, in eval mode and with
        weight normalization removed.
    """
    base_dir = f"vocoder/{gender}/aryan/hifigan"
    config_path = f"{base_dir}/config.json"
    weights_path = f"{base_dir}/generator"

    # Read the hyper-parameters stored next to the weights.
    with open(config_path, 'r') as handle:
        raw_config = handle.read()
    hparams = AttrDict(json.loads(raw_config))
    torch.manual_seed(hparams.seed)

    torch_device = torch.device(device)
    generator = Generator(hparams).to(torch_device)
    weights = torch.load(weights_path, torch_device)
    generator.load_state_dict(weights['generator'])
    generator.eval()
    generator.remove_weight_norm()

    return generator
|
38 |
+
|
39 |
+
|
40 |
+
def load_fastspeech2_model(language, gender, device):
    """Load the FastSpeech2 ``Text2Speech`` model for a language/gender pair.

    The model's ``config.yaml`` stores the locations of three normalization
    statistics files. These are pointed at
    ``<cwd>/<language>/<gender>/model/*.npz`` before the model is built.

    The config file is rewritten only when a stored path differs from the
    expected one, so repeated runs from the same directory leave it alone
    rather than truncating and rewriting it every time.

    Args:
        language: Top-level directory name of the language.
        gender: Sub-directory name of the voice.
        device: Device passed through to ``Text2Speech``.

    Returns:
        A ``Text2Speech`` instance built from the config and ``model.pth``.

    Raises:
        KeyError: If the config lacks one of the ``*normalize_conf`` sections.
    """
    tts_config = f"{language}/{gender}/model/config.yaml"
    tts_model = f"{language}/{gender}/model/model.pth"

    with open(tts_config, "r") as file:
        config = yaml.safe_load(file)

    voice_root = os.path.join(os.getcwd(), language, gender)
    stats_files = {
        "normalize_conf": os.path.join(voice_root, "model/feats_stats.npz"),
        "pitch_normalize_conf": os.path.join(voice_root, "model/pitch_stats.npz"),
        "energy_normalize_conf": os.path.join(voice_root, "model/energy_stats.npz"),
    }

    # Update the config based on language and gender, tracking whether
    # anything actually changed.
    config_changed = False
    for section, stats_path in stats_files.items():
        if config[section].get("stats_file") != stats_path:
            config[section]["stats_file"] = stats_path
            config_changed = True

    if config_changed:
        with open(tts_config, "w") as file:
            yaml.dump(config, file)

    return Text2Speech(train_config=tts_config, model_file=tts_model, device=device)
|
68 |
+
|
69 |
+
def text_synthesis(language, gender, sample_text, vocoder, MAX_WAV_VALUE, device, alpha):
    """Synthesize ``sample_text`` and return the audio as int16 samples.

    Args:
        language: Language used to locate the FastSpeech2 model.
        gender: Voice used to locate the FastSpeech2 model.
        sample_text: Preprocessed text fed to the model.
        vocoder: Callable that turns the mel features into a waveform.
        MAX_WAV_VALUE: Factor the waveform is multiplied by before the
            int16 cast.
        device: Device the mel features are moved to for the vocoder.
        alpha: Passed to the model as ``decode_conf={"alpha": alpha}``.

    Returns:
        A numpy array of dtype int16.
    """
    with torch.no_grad():
        tts = load_fastspeech2_model(language, gender, device)

        print('Alpha ', alpha)

        result = tts(sample_text, decode_conf={"alpha": alpha})
        print("TTS Done")
        # NOTE(review): 2.3262 is an undocumented scale factor applied to
        # the mel features; confirm its meaning.
        mel = (result["feat_gen_denorm"].T.unsqueeze(0) * 2.3262).to(device)

        # Vocode, scale, then convert to int16 on the CPU.
        waveform = vocoder(mel).squeeze() * MAX_WAV_VALUE
        pcm = waveform.cpu().numpy().astype('int16')

    return pcm
|
92 |
+
|
93 |
+
|
94 |
+
if __name__ == "__main__":
    # Command-line entry point: synthesize one text and write it as a WAV file.
    parser = argparse.ArgumentParser(description="Text-to-Speech Inference")
    parser.add_argument("--language", type=str, required=True, help="Language (e.g., hindi)")
    parser.add_argument("--gender", type=str, required=True, help="Gender (e.g., female)")
    parser.add_argument("--sample_text", type=str, required=True, help="Text to be synthesized")
    parser.add_argument("--output_file", type=str, default="", help="Output WAV file path")
    parser.add_argument("--alpha", type=float, default=1, help="Alpha Parameter")
    args = parser.parse_args()

    # Starts empty and is handed to the preprocessor below.
    phone_dictionary = {}

    device = "cuda" if torch.cuda.is_available() else "cpu"
    vocoder = load_hifigan_vocoder(args.language, args.gender, device)

    # Choose the preprocessor class by language.
    if args.language in ("urdu", "punjabi"):
        preprocessor = CharTextPreprocessor()
    elif args.language == "english":
        preprocessor = TTSPreprocessor()
    else:
        preprocessor = TTSDurAlignPreprocessor()

    preprocessed_text, phrases = preprocessor.preprocess(args.sample_text, args.language, args.gender, phone_dictionary)
    preprocessed_text = " ".join(preprocessed_text)

    audio = text_synthesis(args.language, args.gender, preprocessed_text, vocoder, MAX_WAV_VALUE, device, args.alpha)

    # Use a name derived from language and gender when no path was given.
    output_file = args.output_file if args.output_file else f"{args.language}_{args.gender}_output.wav"
    write(output_file, SAMPLING_RATE, audio)
|
ssn_parser_new/get_phone_mapped_text.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import re
|
3 |
+
|
4 |
+
def replace_in_file(file_path, replacements):
    """Rewrite ``file_path`` in place after applying every replacement.

    Args:
        file_path: Path of a UTF-8 text file; it is read fully and then
            overwritten.
        replacements: Mapping of pattern -> replacement. Each pattern is
            handed to ``re.sub`` as a regular expression, and the pairs are
            applied one after another in the mapping's iteration order.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        text = source.read()

    for pattern in replacements:
        text = re.sub(pattern, replacements[pattern], text)

    with open(file_path, 'w', encoding='utf-8') as target:
        target.write(text)
|
13 |
+
|
14 |
+
def main():
    """Rewrite quoted phone labels in the file named on the command line.

    The first command-line argument is the file to edit in place. Every
    quoted label listed below (e.g. ``"aa"``) is replaced by its quoted
    single-symbol form (e.g. ``"A"``). The surrounding quotes are part of
    each pattern, so a short label never matches inside a longer one.

    Exits with a usage message when no file argument is given.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: get_phone_mapped_text.py <file>")
    in_file = sys.argv[1]

    replacements = {
        '"aa"': '"A"',
        '"ii"': '"I"',
        '"uu"': '"U"',
        '"ee"': '"E"',
        '"oo"': '"O"',
        '"nn"': '"N"',
        '"ae"': '"ऍ"',
        '"ag"': '"ऽ"',
        '"au"': '"औ"',
        '"ax"': '"ऑ"',
        '"bh"': '"B"',
        '"ch"': '"C"',
        '"dh"': '"ध"',
        '"dx"': '"ड"',
        '"dxh"': '"ढ"',
        '"dxhq"': '"ढ़"',
        '"dxq"': '"ड़"',
        '"ei"': '"ऐ"',
        '"ai"': '"ऐ"',
        '"eu"': '"उ"',
        '"gh"': '"घ"',
        '"gq"': '"ग़"',
        '"hq"': '"H"',
        '"jh"': '"J"',
        '"kh"': '"ख"',
        '"khq"': '"ख़"',
        '"kq"': '"क़"',
        '"ln"': '"ൾ"',
        '"lw"': '"ൽ"',
        '"lx"': '"ള"',
        '"mq"': '"M"',
        '"nd"': '"ऩ"',
        '"ng"': '"ङ"',
        '"nj"': '"ञ"',
        '"nk"': '"़"',
        '"nw"': '"ൺ"',
        '"nx"': '"ण"',
        '"ou"': '"औ"',
        '"ph"': '"P"',
        '"rq"': '"R"',
        '"rqw"': '"ॠ"',
        '"rw"': '"ർ"',
        '"rx"': '"ऱ"',
        '"sh"': '"श"',
        '"sx"': '"ष"',
        '"th"': '"थ"',
        '"tx"': '"ट"',
        '"txh"': '"ठ"',
        '"wv"': '"W"',
        '"zh"': '"Z"',
    }

    replace_in_file(in_file, replacements)


if __name__ == "__main__":
    main()
|
ssn_parser_new/lists/alphabets
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
b
|
3 |
+
c
|
4 |
+
d
|
5 |
+
e
|
6 |
+
f
|
7 |
+
g
|
8 |
+
h
|
9 |
+
i
|
10 |
+
j
|
11 |
+
k
|
12 |
+
l
|
13 |
+
m
|
14 |
+
n
|
15 |
+
o
|
16 |
+
p
|
17 |
+
q
|
18 |
+
r
|
19 |
+
s
|
20 |
+
t
|
21 |
+
u
|
22 |
+
v
|
23 |
+
w
|
24 |
+
x
|
25 |
+
y
|
26 |
+
z
|
27 |
+
A
|
28 |
+
B
|
29 |
+
C
|
30 |
+
D
|
31 |
+
E
|
32 |
+
F
|
33 |
+
G
|
34 |
+
H
|
35 |
+
I
|
36 |
+
J
|
37 |
+
K
|
38 |
+
L
|
39 |
+
M
|
40 |
+
N
|
41 |
+
O
|
42 |
+
P
|
43 |
+
Q
|
44 |
+
R
|
45 |
+
S
|
46 |
+
T
|
47 |
+
U
|
48 |
+
V
|
49 |
+
W
|
50 |
+
X
|
51 |
+
Y
|
52 |
+
Z
|
53 |
+
0
|
54 |
+
1
|
55 |
+
2
|
56 |
+
3
|
57 |
+
4
|
58 |
+
5
|
59 |
+
6
|
60 |
+
7
|
61 |
+
8
|
62 |
+
9
|
ssn_parser_new/lists/cons
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
க
|
2 |
+
ங
|
3 |
+
ச
|
4 |
+
ஜ
|
5 |
+
ஞ
|
6 |
+
ட
|
7 |
+
த
|
8 |
+
ந
|
9 |
+
ண
|
10 |
+
ன
|
11 |
+
ப
|
12 |
+
ம
|
13 |
+
ய
|
14 |
+
ர
|
15 |
+
ற
|
16 |
+
ல
|
17 |
+
ள
|
18 |
+
ழ
|
19 |
+
வ
|
20 |
+
ஷ
|
21 |
+
ஸ
|
22 |
+
ஹ
|
23 |
+
ஃ
|
24 |
+
क
|
25 |
+
ख
|
26 |
+
ग
|
27 |
+
घ
|
28 |
+
ङ
|
29 |
+
च
|
30 |
+
छ
|
31 |
+
ज
|
32 |
+
झ
|
33 |
+
ञ
|
34 |
+
ट
|
35 |
+
ठ
|
36 |
+
ड
|
37 |
+
ढ
|
38 |
+
ण
|
39 |
+
त
|
40 |
+
थ
|
41 |
+
द
|
42 |
+
ध
|
43 |
+
न
|
44 |
+
प
|
45 |
+
फ
|
46 |
+
ब
|
47 |
+
भ
|
48 |
+
म
|
49 |
+
य
|
50 |
+
र
|
51 |
+
ल
|
52 |
+
ळ
|
53 |
+
व
|
54 |
+
ष
|
55 |
+
श
|
56 |
+
स
|
57 |
+
ह
|
58 |
+
क्ष
|
59 |
+
ക
|
60 |
+
ഖ
|
61 |
+
ഗ
|
62 |
+
ഘ
|
63 |
+
ങ
|
64 |
+
ച
|
65 |
+
ഛ
|
66 |
+
ജ
|
67 |
+
ഝ
|
68 |
+
ഞ
|
69 |
+
ട
|
70 |
+
ഠ
|
71 |
+
ഡ
|
72 |
+
ഢ
|
73 |
+
ണ
|
74 |
+
ത
|
75 |
+
ഥ
|
76 |
+
ദ
|
77 |
+
ധ
|
78 |
+
ന
|
79 |
+
പ
|
80 |
+
ഫ
|
81 |
+
ബ
|
82 |
+
ഭ
|
83 |
+
മ
|
84 |
+
യ
|
85 |
+
ര
|
86 |
+
റ
|
87 |
+
ല
|
88 |
+
ള
|
89 |
+
ഴ
|
90 |
+
വ
|
91 |
+
ശ
|
92 |
+
ഷ
|
93 |
+
സ
|
94 |
+
ഹ
|
95 |
+
బ
|
96 |
+
భ
|
97 |
+
చ
|
98 |
+
ఛ
|
99 |
+
డ
|
100 |
+
ఢ
|
101 |
+
ద
|
102 |
+
ధ
|
103 |
+
ఫ
|
104 |
+
గ
|
105 |
+
ఘ
|
106 |
+
హ
|
107 |
+
జ
|
108 |
+
ఝ
|
109 |
+
క
|
110 |
+
ఖ
|
111 |
+
ల
|
112 |
+
ళ
|
113 |
+
మ
|
114 |
+
న
|
115 |
+
ణ
|
116 |
+
ప
|
117 |
+
ఞ
|
118 |
+
ఙ
|
119 |
+
ర
|
120 |
+
ఱ
|
121 |
+
ఋ
|
122 |
+
స
|
123 |
+
ష
|
124 |
+
శ
|
125 |
+
ట
|
126 |
+
ఠ
|
127 |
+
త
|
128 |
+
థ
|
129 |
+
వ
|
130 |
+
య
|
ssn_parser_new/lists/dv
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
்
|
2 |
+
ா
|
3 |
+
ி
|
4 |
+
ீ
|
5 |
+
ு
|
6 |
+
ூ
|
7 |
+
ெ
|
8 |
+
ே
|
9 |
+
ை
|
10 |
+
ொ
|
11 |
+
ோ
|
12 |
+
ௌ
|
13 |
+
ा
|
14 |
+
ि
|
15 |
+
ी
|
16 |
+
ु
|
17 |
+
ू
|
18 |
+
ृ
|
19 |
+
े
|
20 |
+
ै
|
21 |
+
ो
|
22 |
+
ौ
|
23 |
+
ं
|
24 |
+
ः
|
25 |
+
ँ
|
26 |
+
ം
|
27 |
+
ാ
|
28 |
+
ി
|
29 |
+
ീ
|
30 |
+
ു
|
31 |
+
ൂ
|
32 |
+
ൃ
|
33 |
+
െ
|
34 |
+
േ
|
35 |
+
ൈ
|
36 |
+
ൊ
|
37 |
+
ോ
|
38 |
+
ൌ
|
39 |
+
്
|
40 |
+
ഃ
|
41 |
+
ా
|
42 |
+
ి
|
43 |
+
ీ
|
44 |
+
ు
|
45 |
+
ూ
|
46 |
+
ృ
|
47 |
+
ె
|
48 |
+
ే
|
49 |
+
ై
|
50 |
+
ొ
|
51 |
+
ో
|
52 |
+
ౌ
|
53 |
+
ౖ
|
54 |
+
ఁ
|
55 |
+
ం
|
56 |
+
ః
|
ssn_parser_new/lists/end_syl_list
ADDED
@@ -0,0 +1,404 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
zhoon
|
2 |
+
zhoo
|
3 |
+
yoong
|
4 |
+
yitx
|
5 |
+
yis
|
6 |
+
yins
|
7 |
+
yew
|
8 |
+
yek
|
9 |
+
yarsk
|
10 |
+
yalx
|
11 |
+
yaasx
|
12 |
+
yaalx
|
13 |
+
wur
|
14 |
+
wort
|
15 |
+
wizh
|
16 |
+
wisx
|
17 |
+
wis
|
18 |
+
wingg
|
19 |
+
windd
|
20 |
+
wiil
|
21 |
+
wiik
|
22 |
+
werp
|
23 |
+
weey
|
24 |
+
wees
|
25 |
+
weertx
|
26 |
+
was
|
27 |
+
wanx
|
28 |
+
wakt
|
29 |
+
wain
|
30 |
+
waih
|
31 |
+
waaw
|
32 |
+
waam
|
33 |
+
uum
|
34 |
+
txoosx
|
35 |
+
txoostx
|
36 |
+
txoomsk
|
37 |
+
txoom
|
38 |
+
txitx
|
39 |
+
txingg
|
40 |
+
txiir
|
41 |
+
txiim
|
42 |
+
txiil
|
43 |
+
txes
|
44 |
+
txel
|
45 |
+
txek
|
46 |
+
txeetxs
|
47 |
+
txeestx
|
48 |
+
txees
|
49 |
+
txeenxdx
|
50 |
+
txeel
|
51 |
+
txastx
|
52 |
+
txanxdx
|
53 |
+
txail
|
54 |
+
txaatxs
|
55 |
+
txaarc
|
56 |
+
txaak
|
57 |
+
tuu
|
58 |
+
trees
|
59 |
+
tooy
|
60 |
+
toon
|
61 |
+
too
|
62 |
+
tong
|
63 |
+
teew
|
64 |
+
teelx
|
65 |
+
taalx
|
66 |
+
sxoor
|
67 |
+
sxoonm
|
68 |
+
sxit
|
69 |
+
sxil
|
70 |
+
sxifr
|
71 |
+
sxar
|
72 |
+
sxair
|
73 |
+
sxaandd
|
74 |
+
suum
|
75 |
+
sung
|
76 |
+
stxoor
|
77 |
+
stxiitx
|
78 |
+
spings
|
79 |
+
spik
|
80 |
+
spek
|
81 |
+
soow
|
82 |
+
so
|
83 |
+
sketxc
|
84 |
+
skaarf
|
85 |
+
sink
|
86 |
+
singg
|
87 |
+
sim
|
88 |
+
sils
|
89 |
+
siil
|
90 |
+
sii
|
91 |
+
seyng
|
92 |
+
seyn
|
93 |
+
sep
|
94 |
+
sentx
|
95 |
+
sem
|
96 |
+
sees
|
97 |
+
sau
|
98 |
+
satxs
|
99 |
+
sartx
|
100 |
+
sans
|
101 |
+
rxulx
|
102 |
+
rxoytx
|
103 |
+
rxoo
|
104 |
+
rxiing
|
105 |
+
rxen
|
106 |
+
rxeel
|
107 |
+
rxars
|
108 |
+
rxaang
|
109 |
+
ruups
|
110 |
+
roow
|
111 |
+
roop
|
112 |
+
ritx
|
113 |
+
risks
|
114 |
+
riitxs
|
115 |
+
riir
|
116 |
+
riil
|
117 |
+
rektx
|
118 |
+
ratxs
|
119 |
+
rastx
|
120 |
+
raksx
|
121 |
+
raitxs
|
122 |
+
raas
|
123 |
+
raaptxs
|
124 |
+
puun
|
125 |
+
puum
|
126 |
+
praangg
|
127 |
+
poop
|
128 |
+
poons
|
129 |
+
pooltx
|
130 |
+
plxas
|
131 |
+
piy
|
132 |
+
pilxs
|
133 |
+
piir
|
134 |
+
piins
|
135 |
+
pes
|
136 |
+
pel
|
137 |
+
pek
|
138 |
+
peetxc
|
139 |
+
peesx
|
140 |
+
pars
|
141 |
+
pair
|
142 |
+
paayntx
|
143 |
+
paasx
|
144 |
+
paask
|
145 |
+
paartx
|
146 |
+
paalx
|
147 |
+
paah
|
148 |
+
oos
|
149 |
+
nxuu
|
150 |
+
nxung
|
151 |
+
nxoo
|
152 |
+
nxiing
|
153 |
+
nxantx
|
154 |
+
nxaar
|
155 |
+
nxaam
|
156 |
+
nook
|
157 |
+
njaar
|
158 |
+
ningg
|
159 |
+
niip
|
160 |
+
nii
|
161 |
+
ngin
|
162 |
+
nga
|
163 |
+
ng
|
164 |
+
neetx
|
165 |
+
neel
|
166 |
+
neej
|
167 |
+
ndooys
|
168 |
+
ndir
|
169 |
+
ndil
|
170 |
+
ndiips
|
171 |
+
ndafs
|
172 |
+
ndaastx
|
173 |
+
nd
|
174 |
+
natx
|
175 |
+
nastxm
|
176 |
+
naltx
|
177 |
+
nals
|
178 |
+
naitx
|
179 |
+
naays
|
180 |
+
naayk
|
181 |
+
naay
|
182 |
+
naangg
|
183 |
+
muuw
|
184 |
+
muurs
|
185 |
+
mooltx
|
186 |
+
mis
|
187 |
+
mirm
|
188 |
+
miiys
|
189 |
+
miit
|
190 |
+
miis
|
191 |
+
meesxr
|
192 |
+
maut
|
193 |
+
martx
|
194 |
+
marnd
|
195 |
+
mams
|
196 |
+
mac
|
197 |
+
maatx
|
198 |
+
maanxdx
|
199 |
+
lxur
|
200 |
+
lxing
|
201 |
+
lxiim
|
202 |
+
lxeen
|
203 |
+
lxeek
|
204 |
+
lxark
|
205 |
+
lxair
|
206 |
+
lxaaw
|
207 |
+
lxaark
|
208 |
+
lulx
|
209 |
+
loow
|
210 |
+
loom
|
211 |
+
lisx
|
212 |
+
lips
|
213 |
+
linj
|
214 |
+
lingg
|
215 |
+
liil
|
216 |
+
liik
|
217 |
+
leyng
|
218 |
+
letx
|
219 |
+
ler
|
220 |
+
leej
|
221 |
+
lars
|
222 |
+
lanxdx
|
223 |
+
laanxdx
|
224 |
+
laaks
|
225 |
+
laah
|
226 |
+
kwaang
|
227 |
+
kuurt
|
228 |
+
kris
|
229 |
+
kriis
|
230 |
+
kriim
|
231 |
+
kreem
|
232 |
+
kraim
|
233 |
+
kool
|
234 |
+
kir
|
235 |
+
kiptx
|
236 |
+
kings
|
237 |
+
kiizht
|
238 |
+
kiim
|
239 |
+
keym
|
240 |
+
kens
|
241 |
+
keetx
|
242 |
+
kees
|
243 |
+
keep
|
244 |
+
keems
|
245 |
+
keelx
|
246 |
+
kays
|
247 |
+
kanxdx
|
248 |
+
kails
|
249 |
+
kaantx
|
250 |
+
kaangg
|
251 |
+
kaah
|
252 |
+
juur
|
253 |
+
joons
|
254 |
+
jol
|
255 |
+
jiir
|
256 |
+
jen
|
257 |
+
jatx
|
258 |
+
jas
|
259 |
+
jars
|
260 |
+
jain
|
261 |
+
jaaw
|
262 |
+
jaas
|
263 |
+
i
|
264 |
+
hul
|
265 |
+
hraam
|
266 |
+
hoo
|
267 |
+
hon
|
268 |
+
hhan
|
269 |
+
heu
|
270 |
+
hee
|
271 |
+
he
|
272 |
+
har
|
273 |
+
haj
|
274 |
+
haars
|
275 |
+
haar
|
276 |
+
haap
|
277 |
+
gur
|
278 |
+
gulx
|
279 |
+
goos
|
280 |
+
goor
|
281 |
+
gis
|
282 |
+
gins
|
283 |
+
giizh
|
284 |
+
gels
|
285 |
+
geet
|
286 |
+
gaw
|
287 |
+
ganxdx
|
288 |
+
gals
|
289 |
+
gaastx
|
290 |
+
gaandd
|
291 |
+
gaam
|
292 |
+
gaaks
|
293 |
+
frsi
|
294 |
+
foors
|
295 |
+
faitx
|
296 |
+
fai
|
297 |
+
ert
|
298 |
+
elx
|
299 |
+
ef
|
300 |
+
ec
|
301 |
+
dxur
|
302 |
+
dxunx
|
303 |
+
dxos
|
304 |
+
dxoow
|
305 |
+
dxoor
|
306 |
+
dxoon
|
307 |
+
dxoom
|
308 |
+
dxingg
|
309 |
+
dxeesx
|
310 |
+
dxeej
|
311 |
+
dxasxk
|
312 |
+
dxas
|
313 |
+
dxaitxs
|
314 |
+
dxaas
|
315 |
+
dxaartx
|
316 |
+
dxaak
|
317 |
+
dxaaf
|
318 |
+
duur
|
319 |
+
duun
|
320 |
+
dun
|
321 |
+
dem
|
322 |
+
deesx
|
323 |
+
dees
|
324 |
+
deelx
|
325 |
+
day
|
326 |
+
darn
|
327 |
+
dams
|
328 |
+
dalx
|
329 |
+
daart
|
330 |
+
cuu
|
331 |
+
col
|
332 |
+
cin
|
333 |
+
cii
|
334 |
+
ceew
|
335 |
+
caaw
|
336 |
+
caatx
|
337 |
+
caar
|
338 |
+
bur
|
339 |
+
bunx
|
340 |
+
book
|
341 |
+
bisx
|
342 |
+
bins
|
343 |
+
bilxs
|
344 |
+
biing
|
345 |
+
biin
|
346 |
+
bert
|
347 |
+
benx
|
348 |
+
beetx
|
349 |
+
beesx
|
350 |
+
band
|
351 |
+
bals
|
352 |
+
baawtx
|
353 |
+
baas
|
354 |
+
baalx
|
355 |
+
baaltx
|
356 |
+
asxk
|
357 |
+
ars
|
358 |
+
ank
|
359 |
+
aas
|
360 |
+
aang
|
361 |
+
aam
|
362 |
+
aaktx
|
363 |
+
lxaa
|
364 |
+
dxeen
|
365 |
+
ma
|
366 |
+
jaa
|
367 |
+
sey
|
368 |
+
rxoom
|
369 |
+
lxulx
|
370 |
+
rxaay
|
371 |
+
daal
|
372 |
+
car
|
373 |
+
sis
|
374 |
+
diir
|
375 |
+
aa
|
376 |
+
txaal
|
377 |
+
ra
|
378 |
+
maam
|
379 |
+
woom
|
380 |
+
lxoo
|
381 |
+
see
|
382 |
+
wuut
|
383 |
+
rxaal
|
384 |
+
poom
|
385 |
+
paanxdx
|
386 |
+
neen
|
387 |
+
nas
|
388 |
+
lxa
|
389 |
+
las
|
390 |
+
him
|
391 |
+
hi
|
392 |
+
doom
|
393 |
+
cee
|
394 |
+
buu
|
395 |
+
boo
|
396 |
+
nxee
|
397 |
+
txeen
|
398 |
+
poo
|
399 |
+
noo
|
400 |
+
haa
|
401 |
+
deen
|
402 |
+
daay
|
403 |
+
puu
|
404 |
+
kaan
|
ssn_parser_new/lists/english
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
b
|
3 |
+
c
|
4 |
+
d
|
5 |
+
e
|
6 |
+
f
|
7 |
+
g
|
8 |
+
h
|
9 |
+
i
|
10 |
+
j
|
11 |
+
k
|
12 |
+
l
|
13 |
+
m
|
14 |
+
n
|
15 |
+
o
|
16 |
+
p
|
17 |
+
q
|
18 |
+
r
|
19 |
+
s
|
20 |
+
t
|
21 |
+
u
|
22 |
+
v
|
23 |
+
w
|
24 |
+
x
|
25 |
+
y
|
26 |
+
z
|
27 |
+
A
|
28 |
+
B
|
29 |
+
C
|
30 |
+
D
|
31 |
+
E
|
32 |
+
F
|
33 |
+
G
|
34 |
+
H
|
35 |
+
I
|
36 |
+
J
|
37 |
+
K
|
38 |
+
L
|
39 |
+
M
|
40 |
+
N
|
41 |
+
O
|
42 |
+
P
|
43 |
+
Q
|
44 |
+
R
|
45 |
+
S
|
46 |
+
T
|
47 |
+
U
|
48 |
+
V
|
49 |
+
W
|
50 |
+
X
|
51 |
+
Y
|
52 |
+
Z
|
53 |
+
|
ssn_parser_new/lists/english_tam_map
ADDED
@@ -0,0 +1,409 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#map file containing tamil caracters [ ] english caracters
|
2 |
+
|
3 |
+
|
4 |
+
! !
|
5 |
+
@ @
|
6 |
+
" "
|
7 |
+
# #
|
8 |
+
% %
|
9 |
+
' '
|
10 |
+
( (
|
11 |
+
) )
|
12 |
+
* *
|
13 |
+
+ +
|
14 |
+
, ,
|
15 |
+
_ _
|
16 |
+
- -
|
17 |
+
. .
|
18 |
+
/ /
|
19 |
+
0 0
|
20 |
+
1 1
|
21 |
+
2 2
|
22 |
+
3 3
|
23 |
+
4 4
|
24 |
+
5 5
|
25 |
+
6 6
|
26 |
+
7 7
|
27 |
+
8 8
|
28 |
+
9 9
|
29 |
+
; ;
|
30 |
+
அ a
|
31 |
+
ஆ aa
|
32 |
+
இ i
|
33 |
+
ஈ ii
|
34 |
+
உ u
|
35 |
+
ஊ uu
|
36 |
+
எ e
|
37 |
+
ஏ ee
|
38 |
+
ஐ ai
|
39 |
+
ஒ o
|
40 |
+
ஓ oo
|
41 |
+
ஔ au
|
42 |
+
க ka
|
43 |
+
கா kaa
|
44 |
+
கி ki
|
45 |
+
கீ kii
|
46 |
+
கு ku
|
47 |
+
கூ kuu
|
48 |
+
கெ ke
|
49 |
+
கே kee
|
50 |
+
கை kai
|
51 |
+
கொ ko
|
52 |
+
கோ koo
|
53 |
+
கௌ kau
|
54 |
+
க் k
|
55 |
+
ங nga
|
56 |
+
ஙா ngaa
|
57 |
+
ஙி ngi
|
58 |
+
ஙீ ngii
|
59 |
+
ஙு ngu
|
60 |
+
ஙூ nguu
|
61 |
+
ஙெ nge
|
62 |
+
ஙே ngee
|
63 |
+
ஙை ngai
|
64 |
+
ஙொ ngo
|
65 |
+
ஙோ ngoo
|
66 |
+
ஙௌ ngau
|
67 |
+
ங் ng
|
68 |
+
ச ca
|
69 |
+
சா caa
|
70 |
+
சி ci
|
71 |
+
சீ cii
|
72 |
+
சு cu
|
73 |
+
சூ cuu
|
74 |
+
செ ce
|
75 |
+
சே cee
|
76 |
+
சை cai
|
77 |
+
சொ co
|
78 |
+
சோ coo
|
79 |
+
சௌ cau
|
80 |
+
ச் c
|
81 |
+
ஜ ja
|
82 |
+
ஜா jaa
|
83 |
+
ஜி ji
|
84 |
+
ஜீ jii
|
85 |
+
ஜு ju
|
86 |
+
ஜூ juu
|
87 |
+
ஜெ je
|
88 |
+
ஜே jee
|
89 |
+
ஜை jai
|
90 |
+
ஜொ jo
|
91 |
+
ஜோ joo
|
92 |
+
ஜௌ jau
|
93 |
+
ஜ் j
|
94 |
+
ஞ nja
|
95 |
+
ஞா njaa
|
96 |
+
ஞி nji
|
97 |
+
ஞீ njii
|
98 |
+
ஞு nju
|
99 |
+
ஞூ njuu
|
100 |
+
ஞெ nje
|
101 |
+
ஞே njee
|
102 |
+
ஞை njai
|
103 |
+
ஞொ njo
|
104 |
+
ஞோ njoo
|
105 |
+
ஞௌ njau
|
106 |
+
ஞ் nj
|
107 |
+
ட txa
|
108 |
+
டா txaa
|
109 |
+
டி txi
|
110 |
+
டீ txii
|
111 |
+
டு txu
|
112 |
+
டூ txuu
|
113 |
+
டெ txe
|
114 |
+
டே txee
|
115 |
+
டை txai
|
116 |
+
டொ txo
|
117 |
+
டோ txoo
|
118 |
+
டௌ txau
|
119 |
+
ட் tx
|
120 |
+
த ta
|
121 |
+
தா taa
|
122 |
+
தி ti
|
123 |
+
தீ tii
|
124 |
+
து tu
|
125 |
+
தூ tuu
|
126 |
+
தெ te
|
127 |
+
தே tee
|
128 |
+
தை tai
|
129 |
+
தொ to
|
130 |
+
தோ too
|
131 |
+
தௌ tau
|
132 |
+
த் t
|
133 |
+
ந nda
|
134 |
+
நா ndaa
|
135 |
+
நி ndi
|
136 |
+
நீ ndii
|
137 |
+
நு ndu
|
138 |
+
நூ nduu
|
139 |
+
நெ nde
|
140 |
+
நே ndee
|
141 |
+
நை ndai
|
142 |
+
நொ ndo
|
143 |
+
நோ ndoo
|
144 |
+
நௌ ndau
|
145 |
+
ந் nd
|
146 |
+
ண nxa
|
147 |
+
ணா nxaa
|
148 |
+
ணி nxi
|
149 |
+
ணீ nxii
|
150 |
+
ணு nxu
|
151 |
+
ணூ nxuu
|
152 |
+
ணெ nxe
|
153 |
+
ணே nxee
|
154 |
+
ணை nxai
|
155 |
+
ணொ nxo
|
156 |
+
ணோ nxoo
|
157 |
+
ணௌ nxau
|
158 |
+
ண் nx
|
159 |
+
ன na
|
160 |
+
னா naa
|
161 |
+
னி ni
|
162 |
+
னீ nii
|
163 |
+
னு nu
|
164 |
+
னூ nuu
|
165 |
+
னெ ne
|
166 |
+
னே nee
|
167 |
+
னை nai
|
168 |
+
னொ no
|
169 |
+
னோ noo
|
170 |
+
னௌ nau
|
171 |
+
ன் n
|
172 |
+
ப pa
|
173 |
+
பா paa
|
174 |
+
பி pi
|
175 |
+
பீ pii
|
176 |
+
பு pu
|
177 |
+
பூ puu
|
178 |
+
பெ pe
|
179 |
+
பே pee
|
180 |
+
பை pai
|
181 |
+
பொ po
|
182 |
+
போ poo
|
183 |
+
பௌ pau
|
184 |
+
ப் p
|
185 |
+
ம ma
|
186 |
+
மா maa
|
187 |
+
மி mi
|
188 |
+
மீ mii
|
189 |
+
மு mu
|
190 |
+
மூ muu
|
191 |
+
மெ me
|
192 |
+
மே mee
|
193 |
+
மை mai
|
194 |
+
மொ mo
|
195 |
+
மோ moo
|
196 |
+
மௌ mau
|
197 |
+
ம் m
|
198 |
+
ய ya
|
199 |
+
யா yaa
|
200 |
+
யி yi
|
201 |
+
யீ yii
|
202 |
+
யு yu
|
203 |
+
யூ yuu
|
204 |
+
யெ ye
|
205 |
+
யே yee
|
206 |
+
யை yai
|
207 |
+
யொ yo
|
208 |
+
யோ yoo
|
209 |
+
யௌ yau
|
210 |
+
ய் y
|
211 |
+
ர ra
|
212 |
+
ரா raa
|
213 |
+
ரி ri
|
214 |
+
ரீ rii
|
215 |
+
ரு ru
|
216 |
+
ரூ ruu
|
217 |
+
ரெ re
|
218 |
+
ரே ree
|
219 |
+
ரை rai
|
220 |
+
ரொ ro
|
221 |
+
ரோ roo
|
222 |
+
ரௌ rau
|
223 |
+
ர் r
|
224 |
+
ற rxa
|
225 |
+
றா rxaa
|
226 |
+
றி rxi
|
227 |
+
றீ rxii
|
228 |
+
று rxu
|
229 |
+
றூ rxuu
|
230 |
+
றெ rxe
|
231 |
+
றே rxee
|
232 |
+
றை rxai
|
233 |
+
றொ rxo
|
234 |
+
றோ rxoo
|
235 |
+
றௌ rxau
|
236 |
+
ற் rx
|
237 |
+
ல la
|
238 |
+
லா laa
|
239 |
+
லி li
|
240 |
+
ப pa
|
241 |
+
பா paa
|
242 |
+
பி pi
|
243 |
+
பீ pii
|
244 |
+
பு pu
|
245 |
+
பூ puu
|
246 |
+
பெ pe
|
247 |
+
பே pee
|
248 |
+
பை pai
|
249 |
+
பொ po
|
250 |
+
போ poo
|
251 |
+
பௌ pau
|
252 |
+
ப் p
|
253 |
+
லீ lii
|
254 |
+
லு lu
|
255 |
+
லூ luu
|
256 |
+
லெ le
|
257 |
+
லே lee
|
258 |
+
லை lai
|
259 |
+
லொ lo
|
260 |
+
லோ loo
|
261 |
+
லௌ lau
|
262 |
+
ல் l
|
263 |
+
ள lxa
|
264 |
+
ளா lxaa
|
265 |
+
ளி lxi
|
266 |
+
ளீ lxii
|
267 |
+
ளு lxu
|
268 |
+
ளூ lxuu
|
269 |
+
ளெ lxe
|
270 |
+
ளே lxee
|
271 |
+
ளை lxai
|
272 |
+
ளொ lxo
|
273 |
+
ளோ lxoo
|
274 |
+
ளௌ lxau
|
275 |
+
ள் lx
|
276 |
+
ழ zha
|
277 |
+
ழா zhaa
|
278 |
+
ழி zhi
|
279 |
+
ழீ zhii
|
280 |
+
ழு zhu
|
281 |
+
ழூ zhuu
|
282 |
+
ழெ zhe
|
283 |
+
ழே zhee
|
284 |
+
ழை zhai
|
285 |
+
ழொ zho
|
286 |
+
ழோ zhoo
|
287 |
+
ழௌ zhau
|
288 |
+
ழ் zh
|
289 |
+
வ wa
|
290 |
+
வா waa
|
291 |
+
வி wi
|
292 |
+
வீ wii
|
293 |
+
வு wu
|
294 |
+
வூ wuu
|
295 |
+
வெ we
|
296 |
+
வே wee
|
297 |
+
வை wai
|
298 |
+
வொ wo
|
299 |
+
வோ woo
|
300 |
+
வௌ wau
|
301 |
+
வ் w
|
302 |
+
ஷ sxa
|
303 |
+
ஷா sxaa
|
304 |
+
ஷி sxi
|
305 |
+
ஷீ sxii
|
306 |
+
ஷு sxu
|
307 |
+
ஷூ sxuu
|
308 |
+
ஷெ sxe
|
309 |
+
ஷே sxee
|
310 |
+
ஷை sxai
|
311 |
+
ஷொ sxao
|
312 |
+
ஷோ sxaoo
|
313 |
+
ஷௌ sxau
|
314 |
+
ஷ் sx
|
315 |
+
ஸ sa
|
316 |
+
ஸா saa
|
317 |
+
ஸி si
|
318 |
+
ஸீ sii
|
319 |
+
ஸு su
|
320 |
+
ஸூ suu
|
321 |
+
ஸெ se
|
322 |
+
ஸே see
|
323 |
+
ஸை sai
|
324 |
+
ஸொ so
|
325 |
+
ஸோ soo
|
326 |
+
ஸௌ sau
|
327 |
+
ஸ் s
|
328 |
+
ஹ ha
|
329 |
+
ஹா haa
|
330 |
+
ஹி hi
|
331 |
+
ஹீ hii
|
332 |
+
ஹு hu
|
333 |
+
ஹூ huu
|
334 |
+
ஹெ he
|
335 |
+
ஹே hee
|
336 |
+
ஹை hai
|
337 |
+
ஹொ ho
|
338 |
+
ஹோ hoo
|
339 |
+
ஹௌ hau
|
340 |
+
ஹ் h
|
341 |
+
ஃப fa
|
342 |
+
ஃபா faa
|
343 |
+
ஃபி fi
|
344 |
+
ஃபீ fii
|
345 |
+
ஃபு fu
|
346 |
+
ஃபூ fuu
|
347 |
+
ஃபெ fe
|
348 |
+
ஃபே fee
|
349 |
+
ஃபை fai
|
350 |
+
ஃபொ fo
|
351 |
+
ஃபோ foo
|
352 |
+
ஃபௌ fau
|
353 |
+
ஃப் f
|
354 |
+
a a
|
355 |
+
b b
|
356 |
+
c c
|
357 |
+
d d
|
358 |
+
e e
|
359 |
+
f f
|
360 |
+
g g
|
361 |
+
h h
|
362 |
+
i i
|
363 |
+
j j
|
364 |
+
k k
|
365 |
+
l l
|
366 |
+
m m
|
367 |
+
n n
|
368 |
+
o o
|
369 |
+
p p
|
370 |
+
q q
|
371 |
+
r r
|
372 |
+
s s
|
373 |
+
t t
|
374 |
+
u u
|
375 |
+
v v
|
376 |
+
w w
|
377 |
+
x x
|
378 |
+
y y
|
379 |
+
z z
|
380 |
+
A A
|
381 |
+
B B
|
382 |
+
C C
|
383 |
+
D D
|
384 |
+
E E
|
385 |
+
F F
|
386 |
+
G G
|
387 |
+
H H
|
388 |
+
I I
|
389 |
+
J J
|
390 |
+
K K
|
391 |
+
L L
|
392 |
+
M M
|
393 |
+
N N
|
394 |
+
O O
|
395 |
+
P P
|
396 |
+
Q Q
|
397 |
+
R R
|
398 |
+
S S
|
399 |
+
T T
|
400 |
+
U U
|
401 |
+
V V
|
402 |
+
W W
|
403 |
+
X X
|
404 |
+
Y Y
|
405 |
+
Z Z
|
406 |
+
अ a
|
407 |
+
आ aa
|
408 |
+
मं m
|
409 |
+
मा maa
|
ssn_parser_new/lists/english_text_oald
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
uh a
|
2 |
+
e e
|
3 |
+
a ae
|
4 |
+
o aa
|
5 |
+
i i
|
6 |
+
u u
|
7 |
+
ii ii
|
8 |
+
uu uu
|
9 |
+
oo aa
|
10 |
+
aa aa
|
11 |
+
@@ ar
|
12 |
+
ai ai
|
13 |
+
ei ee
|
14 |
+
oi aay
|
15 |
+
au au
|
16 |
+
ou oo
|
17 |
+
e@ ee
|
18 |
+
i@ iiya
|
19 |
+
u@ uwa
|
20 |
+
@ a
|
21 |
+
p p
|
22 |
+
t tx
|
23 |
+
k k
|
24 |
+
b b
|
25 |
+
d dx
|
26 |
+
g g
|
27 |
+
s s
|
28 |
+
z s
|
29 |
+
sh sx
|
30 |
+
zh sx
|
31 |
+
f f
|
32 |
+
v w
|
33 |
+
th t
|
34 |
+
dh d
|
35 |
+
ch c
|
36 |
+
jh j
|
37 |
+
h h
|
38 |
+
m m
|
39 |
+
n nx
|
40 |
+
ng ng
|
41 |
+
l l
|
42 |
+
y y
|
43 |
+
r r
|
44 |
+
w w
|
45 |
+
# #
|
46 |
+
SIL
|
ssn_parser_new/lists/f1
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
JJ
|
2 |
+
N_NN
|
3 |
+
N_NN
|
4 |
+
N_NN
|
5 |
+
V_VM_VNF_RP
|
6 |
+
V_VM_VF_VBN
|
7 |
+
N_NN
|
8 |
+
N_NN
|
9 |
+
V_VM_VNF_RP
|
10 |
+
N_NNP
|
11 |
+
QT_QTC
|
12 |
+
DM_DMR
|
13 |
+
RB
|
14 |
+
RB
|
15 |
+
V_VM_VNF_INF
|
16 |
+
N_NN
|
17 |
+
PR_PRP
|
18 |
+
N_NN
|
19 |
+
N_NN
|
20 |
+
V_VM_VF
|
21 |
+
N_NN
|
ssn_parser_new/lists/f2
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
N_NN
|
2 |
+
V_VM_VF
|
3 |
+
V_VM_VF_VBN
|
4 |
+
V_VM_VNF_VBN
|
5 |
+
N_NN
|
6 |
+
V_VM_VF
|
7 |
+
PR_PRP
|
8 |
+
PSP
|
9 |
+
PSP
|
10 |
+
N_NN
|
11 |
+
N_NN
|
12 |
+
N_NN
|
13 |
+
V_VM_VF
|
14 |
+
V_VM_VNF_VBN
|
15 |
+
V_VM_VF
|
16 |
+
RB
|
17 |
+
PSP
|
18 |
+
RP_NEG
|
19 |
+
CC_CCS
|
20 |
+
CC_CCS
|
21 |
+
V_VM_VNF_COND
|
ssn_parser_new/lists/gen.scp
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/home/rachel/ssn_hts_demo/lab/1.lab
|
ssn_parser_new/lists/language_map_cp
ADDED
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
! !
|
3 |
+
@ @
|
4 |
+
" "
|
5 |
+
# #
|
6 |
+
% %
|
7 |
+
' '
|
8 |
+
( (
|
9 |
+
) )
|
10 |
+
* *
|
11 |
+
+ +
|
12 |
+
, ,
|
13 |
+
_ _
|
14 |
+
- -
|
15 |
+
/ /
|
16 |
+
0 0
|
17 |
+
1 1
|
18 |
+
2 2
|
19 |
+
3 3
|
20 |
+
4 4
|
21 |
+
5 5
|
22 |
+
6 6
|
23 |
+
7 7
|
24 |
+
8 8
|
25 |
+
9 9
|
26 |
+
; ;
|
27 |
+
: :
|
28 |
+
்
|
29 |
+
ா aa
|
30 |
+
ி i
|
31 |
+
ீ ii
|
32 |
+
ு u
|
33 |
+
ூ uu
|
34 |
+
ெ e
|
35 |
+
ே ee
|
36 |
+
ை ai
|
37 |
+
ொ o
|
38 |
+
ோ oo
|
39 |
+
ௌ au
|
40 |
+
அ a
|
41 |
+
ஆ aa
|
42 |
+
இ i
|
43 |
+
ஈ ii
|
44 |
+
உ u
|
45 |
+
ஊ uu
|
46 |
+
எ e
|
47 |
+
ஏ ee
|
48 |
+
ஐ ai
|
49 |
+
ஒ o
|
50 |
+
ஓ oo
|
51 |
+
ஔ au
|
52 |
+
க k
|
53 |
+
ங ng
|
54 |
+
ச c
|
55 |
+
ஜ j
|
56 |
+
ஞ nj
|
57 |
+
ட tx
|
58 |
+
த t
|
59 |
+
ந nd
|
60 |
+
ண nx
|
61 |
+
ன n
|
62 |
+
ப p
|
63 |
+
ம m
|
64 |
+
ய y
|
65 |
+
ர r
|
66 |
+
ற rx
|
67 |
+
ல l
|
68 |
+
ள lx
|
69 |
+
ழ zh
|
70 |
+
வ w
|
71 |
+
ஷ sx
|
72 |
+
ஸ s
|
73 |
+
ஹ h
|
74 |
+
ஃ g
|
75 |
+
ஃப f
|
76 |
+
्
|
77 |
+
ा aa
|
78 |
+
ि i
|
79 |
+
ी ii
|
80 |
+
ु u
|
81 |
+
ू uu
|
82 |
+
ृ rx
|
83 |
+
े ee
|
84 |
+
ै ai
|
85 |
+
ो oo
|
86 |
+
ौ au
|
87 |
+
ं n
|
88 |
+
ः aha
|
89 |
+
ँ n
|
90 |
+
अ a
|
91 |
+
आ aa
|
92 |
+
इ i
|
93 |
+
ई ii
|
94 |
+
उ u
|
95 |
+
ऊ uu
|
96 |
+
ऋ rx
|
97 |
+
ए ee
|
98 |
+
ऐ ai
|
99 |
+
ओ oo
|
100 |
+
औ au
|
101 |
+
क k
|
102 |
+
ख kh
|
103 |
+
ग g
|
104 |
+
घ gh
|
105 |
+
ङ ng
|
106 |
+
च c
|
107 |
+
छ ch
|
108 |
+
ज j
|
109 |
+
झ jh
|
110 |
+
ञ nj
|
111 |
+
ट tx
|
112 |
+
ठ txh
|
113 |
+
ड dx
|
114 |
+
ढ dxh
|
115 |
+
ण nx
|
116 |
+
त t
|
117 |
+
थ th
|
118 |
+
द d
|
119 |
+
ध dh
|
120 |
+
न nd
|
121 |
+
प p
|
122 |
+
फ ph
|
123 |
+
ब b
|
124 |
+
भ bh
|
125 |
+
म m
|
126 |
+
य y
|
127 |
+
र r
|
128 |
+
ल l
|
129 |
+
ळ lx
|
130 |
+
व w
|
131 |
+
ष sx
|
132 |
+
श sh
|
133 |
+
स s
|
134 |
+
ह h
|
135 |
+
क्ष ksh
|
136 |
+
അ a
|
137 |
+
ആ aa
|
138 |
+
ഇ i
|
139 |
+
ഈ ii
|
140 |
+
ഉ u
|
141 |
+
ഊ uu
|
142 |
+
ഋ rx
|
143 |
+
എ e
|
144 |
+
ഏ ee
|
145 |
+
ഐ ai
|
146 |
+
ഒ o
|
147 |
+
ഓ oo
|
148 |
+
ഔ au
|
149 |
+
ം m
|
150 |
+
്
|
151 |
+
ഃ
|
152 |
+
ാ aa
|
153 |
+
ി i
|
154 |
+
ീ ii
|
155 |
+
ു u
|
156 |
+
ൂ uu
|
157 |
+
ൃ rx
|
158 |
+
െ e
|
159 |
+
േ ee
|
160 |
+
ൈ ai
|
161 |
+
ൊ o
|
162 |
+
ോ oo
|
163 |
+
ൌ au
|
164 |
+
ക k
|
165 |
+
ഖ k
|
166 |
+
ഗ g
|
167 |
+
ഘ g
|
168 |
+
ങ nx
|
169 |
+
ച c
|
170 |
+
ഛ c
|
171 |
+
ജ j
|
172 |
+
ഝ j
|
173 |
+
ഞ nj
|
174 |
+
ട tx
|
175 |
+
ഠ tx
|
176 |
+
ഡ t
|
177 |
+
ഢ t
|
178 |
+
ണ nx
|
179 |
+
ത t
|
180 |
+
ഥ tx
|
181 |
+
ദ d
|
182 |
+
ധ d
|
183 |
+
ന nd
|
184 |
+
പ p
|
185 |
+
ഫ f
|
186 |
+
ബ b
|
187 |
+
ഭ b
|
188 |
+
മ m
|
189 |
+
യ y
|
190 |
+
ര r
|
191 |
+
റ rx
|
192 |
+
ല l
|
193 |
+
ള lx
|
194 |
+
ഴ zh
|
195 |
+
വ w
|
196 |
+
ശ sx
|
197 |
+
ഷ sh
|
198 |
+
സ s
|
199 |
+
ഹ h
|
200 |
+
ా aa
|
201 |
+
ి i
|
202 |
+
ీ ii
|
203 |
+
ు u
|
204 |
+
ూ uu
|
205 |
+
ృ rx
|
206 |
+
ె e
|
207 |
+
ే ee
|
208 |
+
ై ai
|
209 |
+
ొ o
|
210 |
+
ో oo
|
211 |
+
ౌ au
|
212 |
+
ౖ ai
|
213 |
+
ఁ n
|
214 |
+
ం n
|
215 |
+
ః aha
|
216 |
+
అ a
|
217 |
+
ఆ aa
|
218 |
+
ఇ i
|
219 |
+
ఈ ii
|
220 |
+
ఉ u
|
221 |
+
ఊ uu
|
222 |
+
ఎ e
|
223 |
+
ఏ ee
|
224 |
+
ఐ ai
|
225 |
+
ఒ o
|
226 |
+
ఓ oo
|
227 |
+
ఔ au
|
228 |
+
బ b
|
229 |
+
భ b
|
230 |
+
చ c
|
231 |
+
ఛ c
|
232 |
+
డ dx
|
233 |
+
ఢ dx
|
234 |
+
ద dh
|
235 |
+
ధ d
|
236 |
+
ఫ f
|
237 |
+
గ g
|
238 |
+
ఘ g
|
239 |
+
హ h
|
240 |
+
జ j
|
241 |
+
ఝ j
|
242 |
+
క k
|
243 |
+
ఖ k
|
244 |
+
ల l
|
245 |
+
ళ lx
|
246 |
+
మ m
|
247 |
+
న nd
|
248 |
+
ణ nx
|
249 |
+
ప p
|
250 |
+
ఞ nj
|
251 |
+
ఙ ng
|
252 |
+
ర r
|
253 |
+
ఱ rx
|
254 |
+
ఋ rx
|
255 |
+
స s
|
256 |
+
ష sh
|
257 |
+
శ sh
|
258 |
+
ట t
|
259 |
+
ఠ tx
|
260 |
+
త th
|
261 |
+
థ t
|
262 |
+
వ w
|
263 |
+
య y
|
264 |
+
SIL SIL
|
ssn_parser_new/lists/num
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
0
|
2 |
+
1
|
3 |
+
2
|
4 |
+
3
|
5 |
+
4
|
6 |
+
5
|
7 |
+
6
|
8 |
+
7
|
9 |
+
8
|
10 |
+
9
|
ssn_parser_new/lists/number
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
1
|
2 |
+
2
|
3 |
+
3
|
4 |
+
4
|
5 |
+
5
|
6 |
+
6
|
7 |
+
7
|
8 |
+
8
|
9 |
+
9
|
10 |
+
0
|
ssn_parser_new/lists/o_au_map
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
e aa o
|
2 |
+
ee aa oo
|
3 |
+
e lx au
|
ssn_parser_new/lists/out_word
ADDED
File without changes
|
ssn_parser_new/lists/pb_pos_list_12hrs
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
V_VM_VNG
|
2 |
+
V_VM_VF
|
3 |
+
RP_INJ
|
4 |
+
V_VM_VNF_COND
|
5 |
+
N_NNV
|
6 |
+
V_VM_VNF_RP_PSP
|
7 |
+
RP_NEG
|
8 |
+
PSP
|
9 |
+
CC_CCD
|
ssn_parser_new/lists/phone_list
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
aa
|
3 |
+
ai
|
4 |
+
au
|
5 |
+
b
|
6 |
+
c
|
7 |
+
d
|
8 |
+
dx
|
9 |
+
e
|
10 |
+
ee
|
11 |
+
eu
|
12 |
+
f
|
13 |
+
g
|
14 |
+
h
|
15 |
+
i
|
16 |
+
ii
|
17 |
+
j
|
18 |
+
k
|
19 |
+
l
|
20 |
+
lx
|
21 |
+
m
|
22 |
+
n
|
23 |
+
nd
|
24 |
+
ng
|
25 |
+
nj
|
26 |
+
nx
|
27 |
+
o
|
28 |
+
oo
|
29 |
+
p
|
30 |
+
r
|
31 |
+
rx
|
32 |
+
s
|
33 |
+
SIL
|
34 |
+
sx
|
35 |
+
t
|
36 |
+
tx
|
37 |
+
u
|
38 |
+
uu
|
39 |
+
w
|
40 |
+
y
|
41 |
+
zh
|
42 |
+
ae
|
ssn_parser_new/lists/phoneset_all
ADDED
@@ -0,0 +1,419 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
aa
|
3 |
+
ae
|
4 |
+
ai
|
5 |
+
au
|
6 |
+
b
|
7 |
+
ba
|
8 |
+
baa
|
9 |
+
bai
|
10 |
+
bau
|
11 |
+
be
|
12 |
+
bee
|
13 |
+
beu
|
14 |
+
bi
|
15 |
+
bii
|
16 |
+
bo
|
17 |
+
boo
|
18 |
+
bu
|
19 |
+
buu
|
20 |
+
c
|
21 |
+
ca
|
22 |
+
caa
|
23 |
+
cai
|
24 |
+
cau
|
25 |
+
ce
|
26 |
+
cee
|
27 |
+
ceu
|
28 |
+
ci
|
29 |
+
cii
|
30 |
+
co
|
31 |
+
coo
|
32 |
+
cu
|
33 |
+
cuu
|
34 |
+
d
|
35 |
+
da
|
36 |
+
daa
|
37 |
+
dai
|
38 |
+
dau
|
39 |
+
de
|
40 |
+
dee
|
41 |
+
deu
|
42 |
+
di
|
43 |
+
dii
|
44 |
+
do
|
45 |
+
doo
|
46 |
+
du
|
47 |
+
duu
|
48 |
+
dx
|
49 |
+
dxa
|
50 |
+
dxaa
|
51 |
+
dxai
|
52 |
+
dxau
|
53 |
+
dxe
|
54 |
+
dxee
|
55 |
+
dxeu
|
56 |
+
dxi
|
57 |
+
dxii
|
58 |
+
dxo
|
59 |
+
dxoo
|
60 |
+
dxu
|
61 |
+
dxuu
|
62 |
+
e
|
63 |
+
ee
|
64 |
+
eu
|
65 |
+
f
|
66 |
+
fa
|
67 |
+
faa
|
68 |
+
fai
|
69 |
+
fau
|
70 |
+
fe
|
71 |
+
fee
|
72 |
+
feu
|
73 |
+
fi
|
74 |
+
fii
|
75 |
+
fo
|
76 |
+
foo
|
77 |
+
fu
|
78 |
+
fuu
|
79 |
+
g
|
80 |
+
ga
|
81 |
+
gaa
|
82 |
+
gai
|
83 |
+
gau
|
84 |
+
ge
|
85 |
+
gee
|
86 |
+
geu
|
87 |
+
gi
|
88 |
+
gii
|
89 |
+
go
|
90 |
+
goo
|
91 |
+
gu
|
92 |
+
guu
|
93 |
+
h
|
94 |
+
ha
|
95 |
+
haa
|
96 |
+
hai
|
97 |
+
hau
|
98 |
+
he
|
99 |
+
hee
|
100 |
+
heu
|
101 |
+
hi
|
102 |
+
hii
|
103 |
+
ho
|
104 |
+
hoo
|
105 |
+
hu
|
106 |
+
huu
|
107 |
+
i
|
108 |
+
ii
|
109 |
+
j
|
110 |
+
ja
|
111 |
+
jaa
|
112 |
+
jai
|
113 |
+
jau
|
114 |
+
je
|
115 |
+
jee
|
116 |
+
jeu
|
117 |
+
ji
|
118 |
+
jii
|
119 |
+
jo
|
120 |
+
joo
|
121 |
+
ju
|
122 |
+
juu
|
123 |
+
k
|
124 |
+
ka
|
125 |
+
kaa
|
126 |
+
kai
|
127 |
+
kau
|
128 |
+
ke
|
129 |
+
kee
|
130 |
+
keu
|
131 |
+
ki
|
132 |
+
kii
|
133 |
+
ko
|
134 |
+
koo
|
135 |
+
ku
|
136 |
+
kuu
|
137 |
+
l
|
138 |
+
la
|
139 |
+
laa
|
140 |
+
lai
|
141 |
+
lau
|
142 |
+
le
|
143 |
+
lee
|
144 |
+
leu
|
145 |
+
li
|
146 |
+
lii
|
147 |
+
lo
|
148 |
+
loo
|
149 |
+
lu
|
150 |
+
luu
|
151 |
+
lx
|
152 |
+
lxa
|
153 |
+
lxaa
|
154 |
+
lxai
|
155 |
+
lxau
|
156 |
+
lxe
|
157 |
+
lxee
|
158 |
+
lxeu
|
159 |
+
lxi
|
160 |
+
lxii
|
161 |
+
lxo
|
162 |
+
lxoo
|
163 |
+
lxu
|
164 |
+
lxuu
|
165 |
+
m
|
166 |
+
ma
|
167 |
+
maa
|
168 |
+
mai
|
169 |
+
mau
|
170 |
+
me
|
171 |
+
mee
|
172 |
+
meu
|
173 |
+
mi
|
174 |
+
mii
|
175 |
+
mo
|
176 |
+
moo
|
177 |
+
mu
|
178 |
+
muu
|
179 |
+
n
|
180 |
+
na
|
181 |
+
naa
|
182 |
+
nai
|
183 |
+
nau
|
184 |
+
nd
|
185 |
+
nda
|
186 |
+
ndaa
|
187 |
+
ndai
|
188 |
+
ndau
|
189 |
+
nde
|
190 |
+
ndee
|
191 |
+
ndeu
|
192 |
+
ndi
|
193 |
+
ndii
|
194 |
+
ndo
|
195 |
+
ndoo
|
196 |
+
ndu
|
197 |
+
nduu
|
198 |
+
ne
|
199 |
+
nee
|
200 |
+
neu
|
201 |
+
ng
|
202 |
+
nga
|
203 |
+
ngaa
|
204 |
+
ngai
|
205 |
+
ngau
|
206 |
+
nge
|
207 |
+
ngee
|
208 |
+
ngeu
|
209 |
+
ngi
|
210 |
+
ngii
|
211 |
+
ngo
|
212 |
+
ngoo
|
213 |
+
ngu
|
214 |
+
nguu
|
215 |
+
ni
|
216 |
+
nii
|
217 |
+
nj
|
218 |
+
nja
|
219 |
+
njaa
|
220 |
+
njai
|
221 |
+
njau
|
222 |
+
nje
|
223 |
+
njee
|
224 |
+
njeu
|
225 |
+
nji
|
226 |
+
njii
|
227 |
+
njo
|
228 |
+
njoo
|
229 |
+
nju
|
230 |
+
njuu
|
231 |
+
no
|
232 |
+
noo
|
233 |
+
nu
|
234 |
+
nuu
|
235 |
+
nx
|
236 |
+
nxa
|
237 |
+
nxaa
|
238 |
+
nxai
|
239 |
+
nxau
|
240 |
+
nxe
|
241 |
+
nxee
|
242 |
+
nxeu
|
243 |
+
nxi
|
244 |
+
nxii
|
245 |
+
nxo
|
246 |
+
nxoo
|
247 |
+
nxu
|
248 |
+
nxuu
|
249 |
+
o
|
250 |
+
oo
|
251 |
+
p
|
252 |
+
pa
|
253 |
+
paa
|
254 |
+
pai
|
255 |
+
pau
|
256 |
+
pe
|
257 |
+
pee
|
258 |
+
peu
|
259 |
+
pi
|
260 |
+
pii
|
261 |
+
po
|
262 |
+
poo
|
263 |
+
pu
|
264 |
+
puu
|
265 |
+
r
|
266 |
+
ra
|
267 |
+
raa
|
268 |
+
rai
|
269 |
+
rau
|
270 |
+
re
|
271 |
+
ree
|
272 |
+
reu
|
273 |
+
ri
|
274 |
+
rii
|
275 |
+
ro
|
276 |
+
roo
|
277 |
+
ru
|
278 |
+
ruu
|
279 |
+
rx
|
280 |
+
rxa
|
281 |
+
rxaa
|
282 |
+
rxai
|
283 |
+
rxau
|
284 |
+
rxe
|
285 |
+
rxee
|
286 |
+
rxeu
|
287 |
+
rxi
|
288 |
+
rxii
|
289 |
+
rxo
|
290 |
+
rxoo
|
291 |
+
rxu
|
292 |
+
rxuu
|
293 |
+
s
|
294 |
+
sa
|
295 |
+
saa
|
296 |
+
sai
|
297 |
+
sau
|
298 |
+
se
|
299 |
+
see
|
300 |
+
seu
|
301 |
+
si
|
302 |
+
sii
|
303 |
+
so
|
304 |
+
soo
|
305 |
+
su
|
306 |
+
suu
|
307 |
+
sx
|
308 |
+
sxa
|
309 |
+
sxaa
|
310 |
+
sxai
|
311 |
+
sxau
|
312 |
+
sxe
|
313 |
+
sxee
|
314 |
+
sxeu
|
315 |
+
sxi
|
316 |
+
sxii
|
317 |
+
sxo
|
318 |
+
sxoo
|
319 |
+
sxu
|
320 |
+
sxuu
|
321 |
+
t
|
322 |
+
ta
|
323 |
+
taa
|
324 |
+
tai
|
325 |
+
tau
|
326 |
+
te
|
327 |
+
tee
|
328 |
+
teu
|
329 |
+
ti
|
330 |
+
tii
|
331 |
+
to
|
332 |
+
too
|
333 |
+
tu
|
334 |
+
tuu
|
335 |
+
tx
|
336 |
+
txa
|
337 |
+
txaa
|
338 |
+
txai
|
339 |
+
txau
|
340 |
+
txe
|
341 |
+
txee
|
342 |
+
txeu
|
343 |
+
txi
|
344 |
+
txii
|
345 |
+
txo
|
346 |
+
txoo
|
347 |
+
txu
|
348 |
+
txuu
|
349 |
+
u
|
350 |
+
uu
|
351 |
+
w
|
352 |
+
wa
|
353 |
+
waa
|
354 |
+
wai
|
355 |
+
wau
|
356 |
+
we
|
357 |
+
wee
|
358 |
+
weu
|
359 |
+
wi
|
360 |
+
wii
|
361 |
+
wo
|
362 |
+
woo
|
363 |
+
wu
|
364 |
+
wuu
|
365 |
+
y
|
366 |
+
ya
|
367 |
+
yaa
|
368 |
+
yai
|
369 |
+
yau
|
370 |
+
ye
|
371 |
+
yee
|
372 |
+
yeu
|
373 |
+
yi
|
374 |
+
yii
|
375 |
+
yo
|
376 |
+
yoo
|
377 |
+
yu
|
378 |
+
yuu
|
379 |
+
zh
|
380 |
+
zha
|
381 |
+
zhaa
|
382 |
+
zhai
|
383 |
+
zhau
|
384 |
+
zhe
|
385 |
+
zhee
|
386 |
+
zheu
|
387 |
+
zhi
|
388 |
+
zhii
|
389 |
+
zho
|
390 |
+
zhoo
|
391 |
+
zhu
|
392 |
+
zhuu
|
393 |
+
bae
|
394 |
+
cae
|
395 |
+
dae
|
396 |
+
dxae
|
397 |
+
fae
|
398 |
+
gae
|
399 |
+
hae
|
400 |
+
jae
|
401 |
+
kae
|
402 |
+
lae
|
403 |
+
lxae
|
404 |
+
mae
|
405 |
+
nae
|
406 |
+
ndae
|
407 |
+
ngae
|
408 |
+
njae
|
409 |
+
nxae
|
410 |
+
pae
|
411 |
+
rae
|
412 |
+
rxae
|
413 |
+
sae
|
414 |
+
sxae
|
415 |
+
tae
|
416 |
+
txae
|
417 |
+
wae
|
418 |
+
yae
|
419 |
+
zhae
|
ssn_parser_new/lists/phoneset_mei
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
b
|
2 |
+
c
|
3 |
+
d
|
4 |
+
dx
|
5 |
+
f
|
6 |
+
g
|
7 |
+
h
|
8 |
+
j
|
9 |
+
k
|
10 |
+
l
|
11 |
+
lx
|
12 |
+
m
|
13 |
+
n
|
14 |
+
nd
|
15 |
+
ng
|
16 |
+
nj
|
17 |
+
nx
|
18 |
+
p
|
19 |
+
r
|
20 |
+
rx
|
21 |
+
s
|
22 |
+
sx
|
23 |
+
t
|
24 |
+
tx
|
25 |
+
w
|
26 |
+
y
|
27 |
+
zh
|
ssn_parser_new/lists/phoneset_uyir
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
aa
|
3 |
+
ai
|
4 |
+
au
|
5 |
+
e
|
6 |
+
ee
|
7 |
+
eu
|
8 |
+
i
|
9 |
+
ii
|
10 |
+
o
|
11 |
+
oo
|
12 |
+
u
|
13 |
+
uu
|
14 |
+
ae
|
ssn_parser_new/lists/phoneset_uyirmei
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ba
|
2 |
+
baa
|
3 |
+
bai
|
4 |
+
bau
|
5 |
+
be
|
6 |
+
bee
|
7 |
+
beu
|
8 |
+
bi
|
9 |
+
bii
|
10 |
+
bo
|
11 |
+
boo
|
12 |
+
bu
|
13 |
+
buu
|
14 |
+
ca
|
15 |
+
caa
|
16 |
+
cai
|
17 |
+
cau
|
18 |
+
ce
|
19 |
+
cee
|
20 |
+
ceu
|
21 |
+
ci
|
22 |
+
cii
|
23 |
+
co
|
24 |
+
coo
|
25 |
+
cu
|
26 |
+
cuu
|
27 |
+
da
|
28 |
+
daa
|
29 |
+
dai
|
30 |
+
dau
|
31 |
+
de
|
32 |
+
dee
|
33 |
+
deu
|
34 |
+
di
|
35 |
+
dii
|
36 |
+
do
|
37 |
+
doo
|
38 |
+
du
|
39 |
+
duu
|
40 |
+
dxa
|
41 |
+
dxaa
|
42 |
+
dxai
|
43 |
+
dxau
|
44 |
+
dxe
|
45 |
+
dxee
|
46 |
+
dxeu
|
47 |
+
dxi
|
48 |
+
dxii
|
49 |
+
dxo
|
50 |
+
dxoo
|
51 |
+
dxu
|
52 |
+
dxuu
|
53 |
+
fa
|
54 |
+
faa
|
55 |
+
fai
|
56 |
+
fau
|
57 |
+
fe
|
58 |
+
fee
|
59 |
+
feu
|
60 |
+
fi
|
61 |
+
fii
|
62 |
+
fo
|
63 |
+
foo
|
64 |
+
fu
|
65 |
+
fuu
|
66 |
+
ga
|
67 |
+
gaa
|
68 |
+
gai
|
69 |
+
gau
|
70 |
+
ge
|
71 |
+
gee
|
72 |
+
geu
|
73 |
+
gi
|
74 |
+
gii
|
75 |
+
go
|
76 |
+
goo
|
77 |
+
gu
|
78 |
+
guu
|
79 |
+
ha
|
80 |
+
haa
|
81 |
+
hai
|
82 |
+
hau
|
83 |
+
he
|
84 |
+
hee
|
85 |
+
heu
|
86 |
+
hi
|
87 |
+
hii
|
88 |
+
ho
|
89 |
+
hoo
|
90 |
+
hu
|
91 |
+
huu
|
92 |
+
ja
|
93 |
+
jaa
|
94 |
+
jai
|
95 |
+
jau
|
96 |
+
je
|
97 |
+
jee
|
98 |
+
jeu
|
99 |
+
ji
|
100 |
+
jii
|
101 |
+
jo
|
102 |
+
joo
|
103 |
+
ju
|
104 |
+
juu
|
105 |
+
ka
|
106 |
+
kaa
|
107 |
+
kai
|
108 |
+
kau
|
109 |
+
ke
|
110 |
+
kee
|
111 |
+
keu
|
112 |
+
ki
|
113 |
+
kii
|
114 |
+
ko
|
115 |
+
koo
|
116 |
+
ku
|
117 |
+
kuu
|
118 |
+
la
|
119 |
+
laa
|
120 |
+
lai
|
121 |
+
lau
|
122 |
+
le
|
123 |
+
lee
|
124 |
+
leu
|
125 |
+
li
|
126 |
+
lii
|
127 |
+
lo
|
128 |
+
loo
|
129 |
+
lu
|
130 |
+
luu
|
131 |
+
lxa
|
132 |
+
lxaa
|
133 |
+
lxai
|
134 |
+
lxau
|
135 |
+
lxe
|
136 |
+
lxee
|
137 |
+
lxeu
|
138 |
+
lxi
|
139 |
+
lxii
|
140 |
+
lxo
|
141 |
+
lxoo
|
142 |
+
lxu
|
143 |
+
lxuu
|
144 |
+
ma
|
145 |
+
maa
|
146 |
+
mai
|
147 |
+
mau
|
148 |
+
me
|
149 |
+
mee
|
150 |
+
meu
|
151 |
+
mi
|
152 |
+
mii
|
153 |
+
mo
|
154 |
+
moo
|
155 |
+
mu
|
156 |
+
muu
|
157 |
+
na
|
158 |
+
naa
|
159 |
+
nai
|
160 |
+
nau
|
161 |
+
ne
|
162 |
+
nee
|
163 |
+
neu
|
164 |
+
ni
|
165 |
+
nii
|
166 |
+
no
|
167 |
+
noo
|
168 |
+
nu
|
169 |
+
nuu
|
170 |
+
nda
|
171 |
+
ndaa
|
172 |
+
ndai
|
173 |
+
ndau
|
174 |
+
nde
|
175 |
+
ndee
|
176 |
+
ndeu
|
177 |
+
ndi
|
178 |
+
ndii
|
179 |
+
ndo
|
180 |
+
ndoo
|
181 |
+
ndu
|
182 |
+
nduu
|
183 |
+
nga
|
184 |
+
ngaa
|
185 |
+
ngai
|
186 |
+
ngau
|
187 |
+
nge
|
188 |
+
ngee
|
189 |
+
ngeu
|
190 |
+
ngi
|
191 |
+
ngii
|
192 |
+
ngo
|
193 |
+
ngoo
|
194 |
+
ngu
|
195 |
+
nguu
|
196 |
+
nja
|
197 |
+
njaa
|
198 |
+
njai
|
199 |
+
njau
|
200 |
+
nje
|
201 |
+
njee
|
202 |
+
njeu
|
203 |
+
nji
|
204 |
+
njii
|
205 |
+
njo
|
206 |
+
njoo
|
207 |
+
nju
|
208 |
+
njuu
|
209 |
+
nxa
|
210 |
+
nxaa
|
211 |
+
nxai
|
212 |
+
nxau
|
213 |
+
nxe
|
214 |
+
nxee
|
215 |
+
nxeu
|
216 |
+
nxi
|
217 |
+
nxii
|
218 |
+
nxo
|
219 |
+
nxoo
|
220 |
+
nxu
|
221 |
+
nxuu
|
222 |
+
pa
|
223 |
+
paa
|
224 |
+
pai
|
225 |
+
pau
|
226 |
+
pe
|
227 |
+
pee
|
228 |
+
peu
|
229 |
+
pi
|
230 |
+
pii
|
231 |
+
po
|
232 |
+
poo
|
233 |
+
pu
|
234 |
+
puu
|
235 |
+
ra
|
236 |
+
raa
|
237 |
+
rai
|
238 |
+
rau
|
239 |
+
re
|
240 |
+
ree
|
241 |
+
reu
|
242 |
+
ri
|
243 |
+
rii
|
244 |
+
ro
|
245 |
+
roo
|
246 |
+
ru
|
247 |
+
ruu
|
248 |
+
rxa
|
249 |
+
rxaa
|
250 |
+
rxai
|
251 |
+
rxau
|
252 |
+
rxe
|
253 |
+
rxee
|
254 |
+
rxeu
|
255 |
+
rxi
|
256 |
+
rxii
|
257 |
+
rxo
|
258 |
+
rxoo
|
259 |
+
rxu
|
260 |
+
rxuu
|
261 |
+
sa
|
262 |
+
saa
|
263 |
+
sai
|
264 |
+
sau
|
265 |
+
se
|
266 |
+
see
|
267 |
+
seu
|
268 |
+
si
|
269 |
+
sii
|
270 |
+
so
|
271 |
+
soo
|
272 |
+
su
|
273 |
+
suu
|
274 |
+
sxa
|
275 |
+
sxaa
|
276 |
+
sxai
|
277 |
+
sxau
|
278 |
+
sxe
|
279 |
+
sxee
|
280 |
+
sxeu
|
281 |
+
sxi
|
282 |
+
sxii
|
283 |
+
sxo
|
284 |
+
sxoo
|
285 |
+
sxu
|
286 |
+
sxuu
|
287 |
+
ta
|
288 |
+
taa
|
289 |
+
tai
|
290 |
+
tau
|
291 |
+
te
|
292 |
+
tee
|
293 |
+
teu
|
294 |
+
ti
|
295 |
+
tii
|
296 |
+
to
|
297 |
+
too
|
298 |
+
tu
|
299 |
+
tuu
|
300 |
+
txa
|
301 |
+
txaa
|
302 |
+
txai
|
303 |
+
txau
|
304 |
+
txe
|
305 |
+
txee
|
306 |
+
txeu
|
307 |
+
txi
|
308 |
+
txii
|
309 |
+
txo
|
310 |
+
txoo
|
311 |
+
txu
|
312 |
+
txuu
|
313 |
+
wa
|
314 |
+
waa
|
315 |
+
wai
|
316 |
+
wau
|
317 |
+
we
|
318 |
+
wee
|
319 |
+
weu
|
320 |
+
wi
|
321 |
+
wii
|
322 |
+
wo
|
323 |
+
woo
|
324 |
+
wu
|
325 |
+
wuu
|
326 |
+
ya
|
327 |
+
yaa
|
328 |
+
yai
|
329 |
+
yau
|
330 |
+
ye
|
331 |
+
yee
|
332 |
+
yeu
|
333 |
+
yi
|
334 |
+
yii
|
335 |
+
yo
|
336 |
+
yoo
|
337 |
+
yu
|
338 |
+
yuu
|
339 |
+
zha
|
340 |
+
zhaa
|
341 |
+
zhai
|
342 |
+
zhau
|
343 |
+
zhe
|
344 |
+
zhee
|
345 |
+
zheu
|
346 |
+
zhi
|
347 |
+
zhii
|
348 |
+
zho
|
349 |
+
zhoo
|
350 |
+
zhu
|
351 |
+
zhuu
|
352 |
+
bae
|
353 |
+
cae
|
354 |
+
dae
|
355 |
+
dxae
|
356 |
+
fae
|
357 |
+
gae
|
358 |
+
hae
|
359 |
+
jae
|
360 |
+
kae
|
361 |
+
lae
|
362 |
+
lxae
|
363 |
+
mae
|
364 |
+
nae
|
365 |
+
ndae
|
366 |
+
ngae
|
367 |
+
njae
|
368 |
+
nxae
|
369 |
+
pae
|
370 |
+
rae
|
371 |
+
rxae
|
372 |
+
sae
|
373 |
+
sxae
|
374 |
+
tae
|
375 |
+
txae
|
376 |
+
wae
|
377 |
+
yae
|
378 |
+
zhae
|
ssn_parser_new/lists/spl_chr
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
!
|
2 |
+
@
|
3 |
+
#
|
4 |
+
$
|
5 |
+
%
|
6 |
+
^
|
7 |
+
&
|
8 |
+
*
|
9 |
+
(
|
10 |
+
)
|
11 |
+
+
|
12 |
+
=
|
13 |
+
{
|
14 |
+
}
|
15 |
+
[
|
16 |
+
]
|
17 |
+
"
|
18 |
+
;
|
19 |
+
'
|
20 |
+
<
|
21 |
+
>
|
22 |
+
,
|
23 |
+
.
|
24 |
+
?
|
25 |
+
“
|
26 |
+
”
|
ssn_parser_new/lists/spl_chr_map
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
!
|
2 |
+
@ at அட்
|
3 |
+
# hash ஹேஷ்
|
4 |
+
$ dollar டாலர்
|
5 |
+
% percent சதவிகிதம்
|
6 |
+
^
|
7 |
+
& and மற்றும்
|
8 |
+
*
|
9 |
+
(
|
10 |
+
)
|
11 |
+
+ plus கூட்டல்
|
12 |
+
= equal to சமம்
|
13 |
+
{
|
14 |
+
}
|
15 |
+
[
|
16 |
+
]
|
17 |
+
"
|
18 |
+
;
|
19 |
+
'
|
20 |
+
< less than லெஸெர் தன்
|
21 |
+
> greater than க்ரேட்டர் தன்
|
22 |
+
, ,
|
23 |
+
. .
|
24 |
+
?
|
25 |
+
₹ rupees ருபாய்
|
26 |
+
“
|
27 |
+
”
|
ssn_parser_new/lists/sv
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
y
|
2 |
+
r
|
3 |
+
l
|
4 |
+
lx
|
5 |
+
zh
|
6 |
+
w
|
ssn_parser_new/lists/syl_list
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
deu
|
2 |
+
galx
|
3 |
+
yum
|
4 |
+
na
|
5 |
+
ga
|
6 |
+
keu
|
7 |
+
dxeu
|
8 |
+
lai
|
9 |
+
rxeu
|
10 |
+
yil
|
11 |
+
teu
|
12 |
+
til
|
13 |
+
txeu
|
14 |
+
kum
|
15 |
+
lum
|
16 |
+
naal
|
17 |
+
nar
|
18 |
+
dxum
|
19 |
+
daan
|
20 |
+
lxai
|
21 |
+
yaa
|
22 |
+
laam
|
23 |
+
war
|
24 |
+
lxum
|
ssn_parser_new/lists/tamil
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
அ
|
2 |
+
ஆ
|
3 |
+
இ
|
4 |
+
ஈ
|
5 |
+
உ
|
6 |
+
ஊ
|
7 |
+
எ
|
8 |
+
ஏ
|
9 |
+
ஐ
|
10 |
+
ஒ
|
11 |
+
ஓ
|
12 |
+
ஔ
|
13 |
+
க
|
14 |
+
ங
|
15 |
+
ச
|
16 |
+
ஜ
|
17 |
+
ஞ
|
18 |
+
ட
|
19 |
+
த
|
20 |
+
ந
|
21 |
+
ண
|
22 |
+
ன
|
23 |
+
ப
|
24 |
+
ம
|
25 |
+
ய
|
26 |
+
ர
|
27 |
+
ற
|
28 |
+
ல
|
29 |
+
ள
|
30 |
+
ழ
|
31 |
+
வ
|
32 |
+
ஷ
|
33 |
+
ஸ
|
34 |
+
ஹ
|
35 |
+
ஃப
|
36 |
+
ா
|
37 |
+
ி
|
38 |
+
ீ
|
39 |
+
ு
|
40 |
+
ூ
|
41 |
+
ெ
|
42 |
+
ே
|
43 |
+
ை
|
44 |
+
ொ
|
45 |
+
ோ
|
46 |
+
ௌ
|
47 |
+
்
|
48 |
+
|
ssn_parser_new/lists/tamil_map
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# map file containing tamil characters [ ] english characters
|
2 |
+
# Multiple phonemes mapped to a single character are yet to be handled.
|
3 |
+
# Has 12 vowels,(18+5) consonants.
|
4 |
+
! !
|
5 |
+
@ @
|
6 |
+
" "
|
7 |
+
# #
|
8 |
+
% %
|
9 |
+
' '
|
10 |
+
( (
|
11 |
+
) )
|
12 |
+
* *
|
13 |
+
+ +
|
14 |
+
, ,
|
15 |
+
_ _
|
16 |
+
- -
|
17 |
+
. .
|
18 |
+
/ /
|
19 |
+
; ;
|
20 |
+
0 0
|
21 |
+
1 1
|
22 |
+
2 2
|
23 |
+
3 3
|
24 |
+
4 4
|
25 |
+
5 5
|
26 |
+
6 6
|
27 |
+
7 7
|
28 |
+
8 8
|
29 |
+
9 9
|
30 |
+
a a
|
31 |
+
b b
|
32 |
+
c c
|
33 |
+
d d
|
34 |
+
e e
|
35 |
+
f f
|
36 |
+
g g
|
37 |
+
h h
|
38 |
+
i i
|
39 |
+
j j
|
40 |
+
k k
|
41 |
+
l l
|
42 |
+
m m
|
43 |
+
n n
|
44 |
+
o o
|
45 |
+
p p
|
46 |
+
q q
|
47 |
+
r r
|
48 |
+
s s
|
49 |
+
t t
|
50 |
+
u u
|
51 |
+
v v
|
52 |
+
w w
|
53 |
+
x x
|
54 |
+
y y
|
55 |
+
z z
|
56 |
+
A A
|
57 |
+
B B
|
58 |
+
C C
|
59 |
+
D D
|
60 |
+
E E
|
61 |
+
F F
|
62 |
+
G G
|
63 |
+
H H
|
64 |
+
I I
|
65 |
+
J J
|
66 |
+
K K
|
67 |
+
L L
|
68 |
+
M M
|
69 |
+
N N
|
70 |
+
O O
|
71 |
+
P P
|
72 |
+
Q Q
|
73 |
+
R R
|
74 |
+
S S
|
75 |
+
T T
|
76 |
+
U U
|
77 |
+
V V
|
78 |
+
W W
|
79 |
+
X X
|
80 |
+
Y Y
|
81 |
+
Z Z
|
82 |
+
ா aa
|
83 |
+
ி i
|
84 |
+
ீ ii
|
85 |
+
ு u
|
86 |
+
ூ uu
|
87 |
+
ெ e
|
88 |
+
ே ee
|
89 |
+
ை ai
|
90 |
+
ொ o
|
91 |
+
ோ oo
|
92 |
+
ௌ au
|
93 |
+
அ a
|
94 |
+
ஆ aa
|
95 |
+
இ i
|
96 |
+
ஈ ii
|
97 |
+
உ u
|
98 |
+
ஊ uu
|
99 |
+
எ e
|
100 |
+
ஏ ee
|
101 |
+
ஐ ai
|
102 |
+
ஒ o
|
103 |
+
ஓ oo
|
104 |
+
ஔ au
|
105 |
+
க k
|
106 |
+
க் k
|
107 |
+
ங ng
|
108 |
+
ங் ng
|
109 |
+
ச c
|
110 |
+
ச் c
|
111 |
+
ஜ j
|
112 |
+
ஜ் j
|
113 |
+
ஞ nj
|
114 |
+
ஞ் nj
|
115 |
+
ட tx
|
116 |
+
ட் tx
|
117 |
+
த t
|
118 |
+
த் t
|
119 |
+
ந nd
|
120 |
+
ந் nd
|
121 |
+
ண nx
|
122 |
+
ண் nx
|
123 |
+
ன n
|
124 |
+
ன் n
|
125 |
+
ப p
|
126 |
+
ப் p
|
127 |
+
ம m
|
128 |
+
ம் m
|
129 |
+
ய y
|
130 |
+
ய் y
|
131 |
+
ர r
|
132 |
+
ர் r
|
133 |
+
ற rx
|
134 |
+
ற் rx
|
135 |
+
ல l
|
136 |
+
ல் l
|
137 |
+
ள lx
|
138 |
+
ள் lx
|
139 |
+
ழ zh
|
140 |
+
ழ் zh
|
141 |
+
வ w
|
142 |
+
வ் w
|
143 |
+
ஷ sx
|
144 |
+
ஷ் sx
|
145 |
+
ஸ s
|
146 |
+
ஸ் s
|
147 |
+
ஹ h
|
148 |
+
ஹ் h
|
149 |
+
ஃப f
|
150 |
+
ஃப் f
|
ssn_parser_new/lists/u_list
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
k
|
2 |
+
t
|
3 |
+
c
|
4 |
+
p
|
ssn_parser_new/lists/vowel_list
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
aa
|
3 |
+
i
|
4 |
+
ii
|
5 |
+
u
|
6 |
+
uu
|
7 |
+
e
|
8 |
+
ee
|
9 |
+
ai
|
10 |
+
o
|
11 |
+
oo
|
12 |
+
au
|
13 |
+
eu
|
ssn_parser_new/lists/vowels
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
அ
|
2 |
+
ஆ
|
3 |
+
இ
|
4 |
+
ஈ
|
5 |
+
உ
|
6 |
+
ஊ
|
7 |
+
எ
|
8 |
+
ஏ
|
9 |
+
ஐ
|
10 |
+
ஒ
|
11 |
+
ஓ
|
12 |
+
ஔ
|
13 |
+
अ
|
14 |
+
आ
|
15 |
+
इ
|
16 |
+
ई
|
17 |
+
उ
|
18 |
+
ऊ
|
19 |
+
ऋ
|
20 |
+
ए
|
21 |
+
ऐ
|
22 |
+
ओ
|
23 |
+
औ
|
24 |
+
അ
|
25 |
+
ആ
|
26 |
+
ഇ
|
27 |
+
ഈ
|
28 |
+
ഉ
|
29 |
+
ഊ
|
30 |
+
ഋ
|
31 |
+
എ
|
32 |
+
ഏ
|
33 |
+
ഐ
|
34 |
+
ഒ
|
35 |
+
ഓ
|
36 |
+
ഔ
|
37 |
+
అ
|
38 |
+
ఆ
|
39 |
+
ఇ
|
40 |
+
ఈ
|
41 |
+
ఉ
|
42 |
+
ఊ
|
43 |
+
ఎ
|
44 |
+
ఏ
|
45 |
+
ఐ
|
46 |
+
ఒ
|
47 |
+
ఓ
|
48 |
+
ఔ
|
ssn_parser_new/lists/vuv_list
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
k g ng
|
2 |
+
t d nd
|
3 |
+
p b m
|
4 |
+
tx dx nx
|
ssn_parser_new/lists/word1
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
முயற்சியை
|
ssn_parser_new/non_parallel-parser.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import os
|
3 |
+
import subprocess
|
4 |
+
|
5 |
+
def process_word(word, phone_file_name):
    """Run the external parser on one word and record the outcome.

    The word is written to ``tempword`` and ``python scripts/vul.py tempword``
    is executed with its stderr captured in ``temp_output_string``.  The
    parser's result is then read from ``lists/out_word``.  All paths are
    relative to the current working directory.

    * If the parser wrote anything to stderr, could not be started, or left
      no ``lists/out_word`` behind, *word* is appended to
      ``<phone_file_name>.err``.
    * Otherwise *word* is appended to ``<phone_file_name>.words`` and the
      parser output to ``<phone_file_name>.cls`` (one line each, so the two
      files stay line-aligned).

    Scratch files are removed before returning.

    Args:
        word: The word to parse.
        phone_file_name: Prefix of the ``.err`` / ``.words`` / ``.cls`` files.
    """
    # Words are non-ASCII text; use UTF-8 explicitly instead of relying on
    # the locale's default encoding.
    with open('tempword', 'w', encoding='utf-8') as tempword_file:
        tempword_file.write(word)

    # stderr goes straight into the scratch file; any content there marks
    # the word as failed.
    with open('temp_output_string', 'wb') as stderr_file:
        try:
            subprocess.run(['python', 'scripts/vul.py', 'tempword'],
                           stderr=stderr_file)
        except OSError as exc:
            # The interpreter itself could not be launched: record that as
            # parser error output so the word lands in the .err file.
            stderr_file.write(
                b'parser could not be started: '
                + str(exc).encode('utf-8', 'replace')
            )

    parser_failed = os.path.getsize('temp_output_string') > 0

    output = ''
    if not parser_failed:
        try:
            with open('lists/out_word', encoding='utf-8') as out_word_file:
                output = out_word_file.read()
        except FileNotFoundError:
            # The parser exited silently without producing a result; treat
            # it as a failed word instead of crashing the whole run.
            parser_failed = True

    if parser_failed:
        with open(f'{phone_file_name}.err', 'a', encoding='utf-8') as err_file:
            err_file.write(word + '\n')
    else:
        with open(f'{phone_file_name}.words', 'a', encoding='utf-8') as words_file:
            words_file.write(word + '\n')
        with open(f'{phone_file_name}.cls', 'a', encoding='utf-8') as cls_file:
            cls_file.write(output + '\n')

    # Remove the per-word scratch files so the next word starts clean.
    os.system('rm -rf phn tempword lists/tmp lists/nasal lists/trans_word lists/out_word')
|
31 |
+
|
32 |
+
def main():
    """Phonify a list of unique words and write a word/phone table.

    Command line: ``unique_words output_file_name parser_path rand_num``.

    Steps, all executed with ``parser_path`` as the working directory:

    1. Copy the word list into ``parser_path`` and delete the results of any
       previous run.
    2. Call ``process_word`` for every (stripped) line of the word list.
    3. Copy ``phone_out_file.cls`` to ``phone_out_file``, rewrite it with
       ``sed`` and ``get_phone_mapped_text.py``.
    4. ``paste`` the successful words and their mapped phones, tab-separated,
       into ``output_file_name`` (or create it empty when no word succeeded).
       The output path is resolved relative to ``parser_path``.
    5. Make sure ``phone_out_file.err`` exists.

    The original working directory is restored even if a step raises.
    """
    if len(sys.argv) != 5:
        print("Usage: python script.py unique_words output_file_name parser_path rand_num")
        sys.exit(1)

    unique_words = sys.argv[1]
    output_file_name = sys.argv[2]
    parser_path = sys.argv[3]
    # sys.argv[4] (rand_num) is still required on the command line, but the
    # active code path does not use it.
    phone_file_name = 'phone_out_file'

    os.system(f'cp {unique_words} {parser_path}/')
    curr_path = os.getcwd()
    os.chdir(parser_path)
    try:
        os.system(f'rm {phone_file_name}.words {phone_file_name}.cls {phone_file_name}.err {phone_file_name}')
        os.system('rm -rf temp_output_string phn tempword lists/tmp lists/nasal lists/trans_word lists/out_word')

        # After the chdir a relative path with a directory part no longer
        # resolves; fall back to the copy made in parser_path above.
        if os.path.exists(unique_words):
            words_path = unique_words
        else:
            words_path = os.path.basename(unique_words)

        with open(words_path) as unique_words_file:
            for word in unique_words_file:
                process_word(word.strip(), phone_file_name)

        os.system(f'rm -rf temp_output_string phn tempword lists/tmp lists/nasal lists/trans_word lists/out_word')

        # The .cls file only exists when at least one word was parsed
        # successfully; without it there is nothing to post-process.
        if os.path.exists(f'{phone_file_name}.cls'):
            os.system(f'cp {phone_file_name}.cls {phone_file_name}')
            os.system(f'sed -i \'s/ /""/g\' {phone_file_name}')
            os.system(f'sed -i \'s/^/""/g\' {phone_file_name}')
            os.system(f'sed -i \'s/$/""/g\' {phone_file_name}')
            subprocess.run(['python', 'get_phone_mapped_text.py', phone_file_name])
            os.system(f'sed -i \'s/"//g\' {phone_file_name}')
            os.system(f'sed -i \'s/ //g\' {phone_file_name}')

        try:
            with open(f'{phone_file_name}.words') as words_file:
                words_str = words_file.read()
        except FileNotFoundError:
            # No word succeeded, so process_word never created the file.
            words_str = ''

        if words_str != '':
            os.system(f'paste -d\'\\t\' {phone_file_name}.words {phone_file_name} > {output_file_name}')
        else:
            os.system(f'touch {output_file_name}')

        # Guarantee the .err file exists.  Append mode creates it when
        # missing and leaves recorded failures untouched; nothing is written
        # so the file keeps containing failed words only.
        with open(f'{phone_file_name}.err', 'a'):
            pass

        # NOTE: a fallback that re-ran the failed words through
        # phonify_wrapper.sh and appended its output to output_file_name was
        # already disabled (commented out) here.
    finally:
        os.chdir(curr_path)


if __name__ == "__main__":
    main()
|
ssn_parser_new/output
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
ऐadimuga
|
3 |
+
af
|
4 |
+
afkamum
|
5 |
+
afkam
|
6 |
+
afkAmऐ
|
7 |
+
afki
|
8 |
+
aftaङgஉk
|
9 |
+
aftaडuङgAl
|
ssn_parser_new/output.cls
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
ai a d i m u g a
|
3 |
+
a f
|
4 |
+
a f k a m u m
|
5 |
+
a f k a m
|
6 |
+
a f k aa m ai
|
7 |
+
a f k i
|
8 |
+
a f t a ng g eu k
|
9 |
+
a f t a dx u ng g aa l
|
ssn_parser_new/output.err
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
௨
|
ssn_parser_new/output.words
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
அ
|
2 |
+
அஇஅதிமுக
|
3 |
+
அஃ
|
4 |
+
அஃகமும்
|
5 |
+
அஃகம்
|
6 |
+
அஃகாமை
|
7 |
+
அஃகி
|
8 |
+
அஃதங்குக்
|
9 |
+
அஃதடுங்கால்
|
ssn_parser_new/phone_out_file
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
ऐadimuga
|
3 |
+
af
|
4 |
+
afkamum
|
5 |
+
afkam
|
6 |
+
afkAmऐ
|
7 |
+
afki
|
8 |
+
aftaङgउk
|
9 |
+
aftaडuङgAl
|
ssn_parser_new/phone_out_file.cls
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
ai a d i m u g a
|
3 |
+
a f
|
4 |
+
a f k a m u m
|
5 |
+
a f k a m
|
6 |
+
a f k aa m ai
|
7 |
+
a f k i
|
8 |
+
a f t a ng g eu k
|
9 |
+
a f t a dx u ng g aa l
|
ssn_parser_new/phone_out_file.err
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
௨
|
ssn_parser_new/phone_out_file.words
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
அ
|
2 |
+
அஇஅதிமுக
|
3 |
+
அஃ
|
4 |
+
அஃகமும்
|
5 |
+
அஃகம்
|
6 |
+
அஃகாமை
|
7 |
+
அஃகி
|
8 |
+
அஃதங்குக்
|
9 |
+
அஃதடுங்கால்
|
ssn_parser_new/phonify_wrapper.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import subprocess
|
3 |
+
|
4 |
+
def main():
    """Run the unified parser over a word list and append word/phone pairs.

    Command line: ``inpFile outFile randNum basePath``.

    Working inside ``<basePath>/unified_parser`` the function:

    1. creates the scratch directory ``uniPar_<randNum>``;
    2. feeds every line of ``inpFile`` to ``./unified-parser`` (under
       ``valgrind``, via GNU ``parallel`` with at most 48 jobs), one output
       file ``uniPar_<randNum>/word_NNNN.txt`` per input line;
    3. concatenates those files into ``.uniTemp_<randNum>.txt`` and removes
       the scratch directory;
    4. runs ``get_phone_mapped_text_updated.sh`` to produce
       ``.uniOut_<randNum>.txt`` and strips the wrapper syntax with ``sed``;
    5. appends ``inpFile`` and the result, pasted side by side with a space,
       to ``outFile`` and deletes the two temporary files.

    ``inpFile`` and ``outFile`` are used after the directory change, so
    relative paths are resolved against ``<basePath>/unified_parser``.
    The original working directory is restored even if a step raises.
    """
    # Imported here so that main() also works when this module is imported
    # rather than executed as a script.
    import shutil
    import sys

    inpFile = sys.argv[1]
    outFile = sys.argv[2]
    randNum = sys.argv[3]
    basePath = sys.argv[4]
    currPath = os.getcwd()
    unifParFold = os.path.join(basePath, 'unified_parser')
    uniParOut = f'.uniOut_{randNum}.txt'
    uniParList = inpFile
    uniParTemp = f'.uniTemp_{randNum}.txt'
    scratchDir = f'uniPar_{randNum}'

    print("The data is successfully reached")
    os.chdir(unifParFold)
    try:
        os.mkdir(scratchDir)

        # Number of parallel jobs: one per input line, capped at 48.
        nj = int(subprocess.check_output(['wc', '-l', inpFile]).decode().split()[0])
        nj = min(nj, 48)

        # awk pairs every input line with its own output file name and
        # parallel hands the two tab-separated columns to the parser.
        # (uniParTemp is not pre-written here: it is created by the `cat`
        # redirection below, which would overwrite any earlier content.)
        command = (
            f"awk '{{printf \"%s\\tuniPar_{randNum}/word_%04d.txt\\n\", $0, NR}}' {uniParList} | "
            f"parallel -j {nj} --colsep '\t' 'valgrind ./unified-parser {{1}} {{2}} 1 0 0 0 > /dev/null 2> /dev/null' > /dev/null 2> /dev/null"
        )
        subprocess.run(command, shell=True, check=True)

        os.system(f"cat uniPar_{randNum}/*.txt > {uniParTemp}")
        # The directory still holds the per-word files, so os.rmdir (which
        # only removes empty directories) would fail; remove the whole tree.
        shutil.rmtree(scratchDir)

        subprocess.run(['bash', 'get_phone_mapped_text_updated.sh', uniParTemp, uniParOut])

        os.system(f"sed -i \"s:^(set! wordstruct '::g\" {uniParOut}")
        os.system(f"sed -i 's:[)(\"0 ]::g' {uniParOut}")

        command = f"paste -d' ' {uniParList} {uniParOut} >> {outFile}"
        os.system(command)

        os.remove(uniParTemp)
        os.remove(uniParOut)
    finally:
        os.chdir(currPath)


if __name__ == "__main__":
    main()
|
ssn_parser_new/scripts/ortho_to_phonetic1.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
def cat(file):
    """Return the complete text content of *file*."""
    handle = open(file, 'r')
    try:
        contents = handle.read()
    finally:
        # Always release the file handle, even if reading fails.
        handle.close()
    return contents
|
6 |
+
|
7 |
+
def ortho_to_phonetic(input_file, phone_list_file, output_file):
    """Split the words of *input_file* into phones, one phone per line.

    Every whitespace-separated word is scanned left to right.  At each
    position the longest slice of 6, 5, 4, 3 or 2 characters that occurs as
    a whole word (regex ``\\b...\\b``) in *phone_list_file* is written to
    *output_file* and the position advances by that width.  If no slice
    matches, the single character at the position is written, except for
    ``,`` and ``.`` which are skipped.

    Processing stops at the first word equal to ``SIL``: ``SIL`` is written
    and any remaining words are ignored.
    NOTE(review): stopping at the first SIL is kept from the previous
    implementation; confirm that it is intended.

    Args:
        input_file: Path of the text file holding the words to split.
        phone_list_file: Path of the file listing the known phones.
        output_file: Path of the file to (over)write with the phones.
    """
    with open(input_file, 'r') as f:
        words = f.read().split()

    # Read the phone list once; it does not change while we scan.
    with open(phone_list_file, 'r') as f:
        phone_list_text = f.read()

    known = {}  # candidate slice -> whether it is a listed phone

    def is_phone(candidate):
        """Return True if *candidate* occurs as a whole word in the list."""
        found = known.get(candidate)
        if found is None:
            pattern = rf'\b{re.escape(candidate)}\b'
            found = re.search(pattern, phone_list_text) is not None
            known[candidate] = found
        return found

    with open(output_file, 'w') as phn_handle:
        for word in words:
            if word == "SIL":
                phn_handle.write("SIL\n")
                break

            pos = 0
            while pos < len(word):
                # Longest candidate first.
                for width in (6, 5, 4, 3, 2):
                    candidate = word[pos:pos + width]
                    if is_phone(candidate):
                        phn_handle.write(candidate + "\n")
                        # Advance by the nominal width; near the end of the
                        # word the slice may be shorter, which simply ends
                        # the scan of this word.
                        pos += width
                        break
                else:
                    # No multi-character phone starts here.
                    char = word[pos]
                    if char not in (",", "."):
                        phn_handle.write(char + "\n")
                    pos += 1
|
63 |
+
|
64 |
+
if __name__ == "__main__":
    import sys

    # Command-line entry point: input_file phone_list output_file.
    if len(sys.argv) != 4:
        print("Usage: python script.py input_file phone_list output_file")
        # Wrong usage is an error: report it through a non-zero exit status.
        sys.exit(1)

    input_file, phone_list_file, output_file = sys.argv[1], sys.argv[2], sys.argv[3]

    ortho_to_phonetic(input_file, phone_list_file, output_file)
|
ssn_parser_new/scripts/tamil_trans_py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:764578b6dceb6b64d25fffd712738861a3dc29914920d022d52d7f96b68e700a
|
3 |
+
size 17577056
|