# -*- coding: utf-8 -*-
#
#  Copyright (c) 2013-present, Anoop Kunchukuttan
#  All rights reserved.
#
#  This source code is licensed under the MIT license found in the
#  LICENSE file in the root directory of this source tree.
#
# Program to transliterate acronyms from Latin script to Indic languages
#
# @author Anoop Kunchukuttan
#

import random


class LatinToIndicAcronymTransliterator(object):
    """Spell out Latin-script acronyms letter by letter in an Indic script.

    Each Latin letter is first replaced by a fixed Devanagari string, and the
    Devanagari result is then handed to ``UnicodeIndicTransliterator`` to be
    converted from ``"hi"`` into the requested target language.
    """

    # Translation table (code point -> replacement string) for the 26
    # lowercase Latin letters.  Characters that are not keys of this table
    # are left untouched by ``str.translate``.
    LATIN_TO_DEVANAGARI_TRANSTABLE = str.maketrans(
        {
            "a": "ए",
            "b": "बी",
            "c": "सी",
            "d": "डी",
            "e": "ई",
            "f": "एफ",
            "g": "जी",
            "h": "एच",
            "i": "आई",
            "j": "जे",
            "k": "के",
            "l": "एल",
            "m": "एम",
            "n": "एन",
            "o": "ओ",
            "p": "पी",
            "q": "क्यू",
            "r": "आर",
            "s": "एस",
            "t": "टी",
            "u": "यू",
            "v": "वी",
            "w": "डब्ल्यू",
            "x": "एक्स",
            "y": "वाय",
            "z": "जेड",
        }
    )

    # Lowercase Latin letters that random acronyms are drawn from.
    LATIN_ALPHABET = [
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
    ]

    @staticmethod
    def get_transtable():
        """Return the Latin-letter to Devanagari translation table."""
        return LatinToIndicAcronymTransliterator.LATIN_TO_DEVANAGARI_TRANSTABLE

    @staticmethod
    def transliterate(w, lang):
        """Transliterate the Latin acronym ``w`` into language ``lang``.

        ``w`` is lowercased, every letter found in
        ``LATIN_TO_DEVANAGARI_TRANSTABLE`` is replaced by its Devanagari
        string, and the result is passed to
        ``UnicodeIndicTransliterator.transliterate`` with source language
        ``"hi"`` and target language ``lang``.

        Args:
            w: acronym in Latin script (any letter case).
            lang: target language code forwarded to
                ``UnicodeIndicTransliterator.transliterate``.

        Returns:
            Whatever ``UnicodeIndicTransliterator.transliterate`` returns for
            the Devanagari intermediate form.
        """
        # Imported here rather than at module scope so that the letter table
        # and the acronym generator stay usable without loading the
        # transliteration machinery.
        from indicnlp.transliterate.unicode_transliterate import (
            UnicodeIndicTransliterator,
        )

        devanagari = w.lower().translate(
            LatinToIndicAcronymTransliterator.LATIN_TO_DEVANAGARI_TRANSTABLE
        )
        return UnicodeIndicTransliterator.transliterate(devanagari, "hi", lang)

    @staticmethod
    def generate_latin_acronyms(num_acronyms, min_len=2, max_len=6, strategy="random"):
        """Generate Latin acronyms in lower case.

        Args:
            num_acronyms: number of acronyms to generate.
            min_len: minimum acronym length (inclusive).
            max_len: maximum acronym length (inclusive).
            strategy: sampling strategy; only ``"random"`` is supported. It
                picks a length with ``random.randint`` and then draws that
                many letters, with replacement, from ``LATIN_ALPHABET``.

        Returns:
            A list of ``num_acronyms`` lowercase strings.

        Raises:
            ValueError: if ``strategy`` is not a supported strategy.
                ``random.randint`` also raises ``ValueError`` when
                ``min_len > max_len``.
        """
        # Fail loudly on an unknown strategy; previously this silently
        # produced a list of ``None`` values.
        if strategy != "random":
            raise ValueError(
                "Unsupported acronym generation strategy: {!r}".format(strategy)
            )

        alphabet = LatinToIndicAcronymTransliterator.LATIN_ALPHABET
        # The length is drawn before the letters for each acronym, which keeps
        # the sequence of calls into ``random`` the same as before.
        return [
            "".join(random.choices(alphabet, k=random.randint(min_len, max_len)))
            for _ in range(num_acronyms)
        ]