diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..393bd2a25e09c965fdee938b39980ce9275770e2 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +.env +.venv/ +.git/ \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..148c320042f3e4bab8b0346bfdc3ae77b3760eb7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +libs/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +libs/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +libs/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.o filter=lfs diff=lfs merge=lfs -text +libs/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/processor.o filter=lfs diff=lfs merge=lfs -text +libs/IndicTransToolkit/IndicTransToolkit/processor.cp313-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text +libs/IndicTransToolkit/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text diff --git a/DockerFile b/DockerFile new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..749afdcfba6f98824dd580db1975f29fbabdf59e --- /dev/null +++ b/app.py @@ -0,0 +1,40 @@ +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +import torch +from transformers import AutoModelForSeq2SeqLM, AutoTokenizer +import os +import sys + 
import logging

sys.path.append(os.path.abspath("libs/IndicTransToolkit"))
from IndicTransToolkit.processor import IndicProcessor

logger = logging.getLogger(__name__)

app = FastAPI(title="IndicTrans Translator API")

# The processor, tokenizer and model are created once at import time and are
# shared by every request handled by this process.
ip = IndicProcessor(inference=True)
tokenizer = AutoTokenizer.from_pretrained(
    "ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    "ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True
)


class TranslationRequest(BaseModel):
    # Source text; the handler always preprocesses it with src_lang="eng_Latn".
    text: str
    # Target language tag forwarded to IndicProcessor as tgt_lang / lang
    # (the toolkit README uses "hin_Deva" as an example).
    target_lang: str


@app.post("/translate")
def translate_text(req: TranslationRequest):
    """Translate ``req.text`` from English into ``req.target_lang``.

    Returns:
        A dict of the form ``{"translation": <translated string>}``.

    Raises:
        HTTPException: 400 if the text or the target language is blank;
            500 if preprocessing, generation or decoding fails.
    """
    if not req.text.strip():
        raise HTTPException(status_code=400, detail="Input text is empty.")
    # A blank language tag can never be translated; reject it as a client
    # error instead of letting it surface as an opaque 500 from the pipeline.
    if not req.target_lang.strip():
        raise HTTPException(status_code=400, detail="Target language is empty.")

    try:
        batch = ip.preprocess_batch(
            [req.text], src_lang="eng_Latn", tgt_lang=req.target_lang
        )
        batch = tokenizer(
            batch,
            padding="longest",
            truncation=True,
            max_length=256,
            return_tensors="pt",
        )

        with torch.inference_mode():
            outputs = model.generate(**batch, num_beams=5, max_length=256)

        # Decode inside the target-tokenizer scope so the target vocabulary
        # is used (the toolkit README describes this scoping as required).
        with tokenizer.as_target_tokenizer():
            decoded = tokenizer.batch_decode(
                outputs,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=True,
            )

        translated = ip.postprocess_batch(decoded, lang=req.target_lang)[0]
        return {"translation": translated}

    except Exception as e:
        # Keep the full traceback in the server log and chain the cause so
        # the original failure is not lost behind the HTTP error.
        logger.exception("Translation to %r failed", req.target_lang)
        raise HTTPException(status_code=500, detail=str(e)) from e
re-written in [Cython](https://github.com/cython/cython) for faster implementation. This gives us at least `+10 lines/s`. +- A new `visualize` argument has been added to `preprocess_batch` to track the processing with a `tqdm` bar. + +# 📢 Release v1.0.2 +- The repository has been renamed to `IndicTransToolkit`. +- 🚨 The custom tokenizer is now **removed** from the repository. Please revert to a previous commit ([v1.0.1](https://github.com/VarunGumma/IndicTransToolkit/tree/0e68fb5872f4d821578a5252f90ad43c9649370f)) to use it **(strongly discouraged)**. The official _(and only tokenizer)_ is available on HF along with the models. + +# 📢 Release v1.0.0 +- The [PreTrainedTokenizer](https://huggingface.co/docs/transformers/main_classes/tokenizer) for IndicTrans2 is now available on HF 🎉🎉 Note that you still need the `IndicProcessor` to pre-process the sentences before tokenization. +- 🚨 **In favor of the standard PreTrainedTokenizer, we deprecated the custom tokenizer. However, this custom tokenizer will still be available here for backward compatibility, but no further updates/bug-fixes will be provided.** +- The `indic_evaluate` function is now consolidated into a concrete `IndicEvaluator` class. +- The data collation function for training is consolidated into a concrete `IndicDataCollator` class. +- A simple batching method is now available in the `IndicProcessor`. 
\ No newline at end of file diff --git a/libs/IndicTransToolkit/IndicTransToolkit.egg-info/PKG-INFO b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..16a4162b1eef848f4fac4bb2559fbdddd2f980ff --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/PKG-INFO @@ -0,0 +1,131 @@ +Metadata-Version: 2.4 +Name: IndicTransToolkit +Version: 1.0.3 +Summary: A simple, consistent, and extendable module for IndicTrans2 tokenizer compatible with HuggingFace models +Home-page: https://github.com/VarunGumma/IndicTransToolkit +Author: Varun Gumma +Author-email: varun230999@gmail.com +License: MIT +Classifier: Programming Language :: Python :: 3 +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Requires-Python: >=3.8 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: setuptools>=68.2.2 +Requires-Dist: torch +Requires-Dist: cython +Requires-Dist: sacremoses +Requires-Dist: sentencepiece +Requires-Dist: transformers +Requires-Dist: sacrebleu +Requires-Dist: indic-nlp-library-IT2@ git+https://github.com/VarunGumma/indic_nlp_library.git +Dynamic: author +Dynamic: author-email +Dynamic: classifier +Dynamic: description +Dynamic: description-content-type +Dynamic: home-page +Dynamic: license +Dynamic: license-file +Dynamic: requires-dist +Dynamic: requires-python +Dynamic: summary + +# IndicTransToolkit + +## About +The goal of this repository is to provide a simple, modular, and extendable toolkit for [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2) and be compatible with the HuggingFace models released. Please refer to the `CHANGELOG.md` for latest developments. 
+ +## Pre-requisites + - `Python 3.8+` + - [Indic NLP Library](https://github.com/VarunGumma/indic_nlp_library) + - Other requirements as listed in `requirements.txt` + +## Configuration + - Editable installation (Note, this may take a while): +```bash +git clone https://github.com/VarunGumma/IndicTransToolkit +cd IndicTransToolkit + +pip install --editable . --use-pep517 # required for pip >= 25.0 + +# in case it fails, try: +# pip install --editable . --use-pep517 --config-settings editable_mode=compat +``` + +## Examples +For the training use case, please see [here](https://github.com/AI4Bharat/IndicTrans2/tree/main/huggingface_interface). + +### PreTrainedTokenizer +```python +import torch +from IndicTransToolkit.processor import IndicProcessor # NOW IMPLEMENTED IN CYTHON !! +from transformers import AutoModelForSeq2SeqLM, AutoTokenizer + +ip = IndicProcessor(inference=True) +tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True) +model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True) + +sentences = [ + "This is a test sentence.", + "This is another longer different test sentence.", + "Please send an SMS to 9876543210 and an email on newemail123@xyz.com by 15th October, 2023.", +] + +batch = ip.preprocess_batch(sentences, src_lang="eng_Latn", tgt_lang="hin_Deva", visualize=False) # set it to visualize=True to print a progress bar +batch = tokenizer(batch, padding="longest", truncation=True, max_length=256, return_tensors="pt") + +with torch.inference_mode(): + outputs = model.generate(**batch, num_beams=5, num_return_sequences=1, max_length=256) + +with tokenizer.as_target_tokenizer(): + # This scoping is absolutely necessary, as it will instruct the tokenizer to tokenize using the target vocabulary. + # Failure to use this scoping will result in gibberish/unexpected predictions as the output will be de-tokenized with the source vocabulary instead. 
+ outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True) + +outputs = ip.postprocess_batch(outputs, lang="hin_Deva") +print(outputs) + +>>> ['यह एक परीक्षण वाक्य है।', 'यह एक और लंबा अलग परीक्षण वाक्य है।', 'कृपया 9876543210 पर एक एस. एम. एस. भेजें और 15 अक्टूबर, 2023 तक newemail123@xyz.com पर एक ईमेल भेजें।'] +``` + +### Evaluation +- `IndicEvaluator` is a python implementation of [compute_metrics.sh](https://github.com/AI4Bharat/IndicTrans2/blob/main/compute_metrics.sh). +- We have found that this python implementation gives slightly lower scores than the original `compute_metrics.sh`. So, please use this function cautiously, and feel free to raise a PR if you have found the bug/fix. +```python +from IndicTransToolkit import IndicEvaluator + +# this method returns a dictionary with BLEU and ChrF2++ scores with appropriate signatures +evaluator = IndicEvaluator() +scores = evaluator.evaluate(tgt_lang=tgt_lang, preds=pred_file, refs=ref_file) + +# alternatively, you can pass the list of predictions and references instead of files +# scores = evaluator.evaluate(tgt_lang=tgt_lang, preds=preds, refs=refs) +``` + +## Authors + - Varun Gumma (varun230999@gmail.com) + - Jay Gala (jaygala24@gmail.com) + - Pranjal Agadh Chitale (pranjalchitale@gmail.com) + - Raj Dabre (prajdabre@gmail.com) + + +## Bugs and Contribution +Since this is a bleeding-edge module, you may encounter broken stuff and import issues once in a while. In case you encounter any bugs or want additional functionalities, please feel free to raise `Issues`/`Pull Requests` or contact the authors. 
+ + +## Citation +If you use our codebase, or models, please do cite the following paper: +```bibtex +@article{ + gala2023indictrans, + title={IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages}, + author={Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan}, + journal={Transactions on Machine Learning Research}, + issn={2835-8856}, + year={2023}, + url={https://openreview.net/forum?id=vfT4YuzAYA}, + note={} +} +``` diff --git a/libs/IndicTransToolkit/IndicTransToolkit.egg-info/SOURCES.txt b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..88be9e169f1ea2b0582199901d1159428ce6c9b1 --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/SOURCES.txt @@ -0,0 +1,15 @@ +LICENSE +README.md +pyproject.toml +setup.py +IndicTransToolkit/__init__.py +IndicTransToolkit/collator.py +IndicTransToolkit/evaluator.py +IndicTransToolkit/processor.c +IndicTransToolkit/version.py +IndicTransToolkit.egg-info/PKG-INFO +IndicTransToolkit.egg-info/SOURCES.txt +IndicTransToolkit.egg-info/dependency_links.txt +IndicTransToolkit.egg-info/not-zip-safe +IndicTransToolkit.egg-info/requires.txt +IndicTransToolkit.egg-info/top_level.txt \ No newline at end of file diff --git a/libs/IndicTransToolkit/IndicTransToolkit.egg-info/dependency_links.txt b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/libs/IndicTransToolkit/IndicTransToolkit.egg-info/not-zip-safe 
b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/not-zip-safe new file mode 100644 index 0000000000000000000000000000000000000000..d3f5a12faa99758192ecc4ed3fc22c9249232e86 --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/libs/IndicTransToolkit/IndicTransToolkit.egg-info/requires.txt b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/requires.txt new file mode 100644 index 0000000000000000000000000000000000000000..a0b41de3f606df25802a04f434e72cc9b98f3eee --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/requires.txt @@ -0,0 +1,8 @@ +setuptools>=68.2.2 +torch +cython +sacremoses +sentencepiece +transformers +sacrebleu +indic-nlp-library-IT2@ git+https://github.com/VarunGumma/indic_nlp_library.git diff --git a/libs/IndicTransToolkit/IndicTransToolkit.egg-info/top_level.txt b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..c33e91aba8100e26cb1db70df9b34419be0d120c --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit.egg-info/top_level.txt @@ -0,0 +1 @@ +IndicTransToolkit diff --git a/libs/IndicTransToolkit/IndicTransToolkit/__init__.py b/libs/IndicTransToolkit/IndicTransToolkit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1d370140808aab3aeb5990013db0fad93699d117 --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit/__init__.py @@ -0,0 +1,9 @@ +from .evaluator import IndicEvaluator +from .collator import IndicDataCollator +from .processor import IndicProcessor + +__all__ = [ + "IndicEvaluator", + "IndicDataCollator", + "IndicProcessor", +] \ No newline at end of file diff --git a/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-310.pyc b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cab49bc4bd8583d59dce4968b09aff94d1453886 Binary 
files /dev/null and b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-310.pyc differ diff --git a/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-313.pyc b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5d029149e1ceb798a666e28e2941f1f73f61843 Binary files /dev/null and b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/__init__.cpython-313.pyc differ diff --git a/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-310.pyc b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..42f1c454b2f73d451bc00958667ba0c2509cd8f6 Binary files /dev/null and b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-310.pyc differ diff --git a/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-313.pyc b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83cdc905b19069a700be3089a97152f6b046b5ee Binary files /dev/null and b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/collator.cpython-313.pyc differ diff --git a/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-310.pyc b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2034d444412e40d6e28d8368ef25da25cde3b031 Binary files /dev/null and b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-310.pyc differ diff --git a/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-313.pyc b/libs/IndicTransToolkit/IndicTransToolkit/__pycache__/evaluator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40c5021d333771ac0471c98973e7098623d31ee1 Binary files /dev/null and 
@dataclass
class IndicDataCollator:
    """Collate seq2seq features into one padded batch.

    Labels are right-padded here with ``label_pad_token_id``; every other
    field is padded by the tokenizer, whose ``padding_side`` is set to
    ``"left"`` on each call.
    """

    tokenizer: PreTrainedTokenizerBase
    model: Optional[Any] = None
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    label_pad_token_id: int = -100
    return_tensors: str = "pt"

    def __call__(self, features, return_tensors=None):
        """Pad ``features`` (mutated in place) and return the batch.

        When the first feature carries ``"labels"`` and ``model`` exposes
        ``prepare_decoder_input_ids_from_labels``, the returned batch also
        gets a ``"decoder_input_ids"`` entry.
        """
        tensor_kind = self.return_tensors if return_tensors is None else return_tensors
        has_labels = "labels" in features[0].keys()

        # `tokenizer.pad` does not pad labels, yet needs equal lengths to
        # build tensors, so the labels are equalised first.
        if has_labels:
            target_len = max(len(item["labels"]) for item in features)
            multiple = self.pad_to_multiple_of
            if multiple is not None:
                # Round up to the next multiple of `pad_to_multiple_of`.
                target_len = (target_len + multiple - 1) // multiple * multiple

            # Labels are padded on the right (fairseq default for seq2seq).
            for item in features:
                current = item["labels"]
                filler = [self.label_pad_token_id] * (target_len - len(current))
                if isinstance(current, list):
                    item["labels"] = current + filler
                else:
                    item["labels"] = np.concatenate([current, filler]).astype(
                        np.int64
                    )

        self.tokenizer.padding_side = "left"
        batch = pad_without_fast_tokenizer_warning(
            self.tokenizer,
            features,
            padding=self.padding,
            max_length=self.max_length,
            return_tensors=tensor_kind,
            pad_to_multiple_of=self.pad_to_multiple_of,
        )

        # Derive decoder inputs from the padded labels when the model can.
        can_build_decoder_inputs = (
            has_labels
            and self.model is not None
            and hasattr(self.model, "prepare_decoder_input_ids_from_labels")
        )
        if can_build_decoder_inputs:
            batch["decoder_input_ids"] = (
                self.model.prepare_decoder_input_ids_from_labels(
                    labels=batch["labels"]
                )
            )

        return batch
class IndicEvaluator:
    """Score translations with BLEU and chrF2++ (via sacrebleu).

    Non-English targets are normalized and tokenized with IndicNLP before
    scoring; English targets are scored as-is with 13a BLEU tokenization.
    """

    def __init__(self):
        # === Metrics ===
        self._chrf2_metric = CHRF(word_order=2)
        self._bleu_metric_13a = BLEU(tokenize="13a")
        self._bleu_metric_none = BLEU(tokenize="none")

        # === Normalizer factory and cache ===
        self._indic_norm_factory = IndicNormalizerFactory()
        self._normalizer_cache = {}  # Cache normalizers by iso_lang

        # === FLORES -> ISO codes ===
        self._flores_codes = {
            "asm_Beng": "as",
            "awa_Deva": "hi",
            "ben_Beng": "bn",
            "bho_Deva": "hi",
            "brx_Deva": "hi",
            "doi_Deva": "hi",
            "eng_Latn": "en",
            "gom_Deva": "kK",
            "gon_Deva": "hi",
            "guj_Gujr": "gu",
            "hin_Deva": "hi",
            "hne_Deva": "hi",
            "kan_Knda": "kn",
            "kas_Arab": "ur",
            "kas_Deva": "hi",
            "kha_Latn": "en",
            "lus_Latn": "en",
            "mag_Deva": "hi",
            "mai_Deva": "hi",
            "mal_Mlym": "ml",
            "mar_Deva": "mr",
            "mni_Beng": "bn",
            "mni_Mtei": "hi",
            "npi_Deva": "ne",
            "ory_Orya": "or",
            "pan_Guru": "pa",
            "san_Deva": "hi",
            "sat_Olck": "or",
            "snd_Arab": "ur",
            "snd_Deva": "hi",
            "tam_Taml": "ta",
            "tel_Telu": "te",
            "urd_Arab": "ur",
            "unr_Deva": "hi",
        }

    def _get_normalizer(self, iso_lang: str):
        """Return the normalizer for ``iso_lang``, building it at most once."""
        try:
            return self._normalizer_cache[iso_lang]
        except KeyError:
            normalizer = self._indic_norm_factory.get_normalizer(iso_lang)
            self._normalizer_cache[iso_lang] = normalizer
            return normalizer

    def _preprocess(self, sentences: List[str], lang: str) -> List[str]:
        """Normalize and trivially tokenize ``sentences`` with IndicNLP.

        ``lang`` is a FLORES tag; tags missing from the FLORES -> ISO table
        fall back to ``"hi"``. Each sentence is stripped, normalized,
        tokenized, and re-joined with single spaces.
        """
        iso_lang = self._flores_codes.get(lang, "hi")
        normalize = self._get_normalizer(iso_lang).normalize
        tokenize = indic_tokenize.trivial_tokenize
        return [
            " ".join(tokenize(normalize(line.strip()), iso_lang))
            for line in sentences
        ]

    @staticmethod
    def _read_lines(path: str) -> List[str]:
        """Read a UTF-8 text file and return its lines, each stripped."""
        with open(path, "r", encoding="utf-8") as handle:
            return [line.strip() for line in handle]

    def _corpus_scores(self, bleu_metric, preds: List[str], refs: List[str]) -> dict:
        """Build the result dict from ``bleu_metric`` and the chrF2++ metric."""
        chrf2 = self._chrf2_metric
        bleu_score = bleu_metric.corpus_score(preds, [refs])
        chrf_score = chrf2.corpus_score(preds, [refs])
        return {
            "bleu": {
                "score": round(bleu_score.score, 1),
                "signature": bleu_metric.get_signature().format(),
            },
            "chrF2++": {
                "score": round(chrf_score.score, 1),
                "signature": chrf2.get_signature().format(),
            },
        }

    def evaluate(
        self,
        tgt_lang: str,
        preds: Union[List[str], str],
        refs: Union[List[str], str],
    ):
        """Evaluate BLEU and chrF2++ for the given predictions and references.

        Args:
            tgt_lang: FLORES tag of the target language. ``"eng_Latn"`` is
                scored directly with 13a BLEU tokenization; any other tag is
                preprocessed with IndicNLP and scored with BLEU
                ``tokenize="none"``.
            preds: List of predictions, or a path to a file with one
                prediction per line.
            refs: List of references, or a path to a file with one
                reference per line.

        Returns:
            ``{"bleu": {...}, "chrF2++": {...}}``, where each entry holds a
            ``"score"`` rounded to one decimal and the metric ``"signature"``.

        Raises:
            AssertionError: if ``preds`` or ``refs`` is ``None``, or their
                lengths differ.
        """
        # Raised explicitly rather than via `assert` so the checks still run
        # under `python -O`; the exception type is kept for existing callers.
        if preds is None or refs is None:
            raise AssertionError("Predictions and References cannot be None")

        # Convert file paths to lists if needed
        if isinstance(preds, str):
            preds = self._read_lines(preds)
        if isinstance(refs, str):
            refs = self._read_lines(refs)

        if len(preds) != len(refs):
            raise AssertionError("Number of predictions and references do not match")

        # For English (eng_Latn), skip Indic NLP normalization
        if tgt_lang != "eng_Latn":
            return self._corpus_scores(
                self._bleu_metric_none,
                self._preprocess(preds, tgt_lang),
                self._preprocess(refs, tgt_lang),
            )

        # For English, 13a tokenization is standard
        return self._corpus_scores(self._bleu_metric_13a, preds, refs)
+#else +#if defined(CYTHON_LIMITED_API) && CYTHON_LIMITED_API +#define __PYX_EXTRA_ABI_MODULE_NAME "limited" +#else +#define __PYX_EXTRA_ABI_MODULE_NAME "" +#endif +#define CYTHON_ABI "3_0_12" __PYX_EXTRA_ABI_MODULE_NAME +#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI +#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." +#define CYTHON_HEX_VERSION 0x03000CF0 +#define CYTHON_FUTURE_DIVISION 1 +#include +#ifndef offsetof + #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#define __PYX_COMMA , +#ifndef HAVE_LONG_LONG + #define HAVE_LONG_LONG +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX +#if defined(GRAALVM_PYTHON) + /* For very preliminary testing purposes. Most variables are set the same as PyPy. 
+ The existence of this section does not imply that anything works or is even tested */ + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 1 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + 
#undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(PYPY_VERSION) + #define CYTHON_COMPILING_IN_PYPY 1 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + #endif + #if PY_VERSION_HEX < 0x03090000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00) + #undef 
CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(CYTHON_LIMITED_API) + #ifdef Py_LIMITED_API + #undef __PYX_LIMITED_VERSION_HEX + #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API + #endif + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 1 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_CLINE_IN_TRACEBACK + #define CYTHON_CLINE_IN_TRACEBACK 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 1 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #endif + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_MODULE_STATE + #define 
CYTHON_USE_MODULE_STATE 1 + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(Py_GIL_DISABLED) || defined(Py_NOGIL) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #ifndef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 1 + #endif + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define 
CYTHON_USE_MODULE_STATE 0 + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 + #endif +#else + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 1 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #ifndef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 1 + #endif + #if PY_MAJOR_VERSION < 3 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 1 + #endif + #ifndef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 1 + #endif + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #elif !defined(CYTHON_USE_UNICODE_WRITER) + #define CYTHON_USE_UNICODE_WRITER 1 + #endif + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #ifndef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 1 + #endif + #ifndef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6) + #endif + 
#ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1) + #endif + #ifndef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 1 + #endif + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #endif + #if PY_VERSION_HEX < 0x030400a1 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #elif !defined(CYTHON_USE_TP_FINALIZE) + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #if PY_VERSION_HEX < 0x030600B1 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #elif !defined(CYTHON_USE_DICT_VERSIONS) + #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5) + #endif + #if PY_VERSION_HEX < 0x030700A3 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #elif !defined(CYTHON_USE_EXC_INFO_STACK) + #define CYTHON_USE_EXC_INFO_STACK 1 + #endif + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 1 + #endif +#endif +#if !defined(CYTHON_FAST_PYCCALL) +#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) +#endif +#if !defined(CYTHON_VECTORCALL) +#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) +#endif +#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1) +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_MAJOR_VERSION < 3 + #include "longintrepr.h" + #endif + #undef SHIFT + #undef BASE + #undef MASK + #ifdef SIZEOF_VOID_P + enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; + #endif +#endif +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#ifndef 
__has_cpp_attribute + #define __has_cpp_attribute(x) 0 +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifndef CYTHON_UNUSED + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(maybe_unused) + #define CYTHON_UNUSED [[maybe_unused]] + #endif + #endif + #endif +#endif +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_UNUSED_VAR +# if defined(__cplusplus) + template void CYTHON_UNUSED_VAR( const T& ) { } +# else +# define CYTHON_UNUSED_VAR(x) (void)(x) +# endif +#endif +#ifndef CYTHON_MAYBE_UNUSED_VAR + #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x) +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_USE_CPP_STD_MOVE + #if defined(__cplusplus) && (\ + __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600)) + #define CYTHON_USE_CPP_STD_MOVE 1 + #else + #define CYTHON_USE_CPP_STD_MOVE 0 + #endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) +#ifdef _MSC_VER + #ifndef _MSC_STDINT_H_ + #if _MSC_VER < 
1300 + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + #else + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + #endif + #endif + #if _MSC_VER < 1300 + #ifdef _WIN64 + typedef unsigned long long __pyx_uintptr_t; + #else + typedef unsigned int __pyx_uintptr_t; + #endif + #else + #ifdef _WIN64 + typedef unsigned __int64 __pyx_uintptr_t; + #else + typedef unsigned __int32 __pyx_uintptr_t; + #endif + #endif +#else + #include + typedef uintptr_t __pyx_uintptr_t; +#endif +#ifndef CYTHON_FALLTHROUGH + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(fallthrough) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(fallthrough) + #define CYTHON_FALLTHROUGH [[fallthrough]] + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_cpp_attribute(clang::fallthrough) + #define CYTHON_FALLTHROUGH [[clang::fallthrough]] + #elif __has_cpp_attribute(gnu::fallthrough) + #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] + #endif + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_attribute(fallthrough) + #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) + #else + #define CYTHON_FALLTHROUGH + #endif + #endif + #if defined(__clang__) && defined(__apple_build_version__) + #if __apple_build_version__ < 7000000 + #undef CYTHON_FALLTHROUGH + #define CYTHON_FALLTHROUGH + #endif + #endif +#endif +#ifdef __cplusplus + template + struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);}; + #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL::value) +#else + #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0) +#endif +#if CYTHON_COMPILING_IN_PYPY == 1 + #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000) +#else + #define __PYX_NEED_TP_PRINT_SLOT 
(PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000) +#endif +#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer)) + +#ifndef CYTHON_INLINE + #if defined(__clang__) + #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) + #elif defined(__GNUC__) + #define CYTHON_INLINE __inline__ + #elif defined(_MSC_VER) + #define CYTHON_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_INLINE inline + #else + #define CYTHON_INLINE + #endif +#endif + +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_DefaultClassType PyClass_Type + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" + #define __Pyx_DefaultClassType PyType_Type +#if CYTHON_COMPILING_IN_LIMITED_API + static CYTHON_INLINE PyObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyObject *exception_table = NULL; + PyObject *types_module=NULL, *code_type=NULL, *result=NULL; + #if __PYX_LIMITED_VERSION_HEX < 0x030B0000 + PyObject *version_info; + PyObject *py_minor_version = NULL; + #endif + long minor_version = 0; + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 + minor_version = 11; + #else + if (!(version_info = PySys_GetObject("version_info"))) goto end; + if (!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; + minor_version = PyLong_AsLong(py_minor_version); + Py_DECREF(py_minor_version); + if (minor_version == -1 && PyErr_Occurred()) goto end; + #endif + if 
(!(types_module = PyImport_ImportModule("types"))) goto end; + if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end; + if (minor_version <= 7) { + (void)p; + result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOO", a, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else if (minor_version <= 10) { + result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOO", a,p, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else { + if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end; + result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOO", a,p, k, l, s, f, code, + c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell); + } + end: + Py_XDECREF(code_type); + Py_XDECREF(exception_table); + Py_XDECREF(types_module); + if (type) { + PyErr_Restore(type, value, traceback); + } + return result; + } + #ifndef CO_OPTIMIZED + #define CO_OPTIMIZED 0x0001 + #endif + #ifndef CO_NEWLOCALS + #define CO_NEWLOCALS 0x0002 + #endif + #ifndef CO_VARARGS + #define CO_VARARGS 0x0004 + #endif + #ifndef CO_VARKEYWORDS + #define CO_VARKEYWORDS 0x0008 + #endif + #ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x0200 + #endif + #ifndef CO_GENERATOR + #define CO_GENERATOR 0x0020 + #endif + #ifndef CO_COROUTINE + #define CO_COROUTINE 0x0080 + #endif +#elif PY_VERSION_HEX >= 0x030B0000 + static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyCodeObject *result; + PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); + if (!empty_bytes) return NULL; + result = + #if PY_VERSION_HEX >= 0x030C0000 + PyUnstable_Code_NewWithPosOnlyArgs + #else + PyCode_NewWithPosOnlyArgs + #endif + (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes); + Py_DECREF(empty_bytes); + return result; + } 
+#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#endif +#endif +#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE) + #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type) +#else + #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)ob)->ob_type == (type)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is) + #define __Pyx_Py_Is(x, y) Py_Is(x, y) +#else + #define __Pyx_Py_Is(x, y) ((x) == (y)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone) + #define __Pyx_Py_IsNone(ob) Py_IsNone(ob) +#else + #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue) + #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob) +#else + #define __Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse) + #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob) +#else + #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False) +#endif +#define __Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? 
NULL : (obj)) +#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o) +#else + #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o) +#endif +#ifndef CO_COROUTINE + #define CO_COROUTINE 0x80 +#endif +#ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x200 +#endif +#ifndef Py_TPFLAGS_CHECKTYPES + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#ifndef Py_TPFLAGS_SEQUENCE + #define Py_TPFLAGS_SEQUENCE 0 +#endif +#ifndef Py_TPFLAGS_MAPPING + #define Py_TPFLAGS_MAPPING 0 +#endif +#ifndef METH_STACKLESS + #define METH_STACKLESS 0 +#endif +#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) + #ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + #endif + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); + typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames); +#else + #if PY_VERSION_HEX >= 0x030d00A4 + # define __Pyx_PyCFunctionFast PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords PyCFunctionFastWithKeywords + #else + # define __Pyx_PyCFunctionFast _PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords + #endif +#endif +#if CYTHON_METH_FASTCALL + #define __Pyx_METH_FASTCALL METH_FASTCALL + #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast + #define __Pyx_PyCFunction_FastCallWithKeywords __Pyx_PyCFunctionFastWithKeywords +#else + #define __Pyx_METH_FASTCALL METH_VARARGS + #define __Pyx_PyCFunction_FastCall PyCFunction + #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords +#endif +#if CYTHON_VECTORCALL + #define __pyx_vectorcallfunc vectorcallfunc + 
#define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET + #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n)) +#elif CYTHON_BACKPORT_VECTORCALL + typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args, + size_t nargsf, PyObject *kwnames); + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1)) + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)) +#else + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0 + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n)) +#endif +#if PY_MAJOR_VERSION >= 0x030900B1 +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_CheckExact(func) +#else +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_Check(func) +#endif +#define __Pyx_CyOrPyCFunction_Check(func) PyCFunction_Check(func) +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) (((PyCFunctionObject*)(func))->m_ml->ml_meth) +#elif !CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(func) +#endif +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FLAGS(func) (((PyCFunctionObject*)(func))->m_ml->ml_flags) +static CYTHON_INLINE PyObject* __Pyx_CyOrPyCFunction_GET_SELF(PyObject *func) { + return (__Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_STATIC) ? 
NULL : ((PyCFunctionObject*)func)->m_self; +} +#endif +static CYTHON_INLINE int __Pyx__IsSameCFunction(PyObject *func, void *cfunc) { +#if CYTHON_COMPILING_IN_LIMITED_API + return PyCFunction_Check(func) && PyCFunction_GetFunction(func) == (PyCFunction) cfunc; +#else + return PyCFunction_Check(func) && PyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +#endif +} +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCFunction(func, cfunc) +#if __PYX_LIMITED_VERSION_HEX < 0x030900B1 + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b)) + typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *); +#else + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b) + #define __Pyx_PyCMethod PyCMethod +#endif +#ifndef METH_METHOD + #define METH_METHOD 0x200 +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p) PyMem_Realloc(p) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) +#else + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyThreadState_Current PyThreadState_Get() +#elif !CYTHON_FAST_THREAD_STATE + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#elif PY_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyThreadState_Current PyThreadState_GetUnchecked() +#elif PY_VERSION_HEX >= 0x03060000 + #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() +#elif PY_VERSION_HEX >= 0x03000000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#else + #define __Pyx_PyThreadState_Current _PyThreadState_Current +#endif +#if 
CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op) +{ + void *result; + result = PyModule_GetState(op); + if (!result) + Py_FatalError("Couldn't find the module state"); + return result; +} +#endif +#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype) +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name)) +#else + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name) +#endif +#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) +#include "pythread.h" +#define Py_tss_NEEDS_INIT 0 +typedef int Py_tss_t; +static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { + *key = PyThread_create_key(); + return 0; +} +static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { + Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); + *key = Py_tss_NEEDS_INIT; + return key; +} +static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { + PyObject_Free(key); +} +static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { + return *key != Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { + PyThread_delete_key(*key); + *key = Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { + return PyThread_set_key_value(*key, value); +} +static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { + return PyThread_get_key_value(*key); +} +#endif +#if PY_MAJOR_VERSION < 3 + #if CYTHON_COMPILING_IN_PYPY + #if PYPY_VERSION_NUM < 0x07030600 + #if defined(__cplusplus) && __cplusplus >= 201402L + [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]] + #elif defined(__GNUC__) || defined(__clang__) + __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))) + 
#elif defined(_MSC_VER) + __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")) + #endif + static CYTHON_INLINE int PyGILState_Check(void) { + return 0; + } + #else // PYPY_VERSION_NUM < 0x07030600 + #endif // PYPY_VERSION_NUM < 0x07030600 + #else + static CYTHON_INLINE int PyGILState_Check(void) { + PyThreadState * tstate = _PyThreadState_Current; + return tstate && (tstate == PyGILState_GetThisThreadState()); + } + #endif +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 || defined(_PyDict_NewPresized) +#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) +#else +#define __Pyx_PyDict_NewPresized(n) PyDict_New() +#endif +#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && PY_VERSION_HEX < 0x030d0000 && CYTHON_USE_UNICODE_INTERNALS +#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) { + PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name); + if (res == NULL) PyErr_Clear(); + return res; +} +#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) +#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#else +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) { +#if CYTHON_COMPILING_IN_PYPY + return PyDict_GetItem(dict, name); +#else + PyDictEntry *ep; + PyDictObject *mp = (PyDictObject*) dict; + long hash = 
((PyStringObject *) name)->ob_shash; + assert(hash != -1); + ep = (mp->ma_lookup)(mp, name, hash); + if (ep == NULL) { + return NULL; + } + return ep->me_value; +#endif +} +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#endif +#if CYTHON_USE_TYPE_SLOTS + #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags) + #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0) + #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext) +#else + #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp)) + #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature) + #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v) +#else + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v) +#endif +#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000 +#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\ + PyTypeObject *type = Py_TYPE((PyObject*)obj);\ + assert(__Pyx_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE));\ + PyObject_GC_Del(obj);\ + Py_DECREF(type);\ +} +#else +#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define CYTHON_PEP393_ENABLED 1 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((void)u, (0)) + #define __Pyx_PyUnicode_DATA(u) ((void*)u) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i)) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u)) +#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #if PY_VERSION_HEX >= 0x030C0000 + #define 
__Pyx_PyUnicode_READY(op) (0) + #else + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #endif + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) + #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u)) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch) + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) + #else + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) + #else + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) + #endif + #endif +#else + #define CYTHON_PEP393_ENABLED 0 + #define PyUnicode_1BYTE_KIND 1 + #define PyUnicode_2BYTE_KIND 2 + #define PyUnicode_4BYTE_KIND 4 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 
65535U : 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #if !defined(PyUnicode_DecodeUnicodeEscape) + #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors) + #endif + #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500) + #undef PyUnicode_Contains + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) + #endif + #if !defined(PyByteArray_Check) + #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) + #endif + #if !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) + #endif +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? 
PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#ifndef PyObject_Unicode + #define PyObject_Unicode PyObject_Str +#endif +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#if CYTHON_COMPILING_IN_CPYTHON + #define __Pyx_PySequence_ListKeepNew(obj)\ + (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? 
__Pyx_NewRef(obj) : PySequence_List(obj)) +#else + #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type) +#endif +#if PY_VERSION_HEX >= 0x030900A4 + #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) +#else + #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) +#endif +#if CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i) + #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o) +#else + #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i) + #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v) + #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o) +#endif +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyImport_AddModuleRef(name) PyImport_AddModuleRef(name) +#else + static CYTHON_INLINE PyObject *__Pyx_PyImport_AddModuleRef(const char *name) { + PyObject *module = PyImport_AddModule(name); + Py_XINCREF(module); + return module; + } +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define 
PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define __Pyx_Py3Int_Check(op) PyLong_Check(op) + #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#else + #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op)) + #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op)) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t +#endif +#if CYTHON_USE_ASYNC_SLOTS + #if PY_VERSION_HEX >= 0x030500B1 + #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods + #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) + #else + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef __Pyx_PyAsyncMethodsStruct + typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + } __Pyx_PyAsyncMethodsStruct; +#endif + +#if defined(_WIN32) || defined(WIN32) || 
defined(MS_WINDOWS) + #if !defined(_USE_MATH_DEFINES) + #define _USE_MATH_DEFINES + #endif +#endif +#include +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +static CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) +#define __Pyx_truncl trunc +#else +#define __Pyx_truncl truncl +#endif + +#define __PYX_MARK_ERR_POS(f_index, lineno) \ + { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } +#define __PYX_ERR(f_index, lineno, Ln_error) \ + { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } + +#ifdef CYTHON_EXTERN_C + #undef __PYX_EXTERN_C + #define __PYX_EXTERN_C CYTHON_EXTERN_C +#elif defined(__PYX_EXTERN_C) + #ifdef _MSC_VER + #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") + #else + #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. 
+ #endif +#else + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__IndicTransToolkit__processor +#define __PYX_HAVE_API__IndicTransToolkit__processor +/* Early includes */ +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { + return (size_t) i < (size_t) limit; +} +#if defined (__cplusplus) && __cplusplus >= 201103L + #include + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) + #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) +#elif defined 
(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? -value : value) +#endif +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s); +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char*); +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsSString(s) ((const signed 
char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o) +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +#define __Pyx_PySequence_Tuple(obj)\ + (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); +#if CYTHON_ASSUME_SAFE_MACROS +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? 
__Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_VERSION_HEX >= 0x030C00A7 + #ifndef _PyLong_SIGN_MASK + #define _PyLong_SIGN_MASK 3 + #endif + #ifndef _PyLong_NON_SIZE_BITS + #define _PyLong_NON_SIZE_BITS 3 + #endif + #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK) + #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0) + #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x)) + #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1) + #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_SignedDigitCount(x)\ + ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x)) + #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue) + #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x) + #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x) + #else + #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0]) + #endif + typedef Py_ssize_t __Pyx_compact_pylong; + typedef size_t __Pyx_compact_upylong; + #else + #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) + #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) + #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) + #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 
0 : __Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x)) + #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x) + #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1) + #define __Pyx_PyLong_CompactValue(x)\ + ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? -(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0])) + typedef sdigit __Pyx_compact_pylong; + typedef digit __Pyx_compact_upylong; + #endif + #if PY_VERSION_HEX >= 0x030C00A5 + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) + #else + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit) + #endif +#endif +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +#include +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = (char) c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default 
encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#include +static char* __PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ +static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } + +#if !CYTHON_USE_MODULE_STATE +static PyObject *__pyx_m = NULL; +#endif +static int __pyx_lineno; +static int __pyx_clineno = 0; 
+static const char * __pyx_cfilenm = __FILE__; +static const char *__pyx_filename; + +/* #### Code section: filename_table ### */ + +static const char *__pyx_f[] = { + "IndicTransToolkit\\\\processor.pyx", + "", +}; +/* #### Code section: utility_code_proto_before_types ### */ +/* ForceInitThreads.proto */ +#ifndef __PYX_FORCE_INIT_THREADS + #define __PYX_FORCE_INIT_THREADS 0 +#endif + +/* #### Code section: numeric_typedefs ### */ +/* #### Code section: complex_type_declarations ### */ +/* #### Code section: type_declarations ### */ + +/*--- Type declarations ---*/ +struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor; +struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch; +struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch; + +/* "IndicTransToolkit/processor.pyx":449 + * + * # Exposed Method: Preprocess a Batch of Sentences + * cpdef list preprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] batch, + */ +struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch { + int __pyx_n; + PyObject *tgt_lang; + int is_target; + int visualize; +}; + +/* "IndicTransToolkit/processor.pyx":479 + * + * # Exposed Method: Postprocess a Batch of Sentences + * cpdef list postprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] sents, + */ +struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch { + int __pyx_n; + PyObject *lang; + int visualize; +}; + +/* "IndicTransToolkit/processor.pyx":20 + * + * + * cdef class IndicProcessor: # <<<<<<<<<<<<<< + * cdef public bint inference + * + */ +struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor { + PyObject_HEAD + struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *__pyx_vtab; + int inference; + PyObject *_MULTISPACE_REGEX; + PyObject *_DIGIT_SPACE_PERCENT; + PyObject *_DOUBLE_QUOT_PUNC; + PyObject *_DIGIT_NBSP_DIGIT; + PyObject *_END_BRACKET_SPACE_PUNC_REGEX; + 
PyObject *_URL_PATTERN; + PyObject *_NUMERAL_PATTERN; + PyObject *_EMAIL_PATTERN; + PyObject *_OTHER_PATTERN; + PyObject *_PUNC_REPLACEMENTS; + PyObject *_INDIC_FAILURE_CASES; + PyObject *_flores_codes; + PyObject *_digits_translation_table; + PyObject *_placeholder_entity_maps; + PyObject *_en_tok; + PyObject *_en_normalizer; + PyObject *_en_detok; + PyObject *_xliterator; +}; + + + +struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor { + PyObject *(*_apply_punc_replacements)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *); + PyObject *(*_punc_norm)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *); + PyObject *(*_wrap_with_placeholders)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *); + PyObject *(*_normalize)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *); + PyObject *(*_do_indic_tokenize_and_transliterate)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, PyObject *, int); + PyObject *(*_preprocess)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, PyObject *, PyObject *, int); + PyObject *(*_postprocess)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *); + PyObject *(*preprocess_batch)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch *__pyx_optional_args); + PyObject *(*postprocess_batch)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch *__pyx_optional_args); +}; +static struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *__pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor; +/* 
#### Code section: utility_code_proto ### */ + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, Py_ssize_t); + void (*DECREF)(void*, PyObject*, Py_ssize_t); + void (*GOTREF)(void*, PyObject*, Py_ssize_t); + void (*GIVEREF)(void*, PyObject*, Py_ssize_t); + void* (*SetupContext)(const char*, Py_ssize_t, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + } + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__)) + #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext() +#endif + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), 
(__LINE__)) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) == NULL); else {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContextNogil() + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_Py_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; Py_XDECREF(tmp);\ + } while (0) +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* PyErrExceptionMatches.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) +static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); +#else +#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) +#endif + +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define 
__Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#if PY_VERSION_HEX >= 0x030C00A6 +#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? (PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL) +#else +#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type) +#endif +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL) +#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6 +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, 
tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) +#endif + +/* PyObjectGetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* PyObjectGetAttrStrNoError.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name); + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +/* TupleAndListFromArray.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n); +static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n); +#endif + +/* IncludeStringH.proto */ +#include + +/* BytesEquals.proto */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); + +/* UnicodeEquals.proto */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); + +/* fastcall.proto */ +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i) +#elif CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i) +#else + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i) +#endif +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) + #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) +#else + #define __Pyx_Arg_NewRef_VARARGS(arg) arg + #define __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) +#define __Pyx_KwValues_VARARGS(args, 
nargs) NULL +#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s) +#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw) +#if CYTHON_METH_FASTCALL + #define __Pyx_Arg_FASTCALL(args, i) args[i] + #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds) + #define __Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs)) + static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 + CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues); + #else + #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) + #endif + #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs + to have the same reference counting */ + #define __Pyx_Arg_XDECREF_FASTCALL(arg) +#else + #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS + #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS + #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS + #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS + #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS + #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg) + #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start) +#else +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, stop) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop) +#endif + +/* RaiseDoubleKeywords.proto */ +static void 
__Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, + const char* function_name); + +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* PyFunctionFastCall.proto */ +#if CYTHON_FAST_PYCALL +#if !CYTHON_VECTORCALL +#define __Pyx_PyFunction_FastCall(func, args, nargs)\ + __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); +#endif +#define __Pyx_BUILD_ASSERT_EXPR(cond)\ + (sizeof(char [1 - 2*!(cond)]) - 1) +#ifndef Py_MEMBER_SIZE +#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) +#endif +#if !CYTHON_VECTORCALL +#if PY_VERSION_HEX >= 0x03080000 + #include "frameobject.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API && !defined(PYPY_VERSION) + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif + #define __Pxy_PyFrame_Initialize_Offsets() + #define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus) +#else + static size_t __pyx_pyframe_localsplus_offset = 0; + #include "frameobject.h" + #define __Pxy_PyFrame_Initialize_Offsets()\ + ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ + (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) + #define __Pyx_PyFrame_GetLocalsplus(frame)\ + (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) +#endif +#endif +#endif + +/* PyObjectCall.proto */ +#if 
CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectFastCall.proto */ +#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL) +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs); + +/* IterFinish.proto */ +static CYTHON_INLINE int __Pyx_IterFinish(void); + +/* PyObjectCallNoArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +/* PyObjectGetMethod.proto */ +static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method); + +/* PyObjectCallMethod0.proto */ +static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name); + +/* RaiseNeedMoreValuesToUnpack.proto */ +static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index); + +/* RaiseTooManyValuesToUnpack.proto */ +static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected); + +/* UnpackItemEndCheck.proto */ +static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected); + +/* RaiseNoneIterError.proto */ +static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void); + +/* UnpackTupleError.proto */ +static void __Pyx_UnpackTupleError(PyObject *, Py_ssize_t index); + +/* UnpackTuple2.proto */ +#define __Pyx_unpack_tuple2(tuple, value1, value2, is_tuple, has_known_size, decref_tuple)\ + (likely(is_tuple || PyTuple_Check(tuple)) ?\ + (likely(has_known_size || PyTuple_GET_SIZE(tuple) == 2) ?\ + 
__Pyx_unpack_tuple2_exact(tuple, value1, value2, decref_tuple) :\ + (__Pyx_UnpackTupleError(tuple, 2), -1)) :\ + __Pyx_unpack_tuple2_generic(tuple, value1, value2, has_known_size, decref_tuple)) +static CYTHON_INLINE int __Pyx_unpack_tuple2_exact( + PyObject* tuple, PyObject** value1, PyObject** value2, int decref_tuple); +static int __Pyx_unpack_tuple2_generic( + PyObject* tuple, PyObject** value1, PyObject** value2, int has_known_size, int decref_tuple); + +/* dict_iter.proto */ +static CYTHON_INLINE PyObject* __Pyx_dict_iterator(PyObject* dict, int is_dict, PyObject* method_name, + Py_ssize_t* p_orig_length, int* p_is_dict); +static CYTHON_INLINE int __Pyx_dict_iter_next(PyObject* dict_or_iter, Py_ssize_t orig_length, Py_ssize_t* ppos, + PyObject** pkey, PyObject** pvalue, PyObject** pitem, int is_dict); + +/* UnicodeAsUCS4.proto */ +static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject*); + +/* object_ord.proto */ +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyObject_Ord(c)\ + (likely(PyUnicode_Check(c)) ? 
(long)__Pyx_PyUnicode_AsPy_UCS4(c) : __Pyx__PyObject_Ord(c)) +#else +#define __Pyx_PyObject_Ord(c) __Pyx__PyObject_Ord(c) +#endif +static long __Pyx__PyObject_Ord(PyObject* c); + +/* PyDictVersioning.proto */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) +#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ + (version_var) = __PYX_GET_DICT_VERSION(dict);\ + (cache_var) = (value); +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ + (VAR) = __pyx_dict_cached_value;\ + } else {\ + (VAR) = __pyx_dict_cached_value = (LOOKUP);\ + __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ + }\ +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); +#else +#define __PYX_GET_DICT_VERSION(dict) (0) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); +#endif + +/* GetModuleGlobalName.proto */ +#if CYTHON_USE_DICT_VERSIONS +#define __Pyx_GetModuleGlobalName(var, name) do {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\ + (likely(__pyx_dict_cached_value) ? 
__Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\ + __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} while(0) +#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\ + PY_UINT64_T __pyx_dict_version;\ + PyObject *__pyx_dict_cached_value;\ + (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} while(0) +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value); +#else +#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name) +#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name) +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name); +#endif + +/* IncludeStructmemberH.proto */ +#include + +/* FixUpExtensionType.proto */ +#if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type); +#endif + +/* FetchSharedCythonModule.proto */ +static PyObject *__Pyx_FetchSharedCythonABIModule(void); + +/* FetchCommonType.proto */ +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type); +#else +static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases); +#endif + +/* PyMethodNew.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + PyObject *typesModule=NULL, *methodType=NULL, *result=NULL; + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + typesModule = PyImport_ImportModule("types"); + if (!typesModule) return NULL; + methodType = PyObject_GetAttrString(typesModule, "MethodType"); + Py_DECREF(typesModule); + if (!methodType) return NULL; + result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL); + Py_DECREF(methodType); + return result; +} +#elif PY_MAJOR_VERSION >= 3 +static PyObject 
*__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + return PyMethod_New(func, self); +} +#else + #define __Pyx_PyMethod_New PyMethod_New +#endif + +/* PyVectorcallFastCallDict.proto */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw); +#endif + +/* CythonFunctionShared.proto */ +#define __Pyx_CyFunction_USED +#define __Pyx_CYFUNCTION_STATICMETHOD 0x01 +#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02 +#define __Pyx_CYFUNCTION_CCLASS 0x04 +#define __Pyx_CYFUNCTION_COROUTINE 0x08 +#define __Pyx_CyFunction_GetClosure(f)\ + (((__pyx_CyFunctionObject *) (f))->func_closure) +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_CyFunction_GetClassObj(f)\ + (((__pyx_CyFunctionObject *) (f))->func_classobj) +#else + #define __Pyx_CyFunction_GetClassObj(f)\ + ((PyObject*) ((PyCMethodObject *) (f))->mm_class) +#endif +#define __Pyx_CyFunction_SetClassObj(f, classobj)\ + __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj)) +#define __Pyx_CyFunction_Defaults(type, f)\ + ((type *)(((__pyx_CyFunctionObject *) (f))->defaults)) +#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\ + ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g) +typedef struct { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject_HEAD + PyObject *func; +#elif PY_VERSION_HEX < 0x030900B1 + PyCFunctionObject func; +#else + PyCMethodObject func; +#endif +#if CYTHON_BACKPORT_VECTORCALL + __pyx_vectorcallfunc func_vectorcall; +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_weakreflist; +#endif + PyObject *func_dict; + PyObject *func_name; + PyObject *func_qualname; + PyObject *func_doc; + PyObject *func_globals; + PyObject *func_code; + PyObject *func_closure; +#if PY_VERSION_HEX < 0x030900B1 || 
CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_classobj; +#endif + void *defaults; + int defaults_pyobjects; + size_t defaults_size; + int flags; + PyObject *defaults_tuple; + PyObject *defaults_kwdict; + PyObject *(*defaults_getter)(PyObject *); + PyObject *func_annotations; + PyObject *func_is_coroutine; +} __pyx_CyFunctionObject; +#undef __Pyx_CyOrPyCFunction_Check +#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType) +#define __Pyx_CyOrPyCFunction_Check(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type) +#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType) +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc); +#undef __Pyx_IsSameCFunction +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCyOrCFunction(func, cfunc) +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj); +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m, + size_t size, + int pyobjects); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m, + PyObject *tuple); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m, + PyObject *dict); +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, + PyObject *dict); +static int __pyx_CyFunction_init(PyObject *module); +#if CYTHON_METH_FASTCALL +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); 
+static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +#if CYTHON_BACKPORT_VECTORCALL +#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall) +#else +#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall) +#endif +#endif + +/* CythonFunction.proto */ +static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); + +/* RaiseUnexpectedTypeError.proto */ +static int __Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj); + +/* UnpackUnboundCMethod.proto */ +typedef struct { + PyObject *type; + PyObject **method_name; + PyCFunction func; + PyObject *method; + int flag; +} __Pyx_CachedCFunction; + +/* CallUnboundCMethod0.proto */ +static PyObject* __Pyx__CallUnboundCMethod0(__Pyx_CachedCFunction* cfunc, PyObject* self); +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CallUnboundCMethod0(cfunc, self)\ + (likely((cfunc)->func) ?\ + (likely((cfunc)->flag == METH_NOARGS) ? (*((cfunc)->func))(self, NULL) :\ + (PY_VERSION_HEX >= 0x030600B1 && likely((cfunc)->flag == METH_FASTCALL) ?\ + (PY_VERSION_HEX >= 0x030700A0 ?\ + (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)(cfunc)->func)(self, &__pyx_empty_tuple, 0) :\ + (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)(cfunc)->func)(self, &__pyx_empty_tuple, 0, NULL)) :\ + (PY_VERSION_HEX >= 0x030700A0 && (cfunc)->flag == (METH_FASTCALL | METH_KEYWORDS) ?\ + (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)(cfunc)->func)(self, &__pyx_empty_tuple, 0, NULL) :\ + (likely((cfunc)->flag == (METH_VARARGS | METH_KEYWORDS)) ? ((*(PyCFunctionWithKeywords)(void*)(PyCFunction)(cfunc)->func)(self, __pyx_empty_tuple, NULL)) :\ + ((cfunc)->flag == METH_VARARGS ? 
(*((cfunc)->func))(self, __pyx_empty_tuple) :\ + __Pyx__CallUnboundCMethod0(cfunc, self)))))) :\ + __Pyx__CallUnboundCMethod0(cfunc, self)) +#else +#define __Pyx_CallUnboundCMethod0(cfunc, self) __Pyx__CallUnboundCMethod0(cfunc, self) +#endif + +/* set_iter.proto */ +static CYTHON_INLINE PyObject* __Pyx_set_iterator(PyObject* iterable, int is_set, + Py_ssize_t* p_orig_length, int* p_source_is_set); +static CYTHON_INLINE int __Pyx_set_iter_next( + PyObject* iter_obj, Py_ssize_t orig_length, + Py_ssize_t* ppos, PyObject **value, + int source_is_set); + +/* GCCDiagnostics.proto */ +#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) +#define __Pyx_HAS_GCC_DIAGNOSTIC +#endif + +/* BuildPyUnicode.proto */ +static PyObject* __Pyx_PyUnicode_BuildFromAscii(Py_ssize_t ulength, char* chars, int clength, + int prepend_sign, char padding_char); + +/* CIntToPyUnicode.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_From_int(int value, Py_ssize_t width, char padding_char, char format_char); + +/* JoinPyUnicode.proto */ +static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength, + Py_UCS4 max_char); + +/* PyObjectFormatSimple.proto */ +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyObject_FormatSimple(s, f) (\ + likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ + PyObject_Format(s, f)) +#elif PY_MAJOR_VERSION < 3 + #define __Pyx_PyObject_FormatSimple(s, f) (\ + likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ + likely(PyString_CheckExact(s)) ? PyUnicode_FromEncodedObject(s, NULL, "strict") :\ + PyObject_Format(s, f)) +#elif CYTHON_USE_TYPE_SLOTS + #define __Pyx_PyObject_FormatSimple(s, f) (\ + likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ + likely(PyLong_CheckExact(s)) ? PyLong_Type.tp_repr(s) :\ + likely(PyFloat_CheckExact(s)) ? 
PyFloat_Type.tp_repr(s) :\ + PyObject_Format(s, f)) +#else + #define __Pyx_PyObject_FormatSimple(s, f) (\ + likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ + PyObject_Format(s, f)) +#endif + +/* UnicodeConcatInPlace.proto */ +# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_REFNANNY + #define __Pyx_PyUnicode_ConcatInPlace(left, right) __Pyx_PyUnicode_ConcatInPlaceImpl(&left, right, __pyx_refnanny) + #else + #define __Pyx_PyUnicode_ConcatInPlace(left, right) __Pyx_PyUnicode_ConcatInPlaceImpl(&left, right) + #endif + static CYTHON_INLINE PyObject *__Pyx_PyUnicode_ConcatInPlaceImpl(PyObject **p_left, PyObject *right + #if CYTHON_REFNANNY + , void* __pyx_refnanny + #endif + ); +#else +#define __Pyx_PyUnicode_ConcatInPlace __Pyx_PyUnicode_Concat +#endif +#define __Pyx_PyUnicode_ConcatInPlaceSafe(left, right) ((unlikely((left) == Py_None) || unlikely((right) == Py_None)) ?\ + PyNumber_InPlaceAdd(left, right) : __Pyx_PyUnicode_ConcatInPlace(left, right)) + +/* CallUnboundCMethod1.proto */ +static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg); +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg); +#else +#define __Pyx_CallUnboundCMethod1(cfunc, self, arg) __Pyx__CallUnboundCMethod1(cfunc, self, arg) +#endif + +/* dict_getitem_default.proto */ +static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value); + +/* CallUnboundCMethod2.proto */ +static PyObject* __Pyx__CallUnboundCMethod2(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg1, PyObject* arg2); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030600B1 +static CYTHON_INLINE PyObject *__Pyx_CallUnboundCMethod2(__Pyx_CachedCFunction *cfunc, PyObject *self, PyObject *arg1, PyObject *arg2); +#else +#define __Pyx_CallUnboundCMethod2(cfunc, self, arg1, arg2) __Pyx__CallUnboundCMethod2(cfunc, 
self, arg1, arg2) +#endif + +/* GetItemInt.proto */ +#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ + (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ + __Pyx_GetItemInt_Generic(o, to_py_func(i)))) +#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, + int is_list, int wraparound, int boundscheck); + +/* PyUnicode_Unicode.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Unicode(PyObject *obj); + +/* ListCompAppend.proto */ +#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS +static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) { + PyListObject* L = (PyListObject*) list; + Py_ssize_t len = Py_SIZE(list); + if (likely(L->allocated > len)) { + Py_INCREF(x); + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 + L->ob_item[len] = x; + #else + 
PyList_SET_ITEM(list, len, x); + #endif + __Pyx_SET_SIZE(list, len + 1); + return 0; + } + return PyList_Append(list, x); +} +#else +#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x) +#endif + +/* ArgTypeTest.proto */ +#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ + ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 1 :\ + __Pyx__ArgTypeTest(obj, type, name, exact)) +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); + +/* KeywordStringCheck.proto */ +static int __Pyx_CheckKeywordStrings(PyObject *kw, const char* function_name, int kw_allowed); + +/* RaiseException.proto */ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +/* ValidateBasesTuple.proto */ +#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS +static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases); +#endif + +/* PyType_Ready.proto */ +CYTHON_UNUSED static int __Pyx_PyType_Ready(PyTypeObject *t); + +/* PyObject_GenericGetAttrNoDict.proto */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GenericGetAttrNoDict PyObject_GenericGetAttr +#endif + +/* PyObject_GenericGetAttr.proto */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GenericGetAttr PyObject_GenericGetAttr +#endif + +/* SetVTable.proto */ +static int __Pyx_SetVtable(PyTypeObject* typeptr , void* vtable); + +/* GetVTable.proto */ +static void* __Pyx_GetVtable(PyTypeObject *type); + +/* MergeVTables.proto */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __Pyx_MergeVtables(PyTypeObject *type); +#endif + +/* 
SetupReduce.proto */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __Pyx_setup_reduce(PyObject* type_obj); +#endif + +/* Import.proto */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); + +/* ImportDottedModule.proto */ +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple); +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple); +#endif + +/* ImportFrom.proto */ +static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); + +/* CLineInTraceback.proto */ +#ifdef CYTHON_CLINE_IN_TRACEBACK +#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0) +#else +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); +#endif + +/* CodeObjectCache.proto */ +#if !CYTHON_COMPILING_IN_LIMITED_API +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); +#endif + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* CIntFromPy.proto */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); + +/* FormatTypeName.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +typedef PyObject *__Pyx_TypeName; 
+#define __Pyx_FMT_TYPENAME "%U" +static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp); +#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj) +#else +typedef const char *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%.200s" +#define __Pyx_PyType_GetName(tp) ((tp)->tp_name) +#define __Pyx_DECREF_TypeName(obj) +#endif + +/* FastTypeChecks.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2) +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); +#else +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2)) +#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) +#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) +#endif +#define __Pyx_PyErr_ExceptionMatches2(err1, err2) __Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2) +#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) + +/* CheckBinaryVersion.proto */ +static unsigned long __Pyx_get_runtime_version(void); +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer); + +/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + +/* #### Code section: module_declarations 
### */ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__apply_punc_replacements(CYTHON_UNUSED struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text, PyObject *__pyx_v_replacements); /* proto*/ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__punc_norm(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text); /* proto*/ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__wrap_with_placeholders(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text); /* proto*/ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__normalize(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text); /* proto*/ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__do_indic_tokenize_and_transliterate(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sentence, PyObject *__pyx_v_normalizer, PyObject *__pyx_v_iso_lang, int __pyx_v_transliterate); /* proto*/ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__preprocess(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sent, PyObject *__pyx_v_src_lang, PyObject *__pyx_v_tgt_lang, PyObject *__pyx_v_normalizer, int __pyx_v_is_target); /* proto*/ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__postprocess(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sent, PyObject *__pyx_v_lang); /* proto*/ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_batch, PyObject *__pyx_v_src_lang, int __pyx_skip_dispatch, struct 
__pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch *__pyx_optional_args); /* proto*/ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sents, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch *__pyx_optional_args); /* proto*/ + +/* Module declarations from "IndicTransToolkit.processor" */ +/* #### Code section: typeinfo ### */ +/* #### Code section: before_global_var ### */ +#define __Pyx_MODULE_NAME "IndicTransToolkit.processor" +extern int __pyx_module_is_main_IndicTransToolkit__processor; +int __pyx_module_is_main_IndicTransToolkit__processor = 0; + +/* Implementation of "IndicTransToolkit.processor" */ +/* #### Code section: global_var ### */ +static PyObject *__pyx_builtin_range; +static PyObject *__pyx_builtin_chr; +static PyObject *__pyx_builtin_TypeError; +/* #### Code section: string_decls ### */ +static const char __pyx_k_[] = "\340\247\246"; +static const char __pyx_k_0[] = "0"; +static const char __pyx_k_1[] = "1"; +static const char __pyx_k_2[] = "2"; +static const char __pyx_k_3[] = "3"; +static const char __pyx_k_4[] = "4"; +static const char __pyx_k_5[] = "5"; +static const char __pyx_k_6[] = "6"; +static const char __pyx_k_7[] = "7"; +static const char __pyx_k_8[] = "8"; +static const char __pyx_k_9[] = "9"; +static const char __pyx_k_C[] = "\302\240\302\272C"; +static const char __pyx_k_d[] = "(\\d) %"; +static const char __pyx_k_m[] = "m"; +static const char __pyx_k_n[] = "n\302\272\302\240"; +static const char __pyx_k_r[] = "\\r"; +static const char __pyx_k_s[] = "\\(\\s*"; +static const char __pyx_k_ID[] = "__pyx_d); + Py_CLEAR(clear_module_state->__pyx_b); + Py_CLEAR(clear_module_state->__pyx_cython_runtime); + Py_CLEAR(clear_module_state->__pyx_empty_tuple); + Py_CLEAR(clear_module_state->__pyx_empty_bytes); + 
Py_CLEAR(clear_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_CLEAR(clear_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_CLEAR(clear_module_state->__pyx_FusedFunctionType); + #endif + Py_CLEAR(clear_module_state->__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor); + Py_CLEAR(clear_module_state->__pyx_type_17IndicTransToolkit_9processor_IndicProcessor); + Py_CLEAR(clear_module_state->__pyx_n_u_); + Py_CLEAR(clear_module_state->__pyx_kp_u_0); + Py_CLEAR(clear_module_state->__pyx_kp_u_1); + Py_CLEAR(clear_module_state->__pyx_kp_u_1_2); + Py_CLEAR(clear_module_state->__pyx_kp_u_1_2_2); + Py_CLEAR(clear_module_state->__pyx_kp_u_1_3); + Py_CLEAR(clear_module_state->__pyx_kp_u_1_4); + Py_CLEAR(clear_module_state->__pyx_kp_u_2); + Py_CLEAR(clear_module_state->__pyx_kp_u_2_2); + Py_CLEAR(clear_module_state->__pyx_kp_u_3); + Py_CLEAR(clear_module_state->__pyx_kp_u_4); + Py_CLEAR(clear_module_state->__pyx_kp_u_5); + Py_CLEAR(clear_module_state->__pyx_kp_u_6); + Py_CLEAR(clear_module_state->__pyx_kp_u_7); + Py_CLEAR(clear_module_state->__pyx_kp_u_8); + Py_CLEAR(clear_module_state->__pyx_kp_u_9); + Py_CLEAR(clear_module_state->__pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2); + Py_CLEAR(clear_module_state->__pyx_kp_u_A_Za_z0_9_w); + Py_CLEAR(clear_module_state->__pyx_n_u_Arab); + Py_CLEAR(clear_module_state->__pyx_n_u_Aran); + Py_CLEAR(clear_module_state->__pyx_kp_u_C); + Py_CLEAR(clear_module_state->__pyx_kp_u_C_2); + Py_CLEAR(clear_module_state->__pyx_n_s_Dict); + Py_CLEAR(clear_module_state->__pyx_kp_u_ID); + Py_CLEAR(clear_module_state->__pyx_kp_u_ID_2); + Py_CLEAR(clear_module_state->__pyx_kp_u_ID_3); + Py_CLEAR(clear_module_state->__pyx_kp_u_ID_4); + Py_CLEAR(clear_module_state->__pyx_kp_u_ID_5); + Py_CLEAR(clear_module_state->__pyx_n_s_IndicNormalizerFactory); + Py_CLEAR(clear_module_state->__pyx_n_s_IndicProcessor); + Py_CLEAR(clear_module_state->__pyx_n_s_IndicProcessor___reduce_cython); + 
Py_CLEAR(clear_module_state->__pyx_n_s_IndicProcessor___setstate_cython); + Py_CLEAR(clear_module_state->__pyx_n_s_IndicProcessor_postprocess_batch); + Py_CLEAR(clear_module_state->__pyx_n_s_IndicProcessor_preprocess_batch); + Py_CLEAR(clear_module_state->__pyx_n_s_IndicTransToolkit_processor); + Py_CLEAR(clear_module_state->__pyx_kp_s_IndicTransToolkit_processor_pyx); + Py_CLEAR(clear_module_state->__pyx_n_u_Latn); + Py_CLEAR(clear_module_state->__pyx_n_s_List); + Py_CLEAR(clear_module_state->__pyx_n_s_MosesDetokenizer); + Py_CLEAR(clear_module_state->__pyx_n_s_MosesPunctNormalizer); + Py_CLEAR(clear_module_state->__pyx_n_s_MosesTokenizer); + Py_CLEAR(clear_module_state->__pyx_n_u_Mtei); + Py_CLEAR(clear_module_state->__pyx_kp_u_None); + Py_CLEAR(clear_module_state->__pyx_n_u_Olck); + Py_CLEAR(clear_module_state->__pyx_kp_u_Post_processing); + Py_CLEAR(clear_module_state->__pyx_kp_u_Pre_processing); + Py_CLEAR(clear_module_state->__pyx_n_s_Queue); + Py_CLEAR(clear_module_state->__pyx_n_s_TypeError); + Py_CLEAR(clear_module_state->__pyx_n_s_UnicodeIndicTransliterator); + Py_CLEAR(clear_module_state->__pyx_n_s_Union); + Py_CLEAR(clear_module_state->__pyx_n_u__10); + Py_CLEAR(clear_module_state->__pyx_n_u__100); + Py_CLEAR(clear_module_state->__pyx_kp_u__101); + Py_CLEAR(clear_module_state->__pyx_kp_u__102); + Py_CLEAR(clear_module_state->__pyx_kp_u__103); + Py_CLEAR(clear_module_state->__pyx_kp_u__104); + Py_CLEAR(clear_module_state->__pyx_kp_u__105); + Py_CLEAR(clear_module_state->__pyx_kp_u__106); + Py_CLEAR(clear_module_state->__pyx_kp_u__107); + Py_CLEAR(clear_module_state->__pyx_kp_u__108); + Py_CLEAR(clear_module_state->__pyx_kp_u__109); + Py_CLEAR(clear_module_state->__pyx_n_u__11); + Py_CLEAR(clear_module_state->__pyx_kp_u__110); + Py_CLEAR(clear_module_state->__pyx_kp_u__111); + Py_CLEAR(clear_module_state->__pyx_kp_u__112); + Py_CLEAR(clear_module_state->__pyx_kp_u__113); + Py_CLEAR(clear_module_state->__pyx_kp_u__114); + 
Py_CLEAR(clear_module_state->__pyx_kp_u__115); + Py_CLEAR(clear_module_state->__pyx_kp_u__116); + Py_CLEAR(clear_module_state->__pyx_kp_u__117); + Py_CLEAR(clear_module_state->__pyx_kp_u__118); + Py_CLEAR(clear_module_state->__pyx_kp_u__119); + Py_CLEAR(clear_module_state->__pyx_n_u__12); + Py_CLEAR(clear_module_state->__pyx_kp_u__120); + Py_CLEAR(clear_module_state->__pyx_kp_u__121); + Py_CLEAR(clear_module_state->__pyx_kp_u__122); + Py_CLEAR(clear_module_state->__pyx_kp_u__123); + Py_CLEAR(clear_module_state->__pyx_kp_u__124); + Py_CLEAR(clear_module_state->__pyx_kp_u__125); + Py_CLEAR(clear_module_state->__pyx_kp_u__126); + Py_CLEAR(clear_module_state->__pyx_kp_u__127); + Py_CLEAR(clear_module_state->__pyx_kp_u__128); + Py_CLEAR(clear_module_state->__pyx_kp_u__129); + Py_CLEAR(clear_module_state->__pyx_n_u__13); + Py_CLEAR(clear_module_state->__pyx_kp_u__130); + Py_CLEAR(clear_module_state->__pyx_kp_u__131); + Py_CLEAR(clear_module_state->__pyx_kp_u__132); + Py_CLEAR(clear_module_state->__pyx_kp_u__133); + Py_CLEAR(clear_module_state->__pyx_kp_u__134); + Py_CLEAR(clear_module_state->__pyx_kp_u__135); + Py_CLEAR(clear_module_state->__pyx_kp_u__136); + Py_CLEAR(clear_module_state->__pyx_kp_u__137); + Py_CLEAR(clear_module_state->__pyx_n_u__138); + Py_CLEAR(clear_module_state->__pyx_kp_u__139); + Py_CLEAR(clear_module_state->__pyx_n_u__14); + Py_CLEAR(clear_module_state->__pyx_kp_u__140); + Py_CLEAR(clear_module_state->__pyx_kp_u__141); + Py_CLEAR(clear_module_state->__pyx_kp_u__142); + Py_CLEAR(clear_module_state->__pyx_kp_u__143); + Py_CLEAR(clear_module_state->__pyx_kp_u__144); + Py_CLEAR(clear_module_state->__pyx_kp_u__145); + Py_CLEAR(clear_module_state->__pyx_kp_u__146); + Py_CLEAR(clear_module_state->__pyx_kp_u__147); + Py_CLEAR(clear_module_state->__pyx_kp_u__148); + Py_CLEAR(clear_module_state->__pyx_kp_u__149); + Py_CLEAR(clear_module_state->__pyx_n_u__15); + Py_CLEAR(clear_module_state->__pyx_kp_u__151); + Py_CLEAR(clear_module_state->__pyx_kp_u__153); + 
Py_CLEAR(clear_module_state->__pyx_kp_u__154); + Py_CLEAR(clear_module_state->__pyx_n_u__155); + Py_CLEAR(clear_module_state->__pyx_kp_u__156); + Py_CLEAR(clear_module_state->__pyx_kp_u__157); + Py_CLEAR(clear_module_state->__pyx_kp_u__159); + Py_CLEAR(clear_module_state->__pyx_n_u__16); + Py_CLEAR(clear_module_state->__pyx_kp_u__160); + Py_CLEAR(clear_module_state->__pyx_kp_u__162); + Py_CLEAR(clear_module_state->__pyx_kp_u__163); + Py_CLEAR(clear_module_state->__pyx_kp_u__165); + Py_CLEAR(clear_module_state->__pyx_n_u__166); + Py_CLEAR(clear_module_state->__pyx_kp_u__168); + Py_CLEAR(clear_module_state->__pyx_n_u__169); + Py_CLEAR(clear_module_state->__pyx_n_u__17); + Py_CLEAR(clear_module_state->__pyx_n_s__171); + Py_CLEAR(clear_module_state->__pyx_n_u__18); + Py_CLEAR(clear_module_state->__pyx_n_s__182); + Py_CLEAR(clear_module_state->__pyx_n_u__19); + Py_CLEAR(clear_module_state->__pyx_n_u__2); + Py_CLEAR(clear_module_state->__pyx_n_u__20); + Py_CLEAR(clear_module_state->__pyx_n_u__21); + Py_CLEAR(clear_module_state->__pyx_n_u__22); + Py_CLEAR(clear_module_state->__pyx_n_u__23); + Py_CLEAR(clear_module_state->__pyx_n_u__24); + Py_CLEAR(clear_module_state->__pyx_n_u__25); + Py_CLEAR(clear_module_state->__pyx_n_u__26); + Py_CLEAR(clear_module_state->__pyx_n_u__27); + Py_CLEAR(clear_module_state->__pyx_n_u__28); + Py_CLEAR(clear_module_state->__pyx_n_u__29); + Py_CLEAR(clear_module_state->__pyx_n_u__3); + Py_CLEAR(clear_module_state->__pyx_n_u__30); + Py_CLEAR(clear_module_state->__pyx_n_u__31); + Py_CLEAR(clear_module_state->__pyx_n_u__32); + Py_CLEAR(clear_module_state->__pyx_n_u__33); + Py_CLEAR(clear_module_state->__pyx_n_u__34); + Py_CLEAR(clear_module_state->__pyx_n_u__35); + Py_CLEAR(clear_module_state->__pyx_n_u__36); + Py_CLEAR(clear_module_state->__pyx_n_u__37); + Py_CLEAR(clear_module_state->__pyx_n_u__38); + Py_CLEAR(clear_module_state->__pyx_n_u__39); + Py_CLEAR(clear_module_state->__pyx_n_u__4); + Py_CLEAR(clear_module_state->__pyx_n_u__40); + 
Py_CLEAR(clear_module_state->__pyx_n_u__41); + Py_CLEAR(clear_module_state->__pyx_n_u__42); + Py_CLEAR(clear_module_state->__pyx_n_u__43); + Py_CLEAR(clear_module_state->__pyx_n_u__44); + Py_CLEAR(clear_module_state->__pyx_n_u__45); + Py_CLEAR(clear_module_state->__pyx_n_u__46); + Py_CLEAR(clear_module_state->__pyx_n_u__47); + Py_CLEAR(clear_module_state->__pyx_n_u__48); + Py_CLEAR(clear_module_state->__pyx_n_u__49); + Py_CLEAR(clear_module_state->__pyx_n_u__5); + Py_CLEAR(clear_module_state->__pyx_n_u__50); + Py_CLEAR(clear_module_state->__pyx_n_u__51); + Py_CLEAR(clear_module_state->__pyx_n_u__52); + Py_CLEAR(clear_module_state->__pyx_n_u__53); + Py_CLEAR(clear_module_state->__pyx_n_u__54); + Py_CLEAR(clear_module_state->__pyx_n_u__55); + Py_CLEAR(clear_module_state->__pyx_n_u__56); + Py_CLEAR(clear_module_state->__pyx_n_u__57); + Py_CLEAR(clear_module_state->__pyx_n_u__58); + Py_CLEAR(clear_module_state->__pyx_n_u__59); + Py_CLEAR(clear_module_state->__pyx_n_u__6); + Py_CLEAR(clear_module_state->__pyx_n_u__60); + Py_CLEAR(clear_module_state->__pyx_n_u__61); + Py_CLEAR(clear_module_state->__pyx_n_u__62); + Py_CLEAR(clear_module_state->__pyx_n_u__63); + Py_CLEAR(clear_module_state->__pyx_n_u__64); + Py_CLEAR(clear_module_state->__pyx_n_u__65); + Py_CLEAR(clear_module_state->__pyx_n_u__66); + Py_CLEAR(clear_module_state->__pyx_n_u__67); + Py_CLEAR(clear_module_state->__pyx_n_u__68); + Py_CLEAR(clear_module_state->__pyx_n_u__69); + Py_CLEAR(clear_module_state->__pyx_n_u__7); + Py_CLEAR(clear_module_state->__pyx_n_u__70); + Py_CLEAR(clear_module_state->__pyx_n_u__71); + Py_CLEAR(clear_module_state->__pyx_n_u__72); + Py_CLEAR(clear_module_state->__pyx_n_u__73); + Py_CLEAR(clear_module_state->__pyx_n_u__74); + Py_CLEAR(clear_module_state->__pyx_n_u__75); + Py_CLEAR(clear_module_state->__pyx_n_u__76); + Py_CLEAR(clear_module_state->__pyx_n_u__77); + Py_CLEAR(clear_module_state->__pyx_n_u__78); + Py_CLEAR(clear_module_state->__pyx_n_u__79); + 
Py_CLEAR(clear_module_state->__pyx_n_u__8); + Py_CLEAR(clear_module_state->__pyx_n_u__80); + Py_CLEAR(clear_module_state->__pyx_n_u__81); + Py_CLEAR(clear_module_state->__pyx_n_u__82); + Py_CLEAR(clear_module_state->__pyx_n_u__83); + Py_CLEAR(clear_module_state->__pyx_n_u__84); + Py_CLEAR(clear_module_state->__pyx_n_u__85); + Py_CLEAR(clear_module_state->__pyx_n_u__86); + Py_CLEAR(clear_module_state->__pyx_n_u__87); + Py_CLEAR(clear_module_state->__pyx_n_u__88); + Py_CLEAR(clear_module_state->__pyx_n_u__89); + Py_CLEAR(clear_module_state->__pyx_n_u__9); + Py_CLEAR(clear_module_state->__pyx_n_u__90); + Py_CLEAR(clear_module_state->__pyx_n_u__91); + Py_CLEAR(clear_module_state->__pyx_n_u__92); + Py_CLEAR(clear_module_state->__pyx_n_u__93); + Py_CLEAR(clear_module_state->__pyx_n_u__94); + Py_CLEAR(clear_module_state->__pyx_n_u__95); + Py_CLEAR(clear_module_state->__pyx_n_u__96); + Py_CLEAR(clear_module_state->__pyx_n_u__97); + Py_CLEAR(clear_module_state->__pyx_n_u__98); + Py_CLEAR(clear_module_state->__pyx_n_u__99); + Py_CLEAR(clear_module_state->__pyx_n_u_as); + Py_CLEAR(clear_module_state->__pyx_n_u_asm_Beng); + Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines); + Py_CLEAR(clear_module_state->__pyx_n_u_awa_Deva); + Py_CLEAR(clear_module_state->__pyx_kp_u_b_w_https_ftp_w_w_w_b); + Py_CLEAR(clear_module_state->__pyx_n_s_batch); + Py_CLEAR(clear_module_state->__pyx_n_u_ben_Beng); + Py_CLEAR(clear_module_state->__pyx_n_u_bho_Deva); + Py_CLEAR(clear_module_state->__pyx_n_u_bn); + Py_CLEAR(clear_module_state->__pyx_n_u_brx_Deva); + Py_CLEAR(clear_module_state->__pyx_n_s_chr); + Py_CLEAR(clear_module_state->__pyx_n_s_cinit___locals_lambda); + Py_CLEAR(clear_module_state->__pyx_n_s_clear); + Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback); + Py_CLEAR(clear_module_state->__pyx_n_s_compile); + Py_CLEAR(clear_module_state->__pyx_kp_u_d); + Py_CLEAR(clear_module_state->__pyx_kp_u_d_d); + 
Py_CLEAR(clear_module_state->__pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d); + Py_CLEAR(clear_module_state->__pyx_n_s_desc); + Py_CLEAR(clear_module_state->__pyx_n_s_detokenize); + Py_CLEAR(clear_module_state->__pyx_kp_u_disable); + Py_CLEAR(clear_module_state->__pyx_n_u_doi_Deva); + Py_CLEAR(clear_module_state->__pyx_n_u_en); + Py_CLEAR(clear_module_state->__pyx_kp_u_enable); + Py_CLEAR(clear_module_state->__pyx_n_u_eng_Latn); + Py_CLEAR(clear_module_state->__pyx_n_s_escape); + Py_CLEAR(clear_module_state->__pyx_n_s_findall); + Py_CLEAR(clear_module_state->__pyx_kp_u_gc); + Py_CLEAR(clear_module_state->__pyx_n_s_get); + Py_CLEAR(clear_module_state->__pyx_n_s_get_normalizer); + Py_CLEAR(clear_module_state->__pyx_n_s_getstate); + Py_CLEAR(clear_module_state->__pyx_n_u_gom_Deva); + Py_CLEAR(clear_module_state->__pyx_n_u_gon_Deva); + Py_CLEAR(clear_module_state->__pyx_n_s_group); + Py_CLEAR(clear_module_state->__pyx_n_u_gu); + Py_CLEAR(clear_module_state->__pyx_n_u_guj_Gujr); + Py_CLEAR(clear_module_state->__pyx_n_u_hi); + Py_CLEAR(clear_module_state->__pyx_n_u_hin_Deva); + Py_CLEAR(clear_module_state->__pyx_n_u_hne_Deva); + Py_CLEAR(clear_module_state->__pyx_n_s_import); + Py_CLEAR(clear_module_state->__pyx_n_s_indic_detokenize); + Py_CLEAR(clear_module_state->__pyx_n_s_indic_tokenize); + Py_CLEAR(clear_module_state->__pyx_n_s_indicnlp_normalize_indic_normali); + Py_CLEAR(clear_module_state->__pyx_n_s_indicnlp_tokenize); + Py_CLEAR(clear_module_state->__pyx_n_s_indicnlp_transliterate_unicode_t); + Py_CLEAR(clear_module_state->__pyx_n_s_inference); + Py_CLEAR(clear_module_state->__pyx_n_s_initializing); + Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine); + Py_CLEAR(clear_module_state->__pyx_n_s_is_target); + Py_CLEAR(clear_module_state->__pyx_kp_u_isenabled); + Py_CLEAR(clear_module_state->__pyx_n_s_items); + Py_CLEAR(clear_module_state->__pyx_n_u_kK); + Py_CLEAR(clear_module_state->__pyx_n_u_kan_Knda); + Py_CLEAR(clear_module_state->__pyx_n_u_kas_Arab); + 
Py_CLEAR(clear_module_state->__pyx_n_u_kas_Deva); + Py_CLEAR(clear_module_state->__pyx_n_u_kha_Latn); + Py_CLEAR(clear_module_state->__pyx_n_u_kn); + Py_CLEAR(clear_module_state->__pyx_n_s_lang); + Py_CLEAR(clear_module_state->__pyx_n_u_line); + Py_CLEAR(clear_module_state->__pyx_n_u_lus_Latn); + Py_CLEAR(clear_module_state->__pyx_n_s_m); + Py_CLEAR(clear_module_state->__pyx_n_u_mag_Deva); + Py_CLEAR(clear_module_state->__pyx_n_u_mai_Deva); + Py_CLEAR(clear_module_state->__pyx_n_s_main); + Py_CLEAR(clear_module_state->__pyx_n_u_mal_Mlym); + Py_CLEAR(clear_module_state->__pyx_n_u_mar_Deva); + Py_CLEAR(clear_module_state->__pyx_n_u_ml); + Py_CLEAR(clear_module_state->__pyx_n_u_mni_Beng); + Py_CLEAR(clear_module_state->__pyx_n_u_mni_Mtei); + Py_CLEAR(clear_module_state->__pyx_n_u_mr); + Py_CLEAR(clear_module_state->__pyx_kp_u_n); + Py_CLEAR(clear_module_state->__pyx_kp_u_n_2); + Py_CLEAR(clear_module_state->__pyx_n_s_name); + Py_CLEAR(clear_module_state->__pyx_n_u_ne); + Py_CLEAR(clear_module_state->__pyx_kp_s_no_default___reduce___due_to_non); + Py_CLEAR(clear_module_state->__pyx_n_s_normalize); + Py_CLEAR(clear_module_state->__pyx_n_u_npi_Deva); + Py_CLEAR(clear_module_state->__pyx_n_u_or); + Py_CLEAR(clear_module_state->__pyx_n_u_ory); + Py_CLEAR(clear_module_state->__pyx_n_u_ory_Orya); + Py_CLEAR(clear_module_state->__pyx_n_u_pa); + Py_CLEAR(clear_module_state->__pyx_n_u_pan_Guru); + Py_CLEAR(clear_module_state->__pyx_n_s_postprocess_batch); + Py_CLEAR(clear_module_state->__pyx_n_s_preprocess_batch); + Py_CLEAR(clear_module_state->__pyx_n_s_put); + Py_CLEAR(clear_module_state->__pyx_n_s_pyx_state); + Py_CLEAR(clear_module_state->__pyx_n_s_pyx_vtable); + Py_CLEAR(clear_module_state->__pyx_n_s_queue); + Py_CLEAR(clear_module_state->__pyx_kp_u_r); + Py_CLEAR(clear_module_state->__pyx_n_s_range); + Py_CLEAR(clear_module_state->__pyx_n_s_re); + Py_CLEAR(clear_module_state->__pyx_n_s_reduce); + Py_CLEAR(clear_module_state->__pyx_n_s_reduce_cython); + 
Py_CLEAR(clear_module_state->__pyx_n_s_reduce_ex); + Py_CLEAR(clear_module_state->__pyx_n_s_regex); + Py_CLEAR(clear_module_state->__pyx_n_s_replace); + Py_CLEAR(clear_module_state->__pyx_kp_u_s); + Py_CLEAR(clear_module_state->__pyx_kp_u_s_2); + Py_CLEAR(clear_module_state->__pyx_kp_u_s_3); + Py_CLEAR(clear_module_state->__pyx_kp_u_s_s); + Py_CLEAR(clear_module_state->__pyx_kp_u_s_s_2); + Py_CLEAR(clear_module_state->__pyx_n_s_sacremoses); + Py_CLEAR(clear_module_state->__pyx_n_u_san_Deva); + Py_CLEAR(clear_module_state->__pyx_n_u_sat_Olck); + Py_CLEAR(clear_module_state->__pyx_n_s_self); + Py_CLEAR(clear_module_state->__pyx_n_s_sents); + Py_CLEAR(clear_module_state->__pyx_n_s_setstate); + Py_CLEAR(clear_module_state->__pyx_n_s_setstate_cython); + Py_CLEAR(clear_module_state->__pyx_n_u_snd_Arab); + Py_CLEAR(clear_module_state->__pyx_n_u_snd_Deva); + Py_CLEAR(clear_module_state->__pyx_n_s_spec); + Py_CLEAR(clear_module_state->__pyx_n_s_split); + Py_CLEAR(clear_module_state->__pyx_n_s_src_lang); + Py_CLEAR(clear_module_state->__pyx_kp_s_stringsource); + Py_CLEAR(clear_module_state->__pyx_n_s_strip); + Py_CLEAR(clear_module_state->__pyx_n_s_sub); + Py_CLEAR(clear_module_state->__pyx_n_u_ta); + Py_CLEAR(clear_module_state->__pyx_n_u_tam_Taml); + Py_CLEAR(clear_module_state->__pyx_n_u_te); + Py_CLEAR(clear_module_state->__pyx_n_u_tel_Telu); + Py_CLEAR(clear_module_state->__pyx_n_s_test); + Py_CLEAR(clear_module_state->__pyx_n_s_tgt_lang); + Py_CLEAR(clear_module_state->__pyx_n_s_tokenize); + Py_CLEAR(clear_module_state->__pyx_n_s_total); + Py_CLEAR(clear_module_state->__pyx_n_s_tqdm); + Py_CLEAR(clear_module_state->__pyx_n_s_translate); + Py_CLEAR(clear_module_state->__pyx_n_s_transliterate); + Py_CLEAR(clear_module_state->__pyx_n_s_trivial_detokenize); + Py_CLEAR(clear_module_state->__pyx_n_s_trivial_tokenize); + Py_CLEAR(clear_module_state->__pyx_n_s_typing); + Py_CLEAR(clear_module_state->__pyx_n_s_unit); + Py_CLEAR(clear_module_state->__pyx_n_u_unr_Deva); + 
Py_CLEAR(clear_module_state->__pyx_n_u_ur); + Py_CLEAR(clear_module_state->__pyx_n_u_urd_Arab); + Py_CLEAR(clear_module_state->__pyx_n_s_visualize); + Py_CLEAR(clear_module_state->__pyx_int_0); + Py_CLEAR(clear_module_state->__pyx_tuple__150); + Py_CLEAR(clear_module_state->__pyx_tuple__152); + Py_CLEAR(clear_module_state->__pyx_tuple__158); + Py_CLEAR(clear_module_state->__pyx_tuple__161); + Py_CLEAR(clear_module_state->__pyx_tuple__164); + Py_CLEAR(clear_module_state->__pyx_tuple__167); + Py_CLEAR(clear_module_state->__pyx_tuple__170); + Py_CLEAR(clear_module_state->__pyx_tuple__172); + Py_CLEAR(clear_module_state->__pyx_tuple__174); + Py_CLEAR(clear_module_state->__pyx_tuple__175); + Py_CLEAR(clear_module_state->__pyx_tuple__177); + Py_CLEAR(clear_module_state->__pyx_tuple__178); + Py_CLEAR(clear_module_state->__pyx_tuple__180); + Py_CLEAR(clear_module_state->__pyx_codeobj__173); + Py_CLEAR(clear_module_state->__pyx_codeobj__176); + Py_CLEAR(clear_module_state->__pyx_codeobj__179); + Py_CLEAR(clear_module_state->__pyx_codeobj__181); + return 0; +} +#endif +/* #### Code section: module_state_traverse ### */ +#if CYTHON_USE_MODULE_STATE +static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) { + __pyx_mstate *traverse_module_state = __pyx_mstate(m); + if (!traverse_module_state) return 0; + Py_VISIT(traverse_module_state->__pyx_d); + Py_VISIT(traverse_module_state->__pyx_b); + Py_VISIT(traverse_module_state->__pyx_cython_runtime); + Py_VISIT(traverse_module_state->__pyx_empty_tuple); + Py_VISIT(traverse_module_state->__pyx_empty_bytes); + Py_VISIT(traverse_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_VISIT(traverse_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_VISIT(traverse_module_state->__pyx_FusedFunctionType); + #endif + Py_VISIT(traverse_module_state->__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor); + 
Py_VISIT(traverse_module_state->__pyx_type_17IndicTransToolkit_9processor_IndicProcessor); + Py_VISIT(traverse_module_state->__pyx_n_u_); + Py_VISIT(traverse_module_state->__pyx_kp_u_0); + Py_VISIT(traverse_module_state->__pyx_kp_u_1); + Py_VISIT(traverse_module_state->__pyx_kp_u_1_2); + Py_VISIT(traverse_module_state->__pyx_kp_u_1_2_2); + Py_VISIT(traverse_module_state->__pyx_kp_u_1_3); + Py_VISIT(traverse_module_state->__pyx_kp_u_1_4); + Py_VISIT(traverse_module_state->__pyx_kp_u_2); + Py_VISIT(traverse_module_state->__pyx_kp_u_2_2); + Py_VISIT(traverse_module_state->__pyx_kp_u_3); + Py_VISIT(traverse_module_state->__pyx_kp_u_4); + Py_VISIT(traverse_module_state->__pyx_kp_u_5); + Py_VISIT(traverse_module_state->__pyx_kp_u_6); + Py_VISIT(traverse_module_state->__pyx_kp_u_7); + Py_VISIT(traverse_module_state->__pyx_kp_u_8); + Py_VISIT(traverse_module_state->__pyx_kp_u_9); + Py_VISIT(traverse_module_state->__pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2); + Py_VISIT(traverse_module_state->__pyx_kp_u_A_Za_z0_9_w); + Py_VISIT(traverse_module_state->__pyx_n_u_Arab); + Py_VISIT(traverse_module_state->__pyx_n_u_Aran); + Py_VISIT(traverse_module_state->__pyx_kp_u_C); + Py_VISIT(traverse_module_state->__pyx_kp_u_C_2); + Py_VISIT(traverse_module_state->__pyx_n_s_Dict); + Py_VISIT(traverse_module_state->__pyx_kp_u_ID); + Py_VISIT(traverse_module_state->__pyx_kp_u_ID_2); + Py_VISIT(traverse_module_state->__pyx_kp_u_ID_3); + Py_VISIT(traverse_module_state->__pyx_kp_u_ID_4); + Py_VISIT(traverse_module_state->__pyx_kp_u_ID_5); + Py_VISIT(traverse_module_state->__pyx_n_s_IndicNormalizerFactory); + Py_VISIT(traverse_module_state->__pyx_n_s_IndicProcessor); + Py_VISIT(traverse_module_state->__pyx_n_s_IndicProcessor___reduce_cython); + Py_VISIT(traverse_module_state->__pyx_n_s_IndicProcessor___setstate_cython); + Py_VISIT(traverse_module_state->__pyx_n_s_IndicProcessor_postprocess_batch); + Py_VISIT(traverse_module_state->__pyx_n_s_IndicProcessor_preprocess_batch); + 
Py_VISIT(traverse_module_state->__pyx_n_s_IndicTransToolkit_processor); + Py_VISIT(traverse_module_state->__pyx_kp_s_IndicTransToolkit_processor_pyx); + Py_VISIT(traverse_module_state->__pyx_n_u_Latn); + Py_VISIT(traverse_module_state->__pyx_n_s_List); + Py_VISIT(traverse_module_state->__pyx_n_s_MosesDetokenizer); + Py_VISIT(traverse_module_state->__pyx_n_s_MosesPunctNormalizer); + Py_VISIT(traverse_module_state->__pyx_n_s_MosesTokenizer); + Py_VISIT(traverse_module_state->__pyx_n_u_Mtei); + Py_VISIT(traverse_module_state->__pyx_kp_u_None); + Py_VISIT(traverse_module_state->__pyx_n_u_Olck); + Py_VISIT(traverse_module_state->__pyx_kp_u_Post_processing); + Py_VISIT(traverse_module_state->__pyx_kp_u_Pre_processing); + Py_VISIT(traverse_module_state->__pyx_n_s_Queue); + Py_VISIT(traverse_module_state->__pyx_n_s_TypeError); + Py_VISIT(traverse_module_state->__pyx_n_s_UnicodeIndicTransliterator); + Py_VISIT(traverse_module_state->__pyx_n_s_Union); + Py_VISIT(traverse_module_state->__pyx_n_u__10); + Py_VISIT(traverse_module_state->__pyx_n_u__100); + Py_VISIT(traverse_module_state->__pyx_kp_u__101); + Py_VISIT(traverse_module_state->__pyx_kp_u__102); + Py_VISIT(traverse_module_state->__pyx_kp_u__103); + Py_VISIT(traverse_module_state->__pyx_kp_u__104); + Py_VISIT(traverse_module_state->__pyx_kp_u__105); + Py_VISIT(traverse_module_state->__pyx_kp_u__106); + Py_VISIT(traverse_module_state->__pyx_kp_u__107); + Py_VISIT(traverse_module_state->__pyx_kp_u__108); + Py_VISIT(traverse_module_state->__pyx_kp_u__109); + Py_VISIT(traverse_module_state->__pyx_n_u__11); + Py_VISIT(traverse_module_state->__pyx_kp_u__110); + Py_VISIT(traverse_module_state->__pyx_kp_u__111); + Py_VISIT(traverse_module_state->__pyx_kp_u__112); + Py_VISIT(traverse_module_state->__pyx_kp_u__113); + Py_VISIT(traverse_module_state->__pyx_kp_u__114); + Py_VISIT(traverse_module_state->__pyx_kp_u__115); + Py_VISIT(traverse_module_state->__pyx_kp_u__116); + Py_VISIT(traverse_module_state->__pyx_kp_u__117); + 
Py_VISIT(traverse_module_state->__pyx_kp_u__118); + Py_VISIT(traverse_module_state->__pyx_kp_u__119); + Py_VISIT(traverse_module_state->__pyx_n_u__12); + Py_VISIT(traverse_module_state->__pyx_kp_u__120); + Py_VISIT(traverse_module_state->__pyx_kp_u__121); + Py_VISIT(traverse_module_state->__pyx_kp_u__122); + Py_VISIT(traverse_module_state->__pyx_kp_u__123); + Py_VISIT(traverse_module_state->__pyx_kp_u__124); + Py_VISIT(traverse_module_state->__pyx_kp_u__125); + Py_VISIT(traverse_module_state->__pyx_kp_u__126); + Py_VISIT(traverse_module_state->__pyx_kp_u__127); + Py_VISIT(traverse_module_state->__pyx_kp_u__128); + Py_VISIT(traverse_module_state->__pyx_kp_u__129); + Py_VISIT(traverse_module_state->__pyx_n_u__13); + Py_VISIT(traverse_module_state->__pyx_kp_u__130); + Py_VISIT(traverse_module_state->__pyx_kp_u__131); + Py_VISIT(traverse_module_state->__pyx_kp_u__132); + Py_VISIT(traverse_module_state->__pyx_kp_u__133); + Py_VISIT(traverse_module_state->__pyx_kp_u__134); + Py_VISIT(traverse_module_state->__pyx_kp_u__135); + Py_VISIT(traverse_module_state->__pyx_kp_u__136); + Py_VISIT(traverse_module_state->__pyx_kp_u__137); + Py_VISIT(traverse_module_state->__pyx_n_u__138); + Py_VISIT(traverse_module_state->__pyx_kp_u__139); + Py_VISIT(traverse_module_state->__pyx_n_u__14); + Py_VISIT(traverse_module_state->__pyx_kp_u__140); + Py_VISIT(traverse_module_state->__pyx_kp_u__141); + Py_VISIT(traverse_module_state->__pyx_kp_u__142); + Py_VISIT(traverse_module_state->__pyx_kp_u__143); + Py_VISIT(traverse_module_state->__pyx_kp_u__144); + Py_VISIT(traverse_module_state->__pyx_kp_u__145); + Py_VISIT(traverse_module_state->__pyx_kp_u__146); + Py_VISIT(traverse_module_state->__pyx_kp_u__147); + Py_VISIT(traverse_module_state->__pyx_kp_u__148); + Py_VISIT(traverse_module_state->__pyx_kp_u__149); + Py_VISIT(traverse_module_state->__pyx_n_u__15); + Py_VISIT(traverse_module_state->__pyx_kp_u__151); + Py_VISIT(traverse_module_state->__pyx_kp_u__153); + 
Py_VISIT(traverse_module_state->__pyx_kp_u__154); + Py_VISIT(traverse_module_state->__pyx_n_u__155); + Py_VISIT(traverse_module_state->__pyx_kp_u__156); + Py_VISIT(traverse_module_state->__pyx_kp_u__157); + Py_VISIT(traverse_module_state->__pyx_kp_u__159); + Py_VISIT(traverse_module_state->__pyx_n_u__16); + Py_VISIT(traverse_module_state->__pyx_kp_u__160); + Py_VISIT(traverse_module_state->__pyx_kp_u__162); + Py_VISIT(traverse_module_state->__pyx_kp_u__163); + Py_VISIT(traverse_module_state->__pyx_kp_u__165); + Py_VISIT(traverse_module_state->__pyx_n_u__166); + Py_VISIT(traverse_module_state->__pyx_kp_u__168); + Py_VISIT(traverse_module_state->__pyx_n_u__169); + Py_VISIT(traverse_module_state->__pyx_n_u__17); + Py_VISIT(traverse_module_state->__pyx_n_s__171); + Py_VISIT(traverse_module_state->__pyx_n_u__18); + Py_VISIT(traverse_module_state->__pyx_n_s__182); + Py_VISIT(traverse_module_state->__pyx_n_u__19); + Py_VISIT(traverse_module_state->__pyx_n_u__2); + Py_VISIT(traverse_module_state->__pyx_n_u__20); + Py_VISIT(traverse_module_state->__pyx_n_u__21); + Py_VISIT(traverse_module_state->__pyx_n_u__22); + Py_VISIT(traverse_module_state->__pyx_n_u__23); + Py_VISIT(traverse_module_state->__pyx_n_u__24); + Py_VISIT(traverse_module_state->__pyx_n_u__25); + Py_VISIT(traverse_module_state->__pyx_n_u__26); + Py_VISIT(traverse_module_state->__pyx_n_u__27); + Py_VISIT(traverse_module_state->__pyx_n_u__28); + Py_VISIT(traverse_module_state->__pyx_n_u__29); + Py_VISIT(traverse_module_state->__pyx_n_u__3); + Py_VISIT(traverse_module_state->__pyx_n_u__30); + Py_VISIT(traverse_module_state->__pyx_n_u__31); + Py_VISIT(traverse_module_state->__pyx_n_u__32); + Py_VISIT(traverse_module_state->__pyx_n_u__33); + Py_VISIT(traverse_module_state->__pyx_n_u__34); + Py_VISIT(traverse_module_state->__pyx_n_u__35); + Py_VISIT(traverse_module_state->__pyx_n_u__36); + Py_VISIT(traverse_module_state->__pyx_n_u__37); + Py_VISIT(traverse_module_state->__pyx_n_u__38); + 
Py_VISIT(traverse_module_state->__pyx_n_u__39); + Py_VISIT(traverse_module_state->__pyx_n_u__4); + Py_VISIT(traverse_module_state->__pyx_n_u__40); + Py_VISIT(traverse_module_state->__pyx_n_u__41); + Py_VISIT(traverse_module_state->__pyx_n_u__42); + Py_VISIT(traverse_module_state->__pyx_n_u__43); + Py_VISIT(traverse_module_state->__pyx_n_u__44); + Py_VISIT(traverse_module_state->__pyx_n_u__45); + Py_VISIT(traverse_module_state->__pyx_n_u__46); + Py_VISIT(traverse_module_state->__pyx_n_u__47); + Py_VISIT(traverse_module_state->__pyx_n_u__48); + Py_VISIT(traverse_module_state->__pyx_n_u__49); + Py_VISIT(traverse_module_state->__pyx_n_u__5); + Py_VISIT(traverse_module_state->__pyx_n_u__50); + Py_VISIT(traverse_module_state->__pyx_n_u__51); + Py_VISIT(traverse_module_state->__pyx_n_u__52); + Py_VISIT(traverse_module_state->__pyx_n_u__53); + Py_VISIT(traverse_module_state->__pyx_n_u__54); + Py_VISIT(traverse_module_state->__pyx_n_u__55); + Py_VISIT(traverse_module_state->__pyx_n_u__56); + Py_VISIT(traverse_module_state->__pyx_n_u__57); + Py_VISIT(traverse_module_state->__pyx_n_u__58); + Py_VISIT(traverse_module_state->__pyx_n_u__59); + Py_VISIT(traverse_module_state->__pyx_n_u__6); + Py_VISIT(traverse_module_state->__pyx_n_u__60); + Py_VISIT(traverse_module_state->__pyx_n_u__61); + Py_VISIT(traverse_module_state->__pyx_n_u__62); + Py_VISIT(traverse_module_state->__pyx_n_u__63); + Py_VISIT(traverse_module_state->__pyx_n_u__64); + Py_VISIT(traverse_module_state->__pyx_n_u__65); + Py_VISIT(traverse_module_state->__pyx_n_u__66); + Py_VISIT(traverse_module_state->__pyx_n_u__67); + Py_VISIT(traverse_module_state->__pyx_n_u__68); + Py_VISIT(traverse_module_state->__pyx_n_u__69); + Py_VISIT(traverse_module_state->__pyx_n_u__7); + Py_VISIT(traverse_module_state->__pyx_n_u__70); + Py_VISIT(traverse_module_state->__pyx_n_u__71); + Py_VISIT(traverse_module_state->__pyx_n_u__72); + Py_VISIT(traverse_module_state->__pyx_n_u__73); + Py_VISIT(traverse_module_state->__pyx_n_u__74); + 
Py_VISIT(traverse_module_state->__pyx_n_u__75); + Py_VISIT(traverse_module_state->__pyx_n_u__76); + Py_VISIT(traverse_module_state->__pyx_n_u__77); + Py_VISIT(traverse_module_state->__pyx_n_u__78); + Py_VISIT(traverse_module_state->__pyx_n_u__79); + Py_VISIT(traverse_module_state->__pyx_n_u__8); + Py_VISIT(traverse_module_state->__pyx_n_u__80); + Py_VISIT(traverse_module_state->__pyx_n_u__81); + Py_VISIT(traverse_module_state->__pyx_n_u__82); + Py_VISIT(traverse_module_state->__pyx_n_u__83); + Py_VISIT(traverse_module_state->__pyx_n_u__84); + Py_VISIT(traverse_module_state->__pyx_n_u__85); + Py_VISIT(traverse_module_state->__pyx_n_u__86); + Py_VISIT(traverse_module_state->__pyx_n_u__87); + Py_VISIT(traverse_module_state->__pyx_n_u__88); + Py_VISIT(traverse_module_state->__pyx_n_u__89); + Py_VISIT(traverse_module_state->__pyx_n_u__9); + Py_VISIT(traverse_module_state->__pyx_n_u__90); + Py_VISIT(traverse_module_state->__pyx_n_u__91); + Py_VISIT(traverse_module_state->__pyx_n_u__92); + Py_VISIT(traverse_module_state->__pyx_n_u__93); + Py_VISIT(traverse_module_state->__pyx_n_u__94); + Py_VISIT(traverse_module_state->__pyx_n_u__95); + Py_VISIT(traverse_module_state->__pyx_n_u__96); + Py_VISIT(traverse_module_state->__pyx_n_u__97); + Py_VISIT(traverse_module_state->__pyx_n_u__98); + Py_VISIT(traverse_module_state->__pyx_n_u__99); + Py_VISIT(traverse_module_state->__pyx_n_u_as); + Py_VISIT(traverse_module_state->__pyx_n_u_asm_Beng); + Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines); + Py_VISIT(traverse_module_state->__pyx_n_u_awa_Deva); + Py_VISIT(traverse_module_state->__pyx_kp_u_b_w_https_ftp_w_w_w_b); + Py_VISIT(traverse_module_state->__pyx_n_s_batch); + Py_VISIT(traverse_module_state->__pyx_n_u_ben_Beng); + Py_VISIT(traverse_module_state->__pyx_n_u_bho_Deva); + Py_VISIT(traverse_module_state->__pyx_n_u_bn); + Py_VISIT(traverse_module_state->__pyx_n_u_brx_Deva); + Py_VISIT(traverse_module_state->__pyx_n_s_chr); + 
Py_VISIT(traverse_module_state->__pyx_n_s_cinit___locals_lambda); + Py_VISIT(traverse_module_state->__pyx_n_s_clear); + Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback); + Py_VISIT(traverse_module_state->__pyx_n_s_compile); + Py_VISIT(traverse_module_state->__pyx_kp_u_d); + Py_VISIT(traverse_module_state->__pyx_kp_u_d_d); + Py_VISIT(traverse_module_state->__pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d); + Py_VISIT(traverse_module_state->__pyx_n_s_desc); + Py_VISIT(traverse_module_state->__pyx_n_s_detokenize); + Py_VISIT(traverse_module_state->__pyx_kp_u_disable); + Py_VISIT(traverse_module_state->__pyx_n_u_doi_Deva); + Py_VISIT(traverse_module_state->__pyx_n_u_en); + Py_VISIT(traverse_module_state->__pyx_kp_u_enable); + Py_VISIT(traverse_module_state->__pyx_n_u_eng_Latn); + Py_VISIT(traverse_module_state->__pyx_n_s_escape); + Py_VISIT(traverse_module_state->__pyx_n_s_findall); + Py_VISIT(traverse_module_state->__pyx_kp_u_gc); + Py_VISIT(traverse_module_state->__pyx_n_s_get); + Py_VISIT(traverse_module_state->__pyx_n_s_get_normalizer); + Py_VISIT(traverse_module_state->__pyx_n_s_getstate); + Py_VISIT(traverse_module_state->__pyx_n_u_gom_Deva); + Py_VISIT(traverse_module_state->__pyx_n_u_gon_Deva); + Py_VISIT(traverse_module_state->__pyx_n_s_group); + Py_VISIT(traverse_module_state->__pyx_n_u_gu); + Py_VISIT(traverse_module_state->__pyx_n_u_guj_Gujr); + Py_VISIT(traverse_module_state->__pyx_n_u_hi); + Py_VISIT(traverse_module_state->__pyx_n_u_hin_Deva); + Py_VISIT(traverse_module_state->__pyx_n_u_hne_Deva); + Py_VISIT(traverse_module_state->__pyx_n_s_import); + Py_VISIT(traverse_module_state->__pyx_n_s_indic_detokenize); + Py_VISIT(traverse_module_state->__pyx_n_s_indic_tokenize); + Py_VISIT(traverse_module_state->__pyx_n_s_indicnlp_normalize_indic_normali); + Py_VISIT(traverse_module_state->__pyx_n_s_indicnlp_tokenize); + Py_VISIT(traverse_module_state->__pyx_n_s_indicnlp_transliterate_unicode_t); + Py_VISIT(traverse_module_state->__pyx_n_s_inference); + 
Py_VISIT(traverse_module_state->__pyx_n_s_initializing); + Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine); + Py_VISIT(traverse_module_state->__pyx_n_s_is_target); + Py_VISIT(traverse_module_state->__pyx_kp_u_isenabled); + Py_VISIT(traverse_module_state->__pyx_n_s_items); + Py_VISIT(traverse_module_state->__pyx_n_u_kK); + Py_VISIT(traverse_module_state->__pyx_n_u_kan_Knda); + Py_VISIT(traverse_module_state->__pyx_n_u_kas_Arab); + Py_VISIT(traverse_module_state->__pyx_n_u_kas_Deva); + Py_VISIT(traverse_module_state->__pyx_n_u_kha_Latn); + Py_VISIT(traverse_module_state->__pyx_n_u_kn); + Py_VISIT(traverse_module_state->__pyx_n_s_lang); + Py_VISIT(traverse_module_state->__pyx_n_u_line); + Py_VISIT(traverse_module_state->__pyx_n_u_lus_Latn); + Py_VISIT(traverse_module_state->__pyx_n_s_m); + Py_VISIT(traverse_module_state->__pyx_n_u_mag_Deva); + Py_VISIT(traverse_module_state->__pyx_n_u_mai_Deva); + Py_VISIT(traverse_module_state->__pyx_n_s_main); + Py_VISIT(traverse_module_state->__pyx_n_u_mal_Mlym); + Py_VISIT(traverse_module_state->__pyx_n_u_mar_Deva); + Py_VISIT(traverse_module_state->__pyx_n_u_ml); + Py_VISIT(traverse_module_state->__pyx_n_u_mni_Beng); + Py_VISIT(traverse_module_state->__pyx_n_u_mni_Mtei); + Py_VISIT(traverse_module_state->__pyx_n_u_mr); + Py_VISIT(traverse_module_state->__pyx_kp_u_n); + Py_VISIT(traverse_module_state->__pyx_kp_u_n_2); + Py_VISIT(traverse_module_state->__pyx_n_s_name); + Py_VISIT(traverse_module_state->__pyx_n_u_ne); + Py_VISIT(traverse_module_state->__pyx_kp_s_no_default___reduce___due_to_non); + Py_VISIT(traverse_module_state->__pyx_n_s_normalize); + Py_VISIT(traverse_module_state->__pyx_n_u_npi_Deva); + Py_VISIT(traverse_module_state->__pyx_n_u_or); + Py_VISIT(traverse_module_state->__pyx_n_u_ory); + Py_VISIT(traverse_module_state->__pyx_n_u_ory_Orya); + Py_VISIT(traverse_module_state->__pyx_n_u_pa); + Py_VISIT(traverse_module_state->__pyx_n_u_pan_Guru); + Py_VISIT(traverse_module_state->__pyx_n_s_postprocess_batch); + 
Py_VISIT(traverse_module_state->__pyx_n_s_preprocess_batch); + Py_VISIT(traverse_module_state->__pyx_n_s_put); + Py_VISIT(traverse_module_state->__pyx_n_s_pyx_state); + Py_VISIT(traverse_module_state->__pyx_n_s_pyx_vtable); + Py_VISIT(traverse_module_state->__pyx_n_s_queue); + Py_VISIT(traverse_module_state->__pyx_kp_u_r); + Py_VISIT(traverse_module_state->__pyx_n_s_range); + Py_VISIT(traverse_module_state->__pyx_n_s_re); + Py_VISIT(traverse_module_state->__pyx_n_s_reduce); + Py_VISIT(traverse_module_state->__pyx_n_s_reduce_cython); + Py_VISIT(traverse_module_state->__pyx_n_s_reduce_ex); + Py_VISIT(traverse_module_state->__pyx_n_s_regex); + Py_VISIT(traverse_module_state->__pyx_n_s_replace); + Py_VISIT(traverse_module_state->__pyx_kp_u_s); + Py_VISIT(traverse_module_state->__pyx_kp_u_s_2); + Py_VISIT(traverse_module_state->__pyx_kp_u_s_3); + Py_VISIT(traverse_module_state->__pyx_kp_u_s_s); + Py_VISIT(traverse_module_state->__pyx_kp_u_s_s_2); + Py_VISIT(traverse_module_state->__pyx_n_s_sacremoses); + Py_VISIT(traverse_module_state->__pyx_n_u_san_Deva); + Py_VISIT(traverse_module_state->__pyx_n_u_sat_Olck); + Py_VISIT(traverse_module_state->__pyx_n_s_self); + Py_VISIT(traverse_module_state->__pyx_n_s_sents); + Py_VISIT(traverse_module_state->__pyx_n_s_setstate); + Py_VISIT(traverse_module_state->__pyx_n_s_setstate_cython); + Py_VISIT(traverse_module_state->__pyx_n_u_snd_Arab); + Py_VISIT(traverse_module_state->__pyx_n_u_snd_Deva); + Py_VISIT(traverse_module_state->__pyx_n_s_spec); + Py_VISIT(traverse_module_state->__pyx_n_s_split); + Py_VISIT(traverse_module_state->__pyx_n_s_src_lang); + Py_VISIT(traverse_module_state->__pyx_kp_s_stringsource); + Py_VISIT(traverse_module_state->__pyx_n_s_strip); + Py_VISIT(traverse_module_state->__pyx_n_s_sub); + Py_VISIT(traverse_module_state->__pyx_n_u_ta); + Py_VISIT(traverse_module_state->__pyx_n_u_tam_Taml); + Py_VISIT(traverse_module_state->__pyx_n_u_te); + Py_VISIT(traverse_module_state->__pyx_n_u_tel_Telu); + 
Py_VISIT(traverse_module_state->__pyx_n_s_test); + Py_VISIT(traverse_module_state->__pyx_n_s_tgt_lang); + Py_VISIT(traverse_module_state->__pyx_n_s_tokenize); + Py_VISIT(traverse_module_state->__pyx_n_s_total); + Py_VISIT(traverse_module_state->__pyx_n_s_tqdm); + Py_VISIT(traverse_module_state->__pyx_n_s_translate); + Py_VISIT(traverse_module_state->__pyx_n_s_transliterate); + Py_VISIT(traverse_module_state->__pyx_n_s_trivial_detokenize); + Py_VISIT(traverse_module_state->__pyx_n_s_trivial_tokenize); + Py_VISIT(traverse_module_state->__pyx_n_s_typing); + Py_VISIT(traverse_module_state->__pyx_n_s_unit); + Py_VISIT(traverse_module_state->__pyx_n_u_unr_Deva); + Py_VISIT(traverse_module_state->__pyx_n_u_ur); + Py_VISIT(traverse_module_state->__pyx_n_u_urd_Arab); + Py_VISIT(traverse_module_state->__pyx_n_s_visualize); + Py_VISIT(traverse_module_state->__pyx_int_0); + Py_VISIT(traverse_module_state->__pyx_tuple__150); + Py_VISIT(traverse_module_state->__pyx_tuple__152); + Py_VISIT(traverse_module_state->__pyx_tuple__158); + Py_VISIT(traverse_module_state->__pyx_tuple__161); + Py_VISIT(traverse_module_state->__pyx_tuple__164); + Py_VISIT(traverse_module_state->__pyx_tuple__167); + Py_VISIT(traverse_module_state->__pyx_tuple__170); + Py_VISIT(traverse_module_state->__pyx_tuple__172); + Py_VISIT(traverse_module_state->__pyx_tuple__174); + Py_VISIT(traverse_module_state->__pyx_tuple__175); + Py_VISIT(traverse_module_state->__pyx_tuple__177); + Py_VISIT(traverse_module_state->__pyx_tuple__178); + Py_VISIT(traverse_module_state->__pyx_tuple__180); + Py_VISIT(traverse_module_state->__pyx_codeobj__173); + Py_VISIT(traverse_module_state->__pyx_codeobj__176); + Py_VISIT(traverse_module_state->__pyx_codeobj__179); + Py_VISIT(traverse_module_state->__pyx_codeobj__181); + return 0; +} +#endif +/* #### Code section: module_state_defines ### */ +#define __pyx_d __pyx_mstate_global->__pyx_d +#define __pyx_b __pyx_mstate_global->__pyx_b +#define __pyx_cython_runtime 
__pyx_mstate_global->__pyx_cython_runtime +#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple +#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes +#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode +#ifdef __Pyx_CyFunction_USED +#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType +#endif +#ifdef __Pyx_FusedFunction_USED +#define __pyx_FusedFunctionType __pyx_mstate_global->__pyx_FusedFunctionType +#endif +#ifdef __Pyx_Generator_USED +#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType +#endif +#ifdef __Pyx_IterableCoroutine_USED +#define __pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType +#endif +#if CYTHON_USE_MODULE_STATE +#define __pyx_type_17IndicTransToolkit_9processor_IndicProcessor __pyx_mstate_global->__pyx_type_17IndicTransToolkit_9processor_IndicProcessor +#endif +#define __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor __pyx_mstate_global->__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor +#define __pyx_n_u_ __pyx_mstate_global->__pyx_n_u_ +#define __pyx_kp_u_0 __pyx_mstate_global->__pyx_kp_u_0 +#define __pyx_kp_u_1 __pyx_mstate_global->__pyx_kp_u_1 +#define __pyx_kp_u_1_2 __pyx_mstate_global->__pyx_kp_u_1_2 +#define __pyx_kp_u_1_2_2 __pyx_mstate_global->__pyx_kp_u_1_2_2 +#define __pyx_kp_u_1_3 __pyx_mstate_global->__pyx_kp_u_1_3 +#define __pyx_kp_u_1_4 __pyx_mstate_global->__pyx_kp_u_1_4 +#define __pyx_kp_u_2 __pyx_mstate_global->__pyx_kp_u_2 +#define __pyx_kp_u_2_2 __pyx_mstate_global->__pyx_kp_u_2_2 +#define __pyx_kp_u_3 __pyx_mstate_global->__pyx_kp_u_3 +#define __pyx_kp_u_4 __pyx_mstate_global->__pyx_kp_u_4 +#define __pyx_kp_u_5 __pyx_mstate_global->__pyx_kp_u_5 +#define __pyx_kp_u_6 
__pyx_mstate_global->__pyx_kp_u_6 +#define __pyx_kp_u_7 __pyx_mstate_global->__pyx_kp_u_7 +#define __pyx_kp_u_8 __pyx_mstate_global->__pyx_kp_u_8 +#define __pyx_kp_u_9 __pyx_mstate_global->__pyx_kp_u_9 +#define __pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2 __pyx_mstate_global->__pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2 +#define __pyx_kp_u_A_Za_z0_9_w __pyx_mstate_global->__pyx_kp_u_A_Za_z0_9_w +#define __pyx_n_u_Arab __pyx_mstate_global->__pyx_n_u_Arab +#define __pyx_n_u_Aran __pyx_mstate_global->__pyx_n_u_Aran +#define __pyx_kp_u_C __pyx_mstate_global->__pyx_kp_u_C +#define __pyx_kp_u_C_2 __pyx_mstate_global->__pyx_kp_u_C_2 +#define __pyx_n_s_Dict __pyx_mstate_global->__pyx_n_s_Dict +#define __pyx_kp_u_ID __pyx_mstate_global->__pyx_kp_u_ID +#define __pyx_kp_u_ID_2 __pyx_mstate_global->__pyx_kp_u_ID_2 +#define __pyx_kp_u_ID_3 __pyx_mstate_global->__pyx_kp_u_ID_3 +#define __pyx_kp_u_ID_4 __pyx_mstate_global->__pyx_kp_u_ID_4 +#define __pyx_kp_u_ID_5 __pyx_mstate_global->__pyx_kp_u_ID_5 +#define __pyx_n_s_IndicNormalizerFactory __pyx_mstate_global->__pyx_n_s_IndicNormalizerFactory +#define __pyx_n_s_IndicProcessor __pyx_mstate_global->__pyx_n_s_IndicProcessor +#define __pyx_n_s_IndicProcessor___reduce_cython __pyx_mstate_global->__pyx_n_s_IndicProcessor___reduce_cython +#define __pyx_n_s_IndicProcessor___setstate_cython __pyx_mstate_global->__pyx_n_s_IndicProcessor___setstate_cython +#define __pyx_n_s_IndicProcessor_postprocess_batch __pyx_mstate_global->__pyx_n_s_IndicProcessor_postprocess_batch +#define __pyx_n_s_IndicProcessor_preprocess_batch __pyx_mstate_global->__pyx_n_s_IndicProcessor_preprocess_batch +#define __pyx_n_s_IndicTransToolkit_processor __pyx_mstate_global->__pyx_n_s_IndicTransToolkit_processor +#define __pyx_kp_s_IndicTransToolkit_processor_pyx __pyx_mstate_global->__pyx_kp_s_IndicTransToolkit_processor_pyx +#define __pyx_n_u_Latn __pyx_mstate_global->__pyx_n_u_Latn +#define __pyx_n_s_List __pyx_mstate_global->__pyx_n_s_List +#define 
__pyx_n_s_MosesDetokenizer __pyx_mstate_global->__pyx_n_s_MosesDetokenizer +#define __pyx_n_s_MosesPunctNormalizer __pyx_mstate_global->__pyx_n_s_MosesPunctNormalizer +#define __pyx_n_s_MosesTokenizer __pyx_mstate_global->__pyx_n_s_MosesTokenizer +#define __pyx_n_u_Mtei __pyx_mstate_global->__pyx_n_u_Mtei +#define __pyx_kp_u_None __pyx_mstate_global->__pyx_kp_u_None +#define __pyx_n_u_Olck __pyx_mstate_global->__pyx_n_u_Olck +#define __pyx_kp_u_Post_processing __pyx_mstate_global->__pyx_kp_u_Post_processing +#define __pyx_kp_u_Pre_processing __pyx_mstate_global->__pyx_kp_u_Pre_processing +#define __pyx_n_s_Queue __pyx_mstate_global->__pyx_n_s_Queue +#define __pyx_n_s_TypeError __pyx_mstate_global->__pyx_n_s_TypeError +#define __pyx_n_s_UnicodeIndicTransliterator __pyx_mstate_global->__pyx_n_s_UnicodeIndicTransliterator +#define __pyx_n_s_Union __pyx_mstate_global->__pyx_n_s_Union +#define __pyx_n_u__10 __pyx_mstate_global->__pyx_n_u__10 +#define __pyx_n_u__100 __pyx_mstate_global->__pyx_n_u__100 +#define __pyx_kp_u__101 __pyx_mstate_global->__pyx_kp_u__101 +#define __pyx_kp_u__102 __pyx_mstate_global->__pyx_kp_u__102 +#define __pyx_kp_u__103 __pyx_mstate_global->__pyx_kp_u__103 +#define __pyx_kp_u__104 __pyx_mstate_global->__pyx_kp_u__104 +#define __pyx_kp_u__105 __pyx_mstate_global->__pyx_kp_u__105 +#define __pyx_kp_u__106 __pyx_mstate_global->__pyx_kp_u__106 +#define __pyx_kp_u__107 __pyx_mstate_global->__pyx_kp_u__107 +#define __pyx_kp_u__108 __pyx_mstate_global->__pyx_kp_u__108 +#define __pyx_kp_u__109 __pyx_mstate_global->__pyx_kp_u__109 +#define __pyx_n_u__11 __pyx_mstate_global->__pyx_n_u__11 +#define __pyx_kp_u__110 __pyx_mstate_global->__pyx_kp_u__110 +#define __pyx_kp_u__111 __pyx_mstate_global->__pyx_kp_u__111 +#define __pyx_kp_u__112 __pyx_mstate_global->__pyx_kp_u__112 +#define __pyx_kp_u__113 __pyx_mstate_global->__pyx_kp_u__113 +#define __pyx_kp_u__114 __pyx_mstate_global->__pyx_kp_u__114 +#define __pyx_kp_u__115 __pyx_mstate_global->__pyx_kp_u__115 
+#define __pyx_kp_u__116 __pyx_mstate_global->__pyx_kp_u__116 +#define __pyx_kp_u__117 __pyx_mstate_global->__pyx_kp_u__117 +#define __pyx_kp_u__118 __pyx_mstate_global->__pyx_kp_u__118 +#define __pyx_kp_u__119 __pyx_mstate_global->__pyx_kp_u__119 +#define __pyx_n_u__12 __pyx_mstate_global->__pyx_n_u__12 +#define __pyx_kp_u__120 __pyx_mstate_global->__pyx_kp_u__120 +#define __pyx_kp_u__121 __pyx_mstate_global->__pyx_kp_u__121 +#define __pyx_kp_u__122 __pyx_mstate_global->__pyx_kp_u__122 +#define __pyx_kp_u__123 __pyx_mstate_global->__pyx_kp_u__123 +#define __pyx_kp_u__124 __pyx_mstate_global->__pyx_kp_u__124 +#define __pyx_kp_u__125 __pyx_mstate_global->__pyx_kp_u__125 +#define __pyx_kp_u__126 __pyx_mstate_global->__pyx_kp_u__126 +#define __pyx_kp_u__127 __pyx_mstate_global->__pyx_kp_u__127 +#define __pyx_kp_u__128 __pyx_mstate_global->__pyx_kp_u__128 +#define __pyx_kp_u__129 __pyx_mstate_global->__pyx_kp_u__129 +#define __pyx_n_u__13 __pyx_mstate_global->__pyx_n_u__13 +#define __pyx_kp_u__130 __pyx_mstate_global->__pyx_kp_u__130 +#define __pyx_kp_u__131 __pyx_mstate_global->__pyx_kp_u__131 +#define __pyx_kp_u__132 __pyx_mstate_global->__pyx_kp_u__132 +#define __pyx_kp_u__133 __pyx_mstate_global->__pyx_kp_u__133 +#define __pyx_kp_u__134 __pyx_mstate_global->__pyx_kp_u__134 +#define __pyx_kp_u__135 __pyx_mstate_global->__pyx_kp_u__135 +#define __pyx_kp_u__136 __pyx_mstate_global->__pyx_kp_u__136 +#define __pyx_kp_u__137 __pyx_mstate_global->__pyx_kp_u__137 +#define __pyx_n_u__138 __pyx_mstate_global->__pyx_n_u__138 +#define __pyx_kp_u__139 __pyx_mstate_global->__pyx_kp_u__139 +#define __pyx_n_u__14 __pyx_mstate_global->__pyx_n_u__14 +#define __pyx_kp_u__140 __pyx_mstate_global->__pyx_kp_u__140 +#define __pyx_kp_u__141 __pyx_mstate_global->__pyx_kp_u__141 +#define __pyx_kp_u__142 __pyx_mstate_global->__pyx_kp_u__142 +#define __pyx_kp_u__143 __pyx_mstate_global->__pyx_kp_u__143 +#define __pyx_kp_u__144 __pyx_mstate_global->__pyx_kp_u__144 +#define __pyx_kp_u__145 
__pyx_mstate_global->__pyx_kp_u__145 +#define __pyx_kp_u__146 __pyx_mstate_global->__pyx_kp_u__146 +#define __pyx_kp_u__147 __pyx_mstate_global->__pyx_kp_u__147 +#define __pyx_kp_u__148 __pyx_mstate_global->__pyx_kp_u__148 +#define __pyx_kp_u__149 __pyx_mstate_global->__pyx_kp_u__149 +#define __pyx_n_u__15 __pyx_mstate_global->__pyx_n_u__15 +#define __pyx_kp_u__151 __pyx_mstate_global->__pyx_kp_u__151 +#define __pyx_kp_u__153 __pyx_mstate_global->__pyx_kp_u__153 +#define __pyx_kp_u__154 __pyx_mstate_global->__pyx_kp_u__154 +#define __pyx_n_u__155 __pyx_mstate_global->__pyx_n_u__155 +#define __pyx_kp_u__156 __pyx_mstate_global->__pyx_kp_u__156 +#define __pyx_kp_u__157 __pyx_mstate_global->__pyx_kp_u__157 +#define __pyx_kp_u__159 __pyx_mstate_global->__pyx_kp_u__159 +#define __pyx_n_u__16 __pyx_mstate_global->__pyx_n_u__16 +#define __pyx_kp_u__160 __pyx_mstate_global->__pyx_kp_u__160 +#define __pyx_kp_u__162 __pyx_mstate_global->__pyx_kp_u__162 +#define __pyx_kp_u__163 __pyx_mstate_global->__pyx_kp_u__163 +#define __pyx_kp_u__165 __pyx_mstate_global->__pyx_kp_u__165 +#define __pyx_n_u__166 __pyx_mstate_global->__pyx_n_u__166 +#define __pyx_kp_u__168 __pyx_mstate_global->__pyx_kp_u__168 +#define __pyx_n_u__169 __pyx_mstate_global->__pyx_n_u__169 +#define __pyx_n_u__17 __pyx_mstate_global->__pyx_n_u__17 +#define __pyx_n_s__171 __pyx_mstate_global->__pyx_n_s__171 +#define __pyx_n_u__18 __pyx_mstate_global->__pyx_n_u__18 +#define __pyx_n_s__182 __pyx_mstate_global->__pyx_n_s__182 +#define __pyx_n_u__19 __pyx_mstate_global->__pyx_n_u__19 +#define __pyx_n_u__2 __pyx_mstate_global->__pyx_n_u__2 +#define __pyx_n_u__20 __pyx_mstate_global->__pyx_n_u__20 +#define __pyx_n_u__21 __pyx_mstate_global->__pyx_n_u__21 +#define __pyx_n_u__22 __pyx_mstate_global->__pyx_n_u__22 +#define __pyx_n_u__23 __pyx_mstate_global->__pyx_n_u__23 +#define __pyx_n_u__24 __pyx_mstate_global->__pyx_n_u__24 +#define __pyx_n_u__25 __pyx_mstate_global->__pyx_n_u__25 +#define __pyx_n_u__26 
__pyx_mstate_global->__pyx_n_u__26 +#define __pyx_n_u__27 __pyx_mstate_global->__pyx_n_u__27 +#define __pyx_n_u__28 __pyx_mstate_global->__pyx_n_u__28 +#define __pyx_n_u__29 __pyx_mstate_global->__pyx_n_u__29 +#define __pyx_n_u__3 __pyx_mstate_global->__pyx_n_u__3 +#define __pyx_n_u__30 __pyx_mstate_global->__pyx_n_u__30 +#define __pyx_n_u__31 __pyx_mstate_global->__pyx_n_u__31 +#define __pyx_n_u__32 __pyx_mstate_global->__pyx_n_u__32 +#define __pyx_n_u__33 __pyx_mstate_global->__pyx_n_u__33 +#define __pyx_n_u__34 __pyx_mstate_global->__pyx_n_u__34 +#define __pyx_n_u__35 __pyx_mstate_global->__pyx_n_u__35 +#define __pyx_n_u__36 __pyx_mstate_global->__pyx_n_u__36 +#define __pyx_n_u__37 __pyx_mstate_global->__pyx_n_u__37 +#define __pyx_n_u__38 __pyx_mstate_global->__pyx_n_u__38 +#define __pyx_n_u__39 __pyx_mstate_global->__pyx_n_u__39 +#define __pyx_n_u__4 __pyx_mstate_global->__pyx_n_u__4 +#define __pyx_n_u__40 __pyx_mstate_global->__pyx_n_u__40 +#define __pyx_n_u__41 __pyx_mstate_global->__pyx_n_u__41 +#define __pyx_n_u__42 __pyx_mstate_global->__pyx_n_u__42 +#define __pyx_n_u__43 __pyx_mstate_global->__pyx_n_u__43 +#define __pyx_n_u__44 __pyx_mstate_global->__pyx_n_u__44 +#define __pyx_n_u__45 __pyx_mstate_global->__pyx_n_u__45 +#define __pyx_n_u__46 __pyx_mstate_global->__pyx_n_u__46 +#define __pyx_n_u__47 __pyx_mstate_global->__pyx_n_u__47 +#define __pyx_n_u__48 __pyx_mstate_global->__pyx_n_u__48 +#define __pyx_n_u__49 __pyx_mstate_global->__pyx_n_u__49 +#define __pyx_n_u__5 __pyx_mstate_global->__pyx_n_u__5 +#define __pyx_n_u__50 __pyx_mstate_global->__pyx_n_u__50 +#define __pyx_n_u__51 __pyx_mstate_global->__pyx_n_u__51 +#define __pyx_n_u__52 __pyx_mstate_global->__pyx_n_u__52 +#define __pyx_n_u__53 __pyx_mstate_global->__pyx_n_u__53 +#define __pyx_n_u__54 __pyx_mstate_global->__pyx_n_u__54 +#define __pyx_n_u__55 __pyx_mstate_global->__pyx_n_u__55 +#define __pyx_n_u__56 __pyx_mstate_global->__pyx_n_u__56 +#define __pyx_n_u__57 
__pyx_mstate_global->__pyx_n_u__57 +#define __pyx_n_u__58 __pyx_mstate_global->__pyx_n_u__58 +#define __pyx_n_u__59 __pyx_mstate_global->__pyx_n_u__59 +#define __pyx_n_u__6 __pyx_mstate_global->__pyx_n_u__6 +#define __pyx_n_u__60 __pyx_mstate_global->__pyx_n_u__60 +#define __pyx_n_u__61 __pyx_mstate_global->__pyx_n_u__61 +#define __pyx_n_u__62 __pyx_mstate_global->__pyx_n_u__62 +#define __pyx_n_u__63 __pyx_mstate_global->__pyx_n_u__63 +#define __pyx_n_u__64 __pyx_mstate_global->__pyx_n_u__64 +#define __pyx_n_u__65 __pyx_mstate_global->__pyx_n_u__65 +#define __pyx_n_u__66 __pyx_mstate_global->__pyx_n_u__66 +#define __pyx_n_u__67 __pyx_mstate_global->__pyx_n_u__67 +#define __pyx_n_u__68 __pyx_mstate_global->__pyx_n_u__68 +#define __pyx_n_u__69 __pyx_mstate_global->__pyx_n_u__69 +#define __pyx_n_u__7 __pyx_mstate_global->__pyx_n_u__7 +#define __pyx_n_u__70 __pyx_mstate_global->__pyx_n_u__70 +#define __pyx_n_u__71 __pyx_mstate_global->__pyx_n_u__71 +#define __pyx_n_u__72 __pyx_mstate_global->__pyx_n_u__72 +#define __pyx_n_u__73 __pyx_mstate_global->__pyx_n_u__73 +#define __pyx_n_u__74 __pyx_mstate_global->__pyx_n_u__74 +#define __pyx_n_u__75 __pyx_mstate_global->__pyx_n_u__75 +#define __pyx_n_u__76 __pyx_mstate_global->__pyx_n_u__76 +#define __pyx_n_u__77 __pyx_mstate_global->__pyx_n_u__77 +#define __pyx_n_u__78 __pyx_mstate_global->__pyx_n_u__78 +#define __pyx_n_u__79 __pyx_mstate_global->__pyx_n_u__79 +#define __pyx_n_u__8 __pyx_mstate_global->__pyx_n_u__8 +#define __pyx_n_u__80 __pyx_mstate_global->__pyx_n_u__80 +#define __pyx_n_u__81 __pyx_mstate_global->__pyx_n_u__81 +#define __pyx_n_u__82 __pyx_mstate_global->__pyx_n_u__82 +#define __pyx_n_u__83 __pyx_mstate_global->__pyx_n_u__83 +#define __pyx_n_u__84 __pyx_mstate_global->__pyx_n_u__84 +#define __pyx_n_u__85 __pyx_mstate_global->__pyx_n_u__85 +#define __pyx_n_u__86 __pyx_mstate_global->__pyx_n_u__86 +#define __pyx_n_u__87 __pyx_mstate_global->__pyx_n_u__87 +#define __pyx_n_u__88 
__pyx_mstate_global->__pyx_n_u__88 +#define __pyx_n_u__89 __pyx_mstate_global->__pyx_n_u__89 +#define __pyx_n_u__9 __pyx_mstate_global->__pyx_n_u__9 +#define __pyx_n_u__90 __pyx_mstate_global->__pyx_n_u__90 +#define __pyx_n_u__91 __pyx_mstate_global->__pyx_n_u__91 +#define __pyx_n_u__92 __pyx_mstate_global->__pyx_n_u__92 +#define __pyx_n_u__93 __pyx_mstate_global->__pyx_n_u__93 +#define __pyx_n_u__94 __pyx_mstate_global->__pyx_n_u__94 +#define __pyx_n_u__95 __pyx_mstate_global->__pyx_n_u__95 +#define __pyx_n_u__96 __pyx_mstate_global->__pyx_n_u__96 +#define __pyx_n_u__97 __pyx_mstate_global->__pyx_n_u__97 +#define __pyx_n_u__98 __pyx_mstate_global->__pyx_n_u__98 +#define __pyx_n_u__99 __pyx_mstate_global->__pyx_n_u__99 +#define __pyx_n_u_as __pyx_mstate_global->__pyx_n_u_as +#define __pyx_n_u_asm_Beng __pyx_mstate_global->__pyx_n_u_asm_Beng +#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines +#define __pyx_n_u_awa_Deva __pyx_mstate_global->__pyx_n_u_awa_Deva +#define __pyx_kp_u_b_w_https_ftp_w_w_w_b __pyx_mstate_global->__pyx_kp_u_b_w_https_ftp_w_w_w_b +#define __pyx_n_s_batch __pyx_mstate_global->__pyx_n_s_batch +#define __pyx_n_u_ben_Beng __pyx_mstate_global->__pyx_n_u_ben_Beng +#define __pyx_n_u_bho_Deva __pyx_mstate_global->__pyx_n_u_bho_Deva +#define __pyx_n_u_bn __pyx_mstate_global->__pyx_n_u_bn +#define __pyx_n_u_brx_Deva __pyx_mstate_global->__pyx_n_u_brx_Deva +#define __pyx_n_s_chr __pyx_mstate_global->__pyx_n_s_chr +#define __pyx_n_s_cinit___locals_lambda __pyx_mstate_global->__pyx_n_s_cinit___locals_lambda +#define __pyx_n_s_clear __pyx_mstate_global->__pyx_n_s_clear +#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback +#define __pyx_n_s_compile __pyx_mstate_global->__pyx_n_s_compile +#define __pyx_kp_u_d __pyx_mstate_global->__pyx_kp_u_d +#define __pyx_kp_u_d_d __pyx_mstate_global->__pyx_kp_u_d_d +#define __pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d 
__pyx_mstate_global->__pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d +#define __pyx_n_s_desc __pyx_mstate_global->__pyx_n_s_desc +#define __pyx_n_s_detokenize __pyx_mstate_global->__pyx_n_s_detokenize +#define __pyx_kp_u_disable __pyx_mstate_global->__pyx_kp_u_disable +#define __pyx_n_u_doi_Deva __pyx_mstate_global->__pyx_n_u_doi_Deva +#define __pyx_n_u_en __pyx_mstate_global->__pyx_n_u_en +#define __pyx_kp_u_enable __pyx_mstate_global->__pyx_kp_u_enable +#define __pyx_n_u_eng_Latn __pyx_mstate_global->__pyx_n_u_eng_Latn +#define __pyx_n_s_escape __pyx_mstate_global->__pyx_n_s_escape +#define __pyx_n_s_findall __pyx_mstate_global->__pyx_n_s_findall +#define __pyx_kp_u_gc __pyx_mstate_global->__pyx_kp_u_gc +#define __pyx_n_s_get __pyx_mstate_global->__pyx_n_s_get +#define __pyx_n_s_get_normalizer __pyx_mstate_global->__pyx_n_s_get_normalizer +#define __pyx_n_s_getstate __pyx_mstate_global->__pyx_n_s_getstate +#define __pyx_n_u_gom_Deva __pyx_mstate_global->__pyx_n_u_gom_Deva +#define __pyx_n_u_gon_Deva __pyx_mstate_global->__pyx_n_u_gon_Deva +#define __pyx_n_s_group __pyx_mstate_global->__pyx_n_s_group +#define __pyx_n_u_gu __pyx_mstate_global->__pyx_n_u_gu +#define __pyx_n_u_guj_Gujr __pyx_mstate_global->__pyx_n_u_guj_Gujr +#define __pyx_n_u_hi __pyx_mstate_global->__pyx_n_u_hi +#define __pyx_n_u_hin_Deva __pyx_mstate_global->__pyx_n_u_hin_Deva +#define __pyx_n_u_hne_Deva __pyx_mstate_global->__pyx_n_u_hne_Deva +#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import +#define __pyx_n_s_indic_detokenize __pyx_mstate_global->__pyx_n_s_indic_detokenize +#define __pyx_n_s_indic_tokenize __pyx_mstate_global->__pyx_n_s_indic_tokenize +#define __pyx_n_s_indicnlp_normalize_indic_normali __pyx_mstate_global->__pyx_n_s_indicnlp_normalize_indic_normali +#define __pyx_n_s_indicnlp_tokenize __pyx_mstate_global->__pyx_n_s_indicnlp_tokenize +#define __pyx_n_s_indicnlp_transliterate_unicode_t __pyx_mstate_global->__pyx_n_s_indicnlp_transliterate_unicode_t +#define 
__pyx_n_s_inference __pyx_mstate_global->__pyx_n_s_inference +#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing +#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine +#define __pyx_n_s_is_target __pyx_mstate_global->__pyx_n_s_is_target +#define __pyx_kp_u_isenabled __pyx_mstate_global->__pyx_kp_u_isenabled +#define __pyx_n_s_items __pyx_mstate_global->__pyx_n_s_items +#define __pyx_n_u_kK __pyx_mstate_global->__pyx_n_u_kK +#define __pyx_n_u_kan_Knda __pyx_mstate_global->__pyx_n_u_kan_Knda +#define __pyx_n_u_kas_Arab __pyx_mstate_global->__pyx_n_u_kas_Arab +#define __pyx_n_u_kas_Deva __pyx_mstate_global->__pyx_n_u_kas_Deva +#define __pyx_n_u_kha_Latn __pyx_mstate_global->__pyx_n_u_kha_Latn +#define __pyx_n_u_kn __pyx_mstate_global->__pyx_n_u_kn +#define __pyx_n_s_lang __pyx_mstate_global->__pyx_n_s_lang +#define __pyx_n_u_line __pyx_mstate_global->__pyx_n_u_line +#define __pyx_n_u_lus_Latn __pyx_mstate_global->__pyx_n_u_lus_Latn +#define __pyx_n_s_m __pyx_mstate_global->__pyx_n_s_m +#define __pyx_n_u_mag_Deva __pyx_mstate_global->__pyx_n_u_mag_Deva +#define __pyx_n_u_mai_Deva __pyx_mstate_global->__pyx_n_u_mai_Deva +#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main +#define __pyx_n_u_mal_Mlym __pyx_mstate_global->__pyx_n_u_mal_Mlym +#define __pyx_n_u_mar_Deva __pyx_mstate_global->__pyx_n_u_mar_Deva +#define __pyx_n_u_ml __pyx_mstate_global->__pyx_n_u_ml +#define __pyx_n_u_mni_Beng __pyx_mstate_global->__pyx_n_u_mni_Beng +#define __pyx_n_u_mni_Mtei __pyx_mstate_global->__pyx_n_u_mni_Mtei +#define __pyx_n_u_mr __pyx_mstate_global->__pyx_n_u_mr +#define __pyx_kp_u_n __pyx_mstate_global->__pyx_kp_u_n +#define __pyx_kp_u_n_2 __pyx_mstate_global->__pyx_kp_u_n_2 +#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name +#define __pyx_n_u_ne __pyx_mstate_global->__pyx_n_u_ne +#define __pyx_kp_s_no_default___reduce___due_to_non __pyx_mstate_global->__pyx_kp_s_no_default___reduce___due_to_non +#define 
__pyx_n_s_normalize __pyx_mstate_global->__pyx_n_s_normalize +#define __pyx_n_u_npi_Deva __pyx_mstate_global->__pyx_n_u_npi_Deva +#define __pyx_n_u_or __pyx_mstate_global->__pyx_n_u_or +#define __pyx_n_u_ory __pyx_mstate_global->__pyx_n_u_ory +#define __pyx_n_u_ory_Orya __pyx_mstate_global->__pyx_n_u_ory_Orya +#define __pyx_n_u_pa __pyx_mstate_global->__pyx_n_u_pa +#define __pyx_n_u_pan_Guru __pyx_mstate_global->__pyx_n_u_pan_Guru +#define __pyx_n_s_postprocess_batch __pyx_mstate_global->__pyx_n_s_postprocess_batch +#define __pyx_n_s_preprocess_batch __pyx_mstate_global->__pyx_n_s_preprocess_batch +#define __pyx_n_s_put __pyx_mstate_global->__pyx_n_s_put +#define __pyx_n_s_pyx_state __pyx_mstate_global->__pyx_n_s_pyx_state +#define __pyx_n_s_pyx_vtable __pyx_mstate_global->__pyx_n_s_pyx_vtable +#define __pyx_n_s_queue __pyx_mstate_global->__pyx_n_s_queue +#define __pyx_kp_u_r __pyx_mstate_global->__pyx_kp_u_r +#define __pyx_n_s_range __pyx_mstate_global->__pyx_n_s_range +#define __pyx_n_s_re __pyx_mstate_global->__pyx_n_s_re +#define __pyx_n_s_reduce __pyx_mstate_global->__pyx_n_s_reduce +#define __pyx_n_s_reduce_cython __pyx_mstate_global->__pyx_n_s_reduce_cython +#define __pyx_n_s_reduce_ex __pyx_mstate_global->__pyx_n_s_reduce_ex +#define __pyx_n_s_regex __pyx_mstate_global->__pyx_n_s_regex +#define __pyx_n_s_replace __pyx_mstate_global->__pyx_n_s_replace +#define __pyx_kp_u_s __pyx_mstate_global->__pyx_kp_u_s +#define __pyx_kp_u_s_2 __pyx_mstate_global->__pyx_kp_u_s_2 +#define __pyx_kp_u_s_3 __pyx_mstate_global->__pyx_kp_u_s_3 +#define __pyx_kp_u_s_s __pyx_mstate_global->__pyx_kp_u_s_s +#define __pyx_kp_u_s_s_2 __pyx_mstate_global->__pyx_kp_u_s_s_2 +#define __pyx_n_s_sacremoses __pyx_mstate_global->__pyx_n_s_sacremoses +#define __pyx_n_u_san_Deva __pyx_mstate_global->__pyx_n_u_san_Deva +#define __pyx_n_u_sat_Olck __pyx_mstate_global->__pyx_n_u_sat_Olck +#define __pyx_n_s_self __pyx_mstate_global->__pyx_n_s_self +#define __pyx_n_s_sents 
__pyx_mstate_global->__pyx_n_s_sents +#define __pyx_n_s_setstate __pyx_mstate_global->__pyx_n_s_setstate +#define __pyx_n_s_setstate_cython __pyx_mstate_global->__pyx_n_s_setstate_cython +#define __pyx_n_u_snd_Arab __pyx_mstate_global->__pyx_n_u_snd_Arab +#define __pyx_n_u_snd_Deva __pyx_mstate_global->__pyx_n_u_snd_Deva +#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec +#define __pyx_n_s_split __pyx_mstate_global->__pyx_n_s_split +#define __pyx_n_s_src_lang __pyx_mstate_global->__pyx_n_s_src_lang +#define __pyx_kp_s_stringsource __pyx_mstate_global->__pyx_kp_s_stringsource +#define __pyx_n_s_strip __pyx_mstate_global->__pyx_n_s_strip +#define __pyx_n_s_sub __pyx_mstate_global->__pyx_n_s_sub +#define __pyx_n_u_ta __pyx_mstate_global->__pyx_n_u_ta +#define __pyx_n_u_tam_Taml __pyx_mstate_global->__pyx_n_u_tam_Taml +#define __pyx_n_u_te __pyx_mstate_global->__pyx_n_u_te +#define __pyx_n_u_tel_Telu __pyx_mstate_global->__pyx_n_u_tel_Telu +#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test +#define __pyx_n_s_tgt_lang __pyx_mstate_global->__pyx_n_s_tgt_lang +#define __pyx_n_s_tokenize __pyx_mstate_global->__pyx_n_s_tokenize +#define __pyx_n_s_total __pyx_mstate_global->__pyx_n_s_total +#define __pyx_n_s_tqdm __pyx_mstate_global->__pyx_n_s_tqdm +#define __pyx_n_s_translate __pyx_mstate_global->__pyx_n_s_translate +#define __pyx_n_s_transliterate __pyx_mstate_global->__pyx_n_s_transliterate +#define __pyx_n_s_trivial_detokenize __pyx_mstate_global->__pyx_n_s_trivial_detokenize +#define __pyx_n_s_trivial_tokenize __pyx_mstate_global->__pyx_n_s_trivial_tokenize +#define __pyx_n_s_typing __pyx_mstate_global->__pyx_n_s_typing +#define __pyx_n_s_unit __pyx_mstate_global->__pyx_n_s_unit +#define __pyx_n_u_unr_Deva __pyx_mstate_global->__pyx_n_u_unr_Deva +#define __pyx_n_u_ur __pyx_mstate_global->__pyx_n_u_ur +#define __pyx_n_u_urd_Arab __pyx_mstate_global->__pyx_n_u_urd_Arab +#define __pyx_n_s_visualize __pyx_mstate_global->__pyx_n_s_visualize +#define 
__pyx_int_0 __pyx_mstate_global->__pyx_int_0 +#define __pyx_tuple__150 __pyx_mstate_global->__pyx_tuple__150 +#define __pyx_tuple__152 __pyx_mstate_global->__pyx_tuple__152 +#define __pyx_tuple__158 __pyx_mstate_global->__pyx_tuple__158 +#define __pyx_tuple__161 __pyx_mstate_global->__pyx_tuple__161 +#define __pyx_tuple__164 __pyx_mstate_global->__pyx_tuple__164 +#define __pyx_tuple__167 __pyx_mstate_global->__pyx_tuple__167 +#define __pyx_tuple__170 __pyx_mstate_global->__pyx_tuple__170 +#define __pyx_tuple__172 __pyx_mstate_global->__pyx_tuple__172 +#define __pyx_tuple__174 __pyx_mstate_global->__pyx_tuple__174 +#define __pyx_tuple__175 __pyx_mstate_global->__pyx_tuple__175 +#define __pyx_tuple__177 __pyx_mstate_global->__pyx_tuple__177 +#define __pyx_tuple__178 __pyx_mstate_global->__pyx_tuple__178 +#define __pyx_tuple__180 __pyx_mstate_global->__pyx_tuple__180 +#define __pyx_codeobj__173 __pyx_mstate_global->__pyx_codeobj__173 +#define __pyx_codeobj__176 __pyx_mstate_global->__pyx_codeobj__176 +#define __pyx_codeobj__179 __pyx_mstate_global->__pyx_codeobj__179 +#define __pyx_codeobj__181 __pyx_mstate_global->__pyx_codeobj__181 +/* #### Code section: module_code ### */ + +/* "IndicTransToolkit/processor.pyx":50 + * cdef object _xliterator + * + * def __cinit__(self, bint inference=True): # <<<<<<<<<<<<<< + * """ + * Constructor for IndicProcessor. Initializes all necessary components. 
+ */ + +/* Python wrapper */ +static int __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static int __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + int __pyx_v_inference; + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[1] = {0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1; + #endif + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_inference,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_inference); + if (value) { values[0] = __Pyx_Arg_NewRef_VARARGS(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 50, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__cinit__") < 0)) __PYX_ERR(0, 50, __pyx_L3_error) + } + } else { + switch (__pyx_nargs) { + case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + } + if (values[0]) { + 
__pyx_v_inference = __Pyx_PyObject_IsTrue(values[0]); if (unlikely((__pyx_v_inference == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 50, __pyx_L3_error) + } else { + __pyx_v_inference = ((int)1); + } + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 1, __pyx_nargs); __PYX_ERR(0, 50, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return -1; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor___cinit__(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self), __pyx_v_inference); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":197 + * (re.compile(r"n"), "n "), + * (re.compile(r"C"), " C"), + * (re.compile(r"[?!;]"), lambda m: m.group(0).strip()), # <<<<<<<<<<<<<< + * (re.compile(r","), ", "), + * ] + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__cinit___lambda(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_9__cinit___lambda = {"lambda", 
(PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__cinit___lambda, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__cinit___lambda(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + PyObject *__pyx_v_m = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[1] = {0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("lambda (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_m,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_m)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 197, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "lambda") < 0)) __PYX_ERR(0, 197, __pyx_L3_error) + } + } 
else if (unlikely(__pyx_nargs != 1)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + } + __pyx_v_m = values[0]; + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("lambda", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 197, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__cinit__.lambda", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_lambda_funcdef_lambda(__pyx_self, __pyx_v_m); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_lambda_funcdef_lambda(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_m) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + unsigned int __pyx_t_5; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("lambda", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_m, __pyx_n_s_group); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = NULL; + __pyx_t_5 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_3))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3); + if (likely(__pyx_t_4)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); + 
__Pyx_INCREF(__pyx_t_4); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_3, function); + __pyx_t_5 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_4, __pyx_int_0}; + __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_3, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5); + __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_strip); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = NULL; + __pyx_t_5 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_3))) { + __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3); + if (likely(__pyx_t_2)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); + __Pyx_INCREF(__pyx_t_2); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_3, function); + __pyx_t_5 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_2, NULL}; + __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_3, __pyx_callargs+1-__pyx_t_5, 0+__pyx_t_5); + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__cinit__.lambda", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":50 + * cdef object _xliterator + * + * def __cinit__(self, bint inference=True): # <<<<<<<<<<<<<< + * """ + * Constructor for IndicProcessor. 
Initializes all necessary components. + */ + +static int __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor___cinit__(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, int __pyx_v_inference) { + PyObject *__pyx_v_digits_dict = 0; + PyObject *__pyx_v_k = NULL; + PyObject *__pyx_v_v = NULL; + long __pyx_v_c; + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + Py_ssize_t __pyx_t_2; + Py_ssize_t __pyx_t_3; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + int __pyx_t_7; + long __pyx_t_8; + long __pyx_t_9; + long __pyx_t_10; + unsigned int __pyx_t_11; + PyObject *__pyx_t_12 = NULL; + PyObject *__pyx_t_13 = NULL; + PyObject *__pyx_t_14 = NULL; + PyObject *__pyx_t_15 = NULL; + PyObject *__pyx_t_16 = NULL; + PyObject *__pyx_t_17 = NULL; + PyObject *__pyx_t_18 = NULL; + PyObject *__pyx_t_19 = NULL; + PyObject *__pyx_t_20 = NULL; + PyObject *__pyx_t_21 = NULL; + PyObject *__pyx_t_22 = NULL; + PyObject *__pyx_t_23 = NULL; + PyObject *__pyx_t_24 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__cinit__", 1); + + /* "IndicTransToolkit/processor.pyx":54 + * Constructor for IndicProcessor. Initializes all necessary components. 
+ * """ + * self.inference = inference # <<<<<<<<<<<<<< + * + * ############################## + */ + __pyx_v_self->inference = __pyx_v_inference; + + /* "IndicTransToolkit/processor.pyx":60 + * ############################## + * self._flores_codes = { + * "asm_Beng": "as", # <<<<<<<<<<<<<< + * "awa_Deva": "hi", + * "ben_Beng": "bn", + */ + __pyx_t_1 = __Pyx_PyDict_NewPresized(34); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 60, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_asm_Beng, __pyx_n_u_as) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_awa_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_ben_Beng, __pyx_n_u_bn) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_bho_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_brx_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_doi_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_eng_Latn, __pyx_n_u_en) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_gom_Deva, __pyx_n_u_kK) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_gon_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_guj_Gujr, __pyx_n_u_gu) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_hin_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_hne_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_kan_Knda, __pyx_n_u_kn) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_kas_Arab, __pyx_n_u_ur) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_kas_Deva, __pyx_n_u_hi) < 0) 
__PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_kha_Latn, __pyx_n_u_en) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_lus_Latn, __pyx_n_u_en) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mag_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mai_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mal_Mlym, __pyx_n_u_ml) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mar_Deva, __pyx_n_u_mr) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mni_Beng, __pyx_n_u_bn) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_mni_Mtei, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_npi_Deva, __pyx_n_u_ne) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_ory_Orya, __pyx_n_u_or) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_pan_Guru, __pyx_n_u_pa) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_san_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_sat_Olck, __pyx_n_u_or) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_snd_Arab, __pyx_n_u_ur) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_snd_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_tam_Taml, __pyx_n_u_ta) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_tel_Telu, __pyx_n_u_te) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_urd_Arab, __pyx_n_u_ur) < 0) __PYX_ERR(0, 60, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_unr_Deva, __pyx_n_u_hi) < 0) __PYX_ERR(0, 60, 
__pyx_L1_error) + + /* "IndicTransToolkit/processor.pyx":59 + * # FLORES -> ISO CODES + * ############################## + * self._flores_codes = { # <<<<<<<<<<<<<< + * "asm_Beng": "as", + * "awa_Deva": "hi", + */ + __Pyx_GIVEREF(__pyx_t_1); + __Pyx_GOTREF(__pyx_v_self->_flores_codes); + __Pyx_DECREF(__pyx_v_self->_flores_codes); + __pyx_v_self->_flores_codes = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":99 + * # INDIC DIGIT TRANSLATION (str.translate) + * ############################## + * self._digits_translation_table = {} # <<<<<<<<<<<<<< + * cdef dict digits_dict = { + * "\u09e6": "0", "\u0ae6": "0", "\u0ce6": "0", "\u0966": "0", + */ + __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + __Pyx_GOTREF(__pyx_v_self->_digits_translation_table); + __Pyx_DECREF(__pyx_v_self->_digits_translation_table); + __pyx_v_self->_digits_translation_table = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":101 + * self._digits_translation_table = {} + * cdef dict digits_dict = { + * "\u09e6": "0", "\u0ae6": "0", "\u0ce6": "0", "\u0966": "0", # <<<<<<<<<<<<<< + * "\u0660": "0", "\uabf0": "0", "\u0b66": "0", "\u0a66": "0", + * "\u1c50": "0", "\u06f0": "0", + */ + __pyx_t_1 = __Pyx_PyDict_NewPresized(100); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 101, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u_, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__2, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__3, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__4, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__5, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, 
__pyx_n_u__6, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__7, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__8, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__9, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__10, __pyx_kp_u_0) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__11, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__12, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__13, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__14, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__15, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__16, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__17, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__18, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__19, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__20, __pyx_kp_u_1) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__21, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__22, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__23, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__24, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__25, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, 
__pyx_n_u__26, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__27, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__28, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__29, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__30, __pyx_kp_u_2) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__31, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__32, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__33, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__34, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__35, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__36, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__37, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__38, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__39, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__40, __pyx_kp_u_3) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__41, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__42, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__43, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__44, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__45, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, 
__pyx_n_u__46, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__47, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__48, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__49, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__50, __pyx_kp_u_4) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__51, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__52, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__53, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__54, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__55, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__56, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__57, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__58, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__59, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__60, __pyx_kp_u_5) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__61, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__62, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__63, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__64, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__65, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, 
__pyx_n_u__66, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__67, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__68, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__69, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__70, __pyx_kp_u_6) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__71, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__72, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__73, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__74, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__75, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__76, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__77, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__78, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__79, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__80, __pyx_kp_u_7) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__81, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__82, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__83, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__84, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__85, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, 
__pyx_n_u__86, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__87, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__88, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__89, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__90, __pyx_kp_u_8) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__91, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__92, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__93, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__94, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__95, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__96, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__97, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__98, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__99, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_u__100, __pyx_kp_u_9) < 0) __PYX_ERR(0, 101, __pyx_L1_error) + __pyx_v_digits_dict = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":141 + * "\u1c59": "9", "\u0c6f": "9", + * } + * for k, v in digits_dict.items(): # <<<<<<<<<<<<<< + * self._digits_translation_table[ord(k)] = v + * + */ + __pyx_t_2 = 0; + __pyx_t_5 = __Pyx_dict_iterator(__pyx_v_digits_dict, 1, __pyx_n_s_items, (&__pyx_t_3), (&__pyx_t_4)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_1); + __pyx_t_1 = __pyx_t_5; + 
__pyx_t_5 = 0; + while (1) { + __pyx_t_7 = __Pyx_dict_iter_next(__pyx_t_1, __pyx_t_3, &__pyx_t_2, &__pyx_t_5, &__pyx_t_6, NULL, __pyx_t_4); + if (unlikely(__pyx_t_7 == 0)) break; + if (unlikely(__pyx_t_7 == -1)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_6); + __Pyx_XDECREF_SET(__pyx_v_k, __pyx_t_5); + __pyx_t_5 = 0; + __Pyx_XDECREF_SET(__pyx_v_v, __pyx_t_6); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":142 + * } + * for k, v in digits_dict.items(): + * self._digits_translation_table[ord(k)] = v # <<<<<<<<<<<<<< + * + * # Also map ASCII '0'-'9' + */ + if (unlikely(__pyx_v_self->_digits_translation_table == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); + __PYX_ERR(0, 142, __pyx_L1_error) + } + __pyx_t_8 = __Pyx_PyObject_Ord(__pyx_v_k); if (unlikely(__pyx_t_8 == ((long)(long)(Py_UCS4)-1))) __PYX_ERR(0, 142, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyInt_From_long(__pyx_t_8); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + if (unlikely((PyDict_SetItem(__pyx_v_self->_digits_translation_table, __pyx_t_6, __pyx_v_v) < 0))) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":145 + * + * # Also map ASCII '0'-'9' + * for c in range(ord('0'), ord('9') + 1): # <<<<<<<<<<<<<< + * self._digits_translation_table[c] = chr(c) + * + */ + __pyx_t_8 = (57 + 1); + __pyx_t_9 = __pyx_t_8; + for (__pyx_t_10 = 48; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { + __pyx_v_c = __pyx_t_10; + + /* "IndicTransToolkit/processor.pyx":146 + * # Also map ASCII '0'-'9' + * for c in range(ord('0'), ord('9') + 1): + * self._digits_translation_table[c] = chr(c) # <<<<<<<<<<<<<< + * + * ############################## + */ + __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_c); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 146, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_6 = 
__Pyx_PyObject_CallOneArg(__pyx_builtin_chr, __pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 146, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (unlikely(__pyx_v_self->_digits_translation_table == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); + __PYX_ERR(0, 146, __pyx_L1_error) + } + __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_c); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 146, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (unlikely((PyDict_SetItem(__pyx_v_self->_digits_translation_table, __pyx_t_1, __pyx_t_6) < 0))) __PYX_ERR(0, 146, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + } + + /* "IndicTransToolkit/processor.pyx":151 + * # PLACEHOLDER MAP QUEUE + * ############################## + * self._placeholder_entity_maps = Queue() # <<<<<<<<<<<<<< + * + * ############################## + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_Queue); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 151, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, NULL}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 0+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 151, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + __Pyx_GIVEREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_v_self->_placeholder_entity_maps); + __Pyx_DECREF(__pyx_v_self->_placeholder_entity_maps); + __pyx_v_self->_placeholder_entity_maps = __pyx_t_6; + __pyx_t_6 = 0; + + 
/* "IndicTransToolkit/processor.pyx":156 + * # MOSES (as Python objects) + * ############################## + * self._en_tok = MosesTokenizer(lang="en") # <<<<<<<<<<<<<< + * self._en_normalizer = MosesPunctNormalizer() + * self._en_detok = MosesDetokenizer(lang="en") + */ + __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_MosesTokenizer); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 156, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 156, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_lang, __pyx_n_u_en) < 0) __PYX_ERR(0, 156, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_empty_tuple, __pyx_t_1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 156, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_GIVEREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_v_self->_en_tok); + __Pyx_DECREF(__pyx_v_self->_en_tok); + __pyx_v_self->_en_tok = __pyx_t_5; + __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":157 + * ############################## + * self._en_tok = MosesTokenizer(lang="en") + * self._en_normalizer = MosesPunctNormalizer() # <<<<<<<<<<<<<< + * self._en_detok = MosesDetokenizer(lang="en") + * + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_MosesPunctNormalizer); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 157, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_6 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_6); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_6, NULL}; + __pyx_t_5 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 0+__pyx_t_11); + 
__Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 157, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + __Pyx_GIVEREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_v_self->_en_normalizer); + __Pyx_DECREF(__pyx_v_self->_en_normalizer); + __pyx_v_self->_en_normalizer = __pyx_t_5; + __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":158 + * self._en_tok = MosesTokenizer(lang="en") + * self._en_normalizer = MosesPunctNormalizer() + * self._en_detok = MosesDetokenizer(lang="en") # <<<<<<<<<<<<<< + * + * ############################## + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_MosesDetokenizer); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 158, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 158, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_lang, __pyx_n_u_en) < 0) __PYX_ERR(0, 158, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_empty_tuple, __pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 158, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_GIVEREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_v_self->_en_detok); + __Pyx_DECREF(__pyx_v_self->_en_detok); + __pyx_v_self->_en_detok = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":163 + * # TRANSLITERATOR (Python object) + * ############################## + * self._xliterator = UnicodeIndicTransliterator() # <<<<<<<<<<<<<< + * + * ############################## + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_UnicodeIndicTransliterator); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 163, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_5)) { + PyObject* 
function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, NULL}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 0+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 163, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + __Pyx_GIVEREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_v_self->_xliterator); + __Pyx_DECREF(__pyx_v_self->_xliterator); + __pyx_v_self->_xliterator = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":168 + * # Precompiled Patterns + * ############################## + * self._MULTISPACE_REGEX = re.compile(r"[ ]{2,}") # <<<<<<<<<<<<<< + * self._DIGIT_SPACE_PERCENT = re.compile(r"(\d) %") + * self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)") + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_compile); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_5))) { + __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_1)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); + __Pyx_INCREF(__pyx_t_1); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_1, __pyx_kp_u_2_2}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 
0; + } + __Pyx_GIVEREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_v_self->_MULTISPACE_REGEX); + __Pyx_DECREF(__pyx_v_self->_MULTISPACE_REGEX); + __pyx_v_self->_MULTISPACE_REGEX = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":169 + * ############################## + * self._MULTISPACE_REGEX = re.compile(r"[ ]{2,}") + * self._DIGIT_SPACE_PERCENT = re.compile(r"(\d) %") # <<<<<<<<<<<<<< + * self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)") + * self._DIGIT_NBSP_DIGIT = re.compile(r"(\d)(\d)") + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_d}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + __Pyx_GIVEREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_v_self->_DIGIT_SPACE_PERCENT); + __Pyx_DECREF(__pyx_v_self->_DIGIT_SPACE_PERCENT); + __pyx_v_self->_DIGIT_SPACE_PERCENT = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":170 + * self._MULTISPACE_REGEX = re.compile(r"[ ]{2,}") + * self._DIGIT_SPACE_PERCENT = re.compile(r"(\d) %") + * self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)") # <<<<<<<<<<<<<< + * 
self._DIGIT_NBSP_DIGIT = re.compile(r"(\d)(\d)") + * self._END_BRACKET_SPACE_PUNC_REGEX = re.compile(r"\) ([\.!:?;,])") + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 170, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_compile); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 170, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_5))) { + __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_1)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); + __Pyx_INCREF(__pyx_t_1); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_1, __pyx_kp_u__101}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 170, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + __Pyx_GIVEREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_v_self->_DOUBLE_QUOT_PUNC); + __Pyx_DECREF(__pyx_v_self->_DOUBLE_QUOT_PUNC); + __pyx_v_self->_DOUBLE_QUOT_PUNC = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":171 + * self._DIGIT_SPACE_PERCENT = re.compile(r"(\d) %") + * self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)") + * self._DIGIT_NBSP_DIGIT = re.compile(r"(\d)(\d)") # <<<<<<<<<<<<<< + * self._END_BRACKET_SPACE_PUNC_REGEX = re.compile(r"\) ([\.!:?;,])") + * + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 171, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 171, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_5); 
__pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_d_d}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 171, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + __Pyx_GIVEREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_v_self->_DIGIT_NBSP_DIGIT); + __Pyx_DECREF(__pyx_v_self->_DIGIT_NBSP_DIGIT); + __pyx_v_self->_DIGIT_NBSP_DIGIT = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":172 + * self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)") + * self._DIGIT_NBSP_DIGIT = re.compile(r"(\d)(\d)") + * self._END_BRACKET_SPACE_PUNC_REGEX = re.compile(r"\) ([\.!:?;,])") # <<<<<<<<<<<<<< + * + * self._URL_PATTERN = re.compile( + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_compile); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_5))) { + __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_1)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); + __Pyx_INCREF(__pyx_t_1); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_1, __pyx_kp_u__102}; + __pyx_t_6 = 
__Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + __Pyx_GIVEREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_v_self->_END_BRACKET_SPACE_PUNC_REGEX); + __Pyx_DECREF(__pyx_v_self->_END_BRACKET_SPACE_PUNC_REGEX); + __pyx_v_self->_END_BRACKET_SPACE_PUNC_REGEX = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":174 + * self._END_BRACKET_SPACE_PUNC_REGEX = re.compile(r"\) ([\.!:?;,])") + * + * self._URL_PATTERN = re.compile( # <<<<<<<<<<<<<< + * r"\b(?_URL_PATTERN); + __Pyx_DECREF(__pyx_v_self->_URL_PATTERN); + __pyx_v_self->_URL_PATTERN = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":177 + * r"\b(?_NUMERAL_PATTERN); + __Pyx_DECREF(__pyx_v_self->_NUMERAL_PATTERN); + __pyx_v_self->_NUMERAL_PATTERN = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":180 + * r"(~?\d+\.?\d*\s?%?\s?-?\s?~?\d+\.?\d*\s?%|~?\d+%|\d+[-\/.,:']\d+[-\/.,:'+]\d+(?:\.\d+)?|\d+[-\/.:'+]\d+(?:\.\d+)?)" + * ) + * self._EMAIL_PATTERN = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}") # <<<<<<<<<<<<<< + * self._OTHER_PATTERN = re.compile(r"[A-Za-z0-9]*[#|@]\w+") + * + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + 
__pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + __Pyx_GIVEREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_v_self->_EMAIL_PATTERN); + __Pyx_DECREF(__pyx_v_self->_EMAIL_PATTERN); + __pyx_v_self->_EMAIL_PATTERN = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":181 + * ) + * self._EMAIL_PATTERN = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}") + * self._OTHER_PATTERN = re.compile(r"[A-Za-z0-9]*[#|@]\w+") # <<<<<<<<<<<<<< + * + * # Combined punctuation replacements + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 181, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_compile); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 181, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_5))) { + __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_1)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); + __Pyx_INCREF(__pyx_t_1); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_1, __pyx_kp_u_A_Za_z0_9_w}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 181, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + __Pyx_GIVEREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_v_self->_OTHER_PATTERN); + 
__Pyx_DECREF(__pyx_v_self->_OTHER_PATTERN); + __pyx_v_self->_OTHER_PATTERN = __pyx_t_6; + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":185 + * # Combined punctuation replacements + * self._PUNC_REPLACEMENTS = [ + * (re.compile(r"\r"), ""), # <<<<<<<<<<<<<< + * (re.compile(r"\(\s*"), "("), + * (re.compile(r"\s*\)"), ")"), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 185, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 185, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_r}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 185, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 185, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_6)) __PYX_ERR(0, 185, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__103); + __Pyx_GIVEREF(__pyx_kp_u__103); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_kp_u__103)) __PYX_ERR(0, 185, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":186 + * self._PUNC_REPLACEMENTS = [ + * (re.compile(r"\r"), ""), + * (re.compile(r"\(\s*"), "("), # <<<<<<<<<<<<<< + * (re.compile(r"\s*\)"), ")"), + * 
(re.compile(r"\s:\s?"), ":"), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 186, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 186, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_12); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_12))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_12); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_12); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_12, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_s}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_12, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 186, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; + } + __pyx_t_12 = PyTuple_New(2); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 186, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_12); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_t_6)) __PYX_ERR(0, 186, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__104); + __Pyx_GIVEREF(__pyx_kp_u__104); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_12, 1, __pyx_kp_u__104)) __PYX_ERR(0, 186, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":187 + * (re.compile(r"\r"), ""), + * (re.compile(r"\(\s*"), "("), + * (re.compile(r"\s*\)"), ")"), # <<<<<<<<<<<<<< + * (re.compile(r"\s:\s?"), ":"), + * (re.compile(r"\s;\s?"), ";"), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 187, 
__pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_13))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_13); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_13, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_s_2}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_13, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + } + __pyx_t_13 = PyTuple_New(2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__105); + __Pyx_GIVEREF(__pyx_kp_u__105); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_kp_u__105)) __PYX_ERR(0, 187, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":188 + * (re.compile(r"\(\s*"), "("), + * (re.compile(r"\s*\)"), ")"), + * (re.compile(r"\s:\s?"), ":"), # <<<<<<<<<<<<<< + * (re.compile(r"\s;\s?"), ";"), + * (re.compile(r"[`]"), "'"), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_14))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_14); + if (likely(__pyx_t_5)) { + PyObject* function = 
PyMethod_GET_FUNCTION(__pyx_t_14); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_14, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_s_s}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_14, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + } + __pyx_t_14 = PyTuple_New(2); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_6)) __PYX_ERR(0, 188, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__106); + __Pyx_GIVEREF(__pyx_kp_u__106); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 1, __pyx_kp_u__106)) __PYX_ERR(0, 188, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":189 + * (re.compile(r"\s*\)"), ")"), + * (re.compile(r"\s:\s?"), ":"), + * (re.compile(r"\s;\s?"), ";"), # <<<<<<<<<<<<<< + * (re.compile(r"[`]"), "'"), + * (re.compile(r"[]"), '"'), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_15)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_15); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_15))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_15); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_15); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_15, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_s_s_2}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_15, 
__pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; + } + __pyx_t_15 = PyTuple_New(2); if (unlikely(!__pyx_t_15)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_15); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_6)) __PYX_ERR(0, 189, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__107); + __Pyx_GIVEREF(__pyx_kp_u__107); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_15, 1, __pyx_kp_u__107)) __PYX_ERR(0, 189, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":190 + * (re.compile(r"\s:\s?"), ":"), + * (re.compile(r"\s;\s?"), ";"), + * (re.compile(r"[`]"), "'"), # <<<<<<<<<<<<<< + * (re.compile(r"[]"), '"'), + * (re.compile(r"[]"), "-"), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 190, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 190, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_16))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_16); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_16); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_16, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__108}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_16, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 190, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + } + __pyx_t_16 = PyTuple_New(2); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 
190, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_6)) __PYX_ERR(0, 190, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__109); + __Pyx_GIVEREF(__pyx_kp_u__109); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_16, 1, __pyx_kp_u__109)) __PYX_ERR(0, 190, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":191 + * (re.compile(r"\s;\s?"), ";"), + * (re.compile(r"[`]"), "'"), + * (re.compile(r"[]"), '"'), # <<<<<<<<<<<<<< + * (re.compile(r"[]"), "-"), + * (re.compile(r"\.\.\."), "..."), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 191, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 191, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_17, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__110}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_17, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 191, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + } + __pyx_t_17 = PyTuple_New(2); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 191, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_17, 0, __pyx_t_6)) __PYX_ERR(0, 191, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__111); + __Pyx_GIVEREF(__pyx_kp_u__111); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_17, 1, 
__pyx_kp_u__111)) __PYX_ERR(0, 191, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":192 + * (re.compile(r"[`]"), "'"), + * (re.compile(r"[]"), '"'), + * (re.compile(r"[]"), "-"), # <<<<<<<<<<<<<< + * (re.compile(r"\.\.\."), "..."), + * (re.compile(r"%"), "%"), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 192, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 192, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_18))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_18); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_18); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_18, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__112}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_18, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 192, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + } + __pyx_t_18 = PyTuple_New(2); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 192, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_18, 0, __pyx_t_6)) __PYX_ERR(0, 192, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__113); + __Pyx_GIVEREF(__pyx_kp_u__113); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_18, 1, __pyx_kp_u__113)) __PYX_ERR(0, 192, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":193 + * (re.compile(r"[]"), '"'), + * (re.compile(r"[]"), "-"), + * (re.compile(r"\.\.\."), "..."), # <<<<<<<<<<<<<< + * (re.compile(r"%"), "%"), + * (re.compile(r"n"), "n "), + */ + 
__Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 193, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 193, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_19))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_19); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_19); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_19, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__114}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_19, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 193, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + } + __pyx_t_19 = PyTuple_New(2); if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 193, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_19); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_19, 0, __pyx_t_6)) __PYX_ERR(0, 193, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__115); + __Pyx_GIVEREF(__pyx_kp_u__115); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_19, 1, __pyx_kp_u__115)) __PYX_ERR(0, 193, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":194 + * (re.compile(r"[]"), "-"), + * (re.compile(r"\.\.\."), "..."), + * (re.compile(r"%"), "%"), # <<<<<<<<<<<<<< + * (re.compile(r"n"), "n "), + * (re.compile(r"C"), " C"), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 194, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_20 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_20)) __PYX_ERR(0, 194, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_20); + 
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_20))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_20); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_20); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_20, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__116}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_20, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 194, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_20); __pyx_t_20 = 0; + } + __pyx_t_20 = PyTuple_New(2); if (unlikely(!__pyx_t_20)) __PYX_ERR(0, 194, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_20); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_20, 0, __pyx_t_6)) __PYX_ERR(0, 194, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__117); + __Pyx_GIVEREF(__pyx_kp_u__117); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_20, 1, __pyx_kp_u__117)) __PYX_ERR(0, 194, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":195 + * (re.compile(r"\.\.\."), "..."), + * (re.compile(r"%"), "%"), + * (re.compile(r"n"), "n "), # <<<<<<<<<<<<<< + * (re.compile(r"C"), " C"), + * (re.compile(r"[?!;]"), lambda m: m.group(0).strip()), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 195, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_21 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 195, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_21); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_21))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_21); + if (likely(__pyx_t_5)) { + PyObject* function = 
PyMethod_GET_FUNCTION(__pyx_t_21); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_21, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_n}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_21, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 195, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_21); __pyx_t_21 = 0; + } + __pyx_t_21 = PyTuple_New(2); if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 195, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_21); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_21, 0, __pyx_t_6)) __PYX_ERR(0, 195, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u_n_2); + __Pyx_GIVEREF(__pyx_kp_u_n_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_21, 1, __pyx_kp_u_n_2)) __PYX_ERR(0, 195, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":196 + * (re.compile(r"%"), "%"), + * (re.compile(r"n"), "n "), + * (re.compile(r"C"), " C"), # <<<<<<<<<<<<<< + * (re.compile(r"[?!;]"), lambda m: m.group(0).strip()), + * (re.compile(r","), ", "), + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 196, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_22 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_22)) __PYX_ERR(0, 196, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_22); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_22))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_22); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_22); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_22, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u_C}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_22, 
__pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 196, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_22); __pyx_t_22 = 0; + } + __pyx_t_22 = PyTuple_New(2); if (unlikely(!__pyx_t_22)) __PYX_ERR(0, 196, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_22); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_22, 0, __pyx_t_6)) __PYX_ERR(0, 196, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u_C_2); + __Pyx_GIVEREF(__pyx_kp_u_C_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_22, 1, __pyx_kp_u_C_2)) __PYX_ERR(0, 196, __pyx_L1_error); + __pyx_t_6 = 0; + + /* "IndicTransToolkit/processor.pyx":197 + * (re.compile(r"n"), "n "), + * (re.compile(r"C"), " C"), + * (re.compile(r"[?!;]"), lambda m: m.group(0).strip()), # <<<<<<<<<<<<<< + * (re.compile(r","), ", "), + * ] + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_re); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_23 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_compile); if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_23); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_23))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_23); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_23); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_23, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_kp_u__118}; + __pyx_t_6 = __Pyx_PyObject_FastCall(__pyx_t_23, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_23); __pyx_t_23 = 0; + } + __pyx_t_23 = 
__Pyx_CyFunction_New(&__pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_9__cinit___lambda, 0, __pyx_n_s_cinit___locals_lambda, NULL, __pyx_n_s_IndicTransToolkit_processor, __pyx_d, NULL); if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_23); + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_6)) __PYX_ERR(0, 197, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_23); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_23)) __PYX_ERR(0, 197, __pyx_L1_error); + __pyx_t_6 = 0; + __pyx_t_23 = 0; + + /* "IndicTransToolkit/processor.pyx":198 + * (re.compile(r"C"), " C"), + * (re.compile(r"[?!;]"), lambda m: m.group(0).strip()), + * (re.compile(r","), ", "), # <<<<<<<<<<<<<< + * ] + * + */ + __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_re); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 198, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_24 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_compile); if (unlikely(!__pyx_t_24)) __PYX_ERR(0, 198, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_24); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = NULL; + __pyx_t_11 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_24))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_24); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_24); + __Pyx_INCREF(__pyx_t_6); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_24, function); + __pyx_t_11 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_6, __pyx_kp_u__119}; + __pyx_t_23 = __Pyx_PyObject_FastCall(__pyx_t_24, __pyx_callargs+1-__pyx_t_11, 1+__pyx_t_11); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 198, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_23); + __Pyx_DECREF(__pyx_t_24); __pyx_t_24 = 0; + } + __pyx_t_24 = PyTuple_New(2); if (unlikely(!__pyx_t_24)) 
__PYX_ERR(0, 198, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_24); + __Pyx_GIVEREF(__pyx_t_23); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_24, 0, __pyx_t_23)) __PYX_ERR(0, 198, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__120); + __Pyx_GIVEREF(__pyx_kp_u__120); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_24, 1, __pyx_kp_u__120)) __PYX_ERR(0, 198, __pyx_L1_error); + __pyx_t_23 = 0; + + /* "IndicTransToolkit/processor.pyx":184 + * + * # Combined punctuation replacements + * self._PUNC_REPLACEMENTS = [ # <<<<<<<<<<<<<< + * (re.compile(r"\r"), ""), + * (re.compile(r"\(\s*"), "("), + */ + __pyx_t_23 = PyList_New(14); if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 184, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_23); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 0, __pyx_t_1)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_12); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 1, __pyx_t_12)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_13); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 2, __pyx_t_13)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_14); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 3, __pyx_t_14)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_15); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 4, __pyx_t_15)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_16); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 5, __pyx_t_16)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_17); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 6, __pyx_t_17)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_18); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 7, __pyx_t_18)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_19); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 8, __pyx_t_19)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_20); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 9, __pyx_t_20)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_21); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 10, __pyx_t_21)) __PYX_ERR(0, 184, 
__pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_22); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 11, __pyx_t_22)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_5); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 12, __pyx_t_5)) __PYX_ERR(0, 184, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_24); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 13, __pyx_t_24)) __PYX_ERR(0, 184, __pyx_L1_error); + __pyx_t_1 = 0; + __pyx_t_12 = 0; + __pyx_t_13 = 0; + __pyx_t_14 = 0; + __pyx_t_15 = 0; + __pyx_t_16 = 0; + __pyx_t_17 = 0; + __pyx_t_18 = 0; + __pyx_t_19 = 0; + __pyx_t_20 = 0; + __pyx_t_21 = 0; + __pyx_t_22 = 0; + __pyx_t_5 = 0; + __pyx_t_24 = 0; + __Pyx_GIVEREF(__pyx_t_23); + __Pyx_GOTREF(__pyx_v_self->_PUNC_REPLACEMENTS); + __Pyx_DECREF(__pyx_v_self->_PUNC_REPLACEMENTS); + __pyx_v_self->_PUNC_REPLACEMENTS = ((PyObject*)__pyx_t_23); + __pyx_t_23 = 0; + + /* "IndicTransToolkit/processor.pyx":201 + * ] + * + * self._INDIC_FAILURE_CASES = [ # <<<<<<<<<<<<<< + * " ", + * "", + */ + __pyx_t_23 = PyList_New(18); if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_23); + __Pyx_INCREF(__pyx_kp_u__121); + __Pyx_GIVEREF(__pyx_kp_u__121); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 0, __pyx_kp_u__121)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__122); + __Pyx_GIVEREF(__pyx_kp_u__122); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 1, __pyx_kp_u__122)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__123); + __Pyx_GIVEREF(__pyx_kp_u__123); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 2, __pyx_kp_u__123)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__124); + __Pyx_GIVEREF(__pyx_kp_u__124); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 3, __pyx_kp_u__124)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__125); + __Pyx_GIVEREF(__pyx_kp_u__125); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 4, __pyx_kp_u__125)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__126); + __Pyx_GIVEREF(__pyx_kp_u__126); + if 
(__Pyx_PyList_SET_ITEM(__pyx_t_23, 5, __pyx_kp_u__126)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__127); + __Pyx_GIVEREF(__pyx_kp_u__127); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 6, __pyx_kp_u__127)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__128); + __Pyx_GIVEREF(__pyx_kp_u__128); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 7, __pyx_kp_u__128)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__129); + __Pyx_GIVEREF(__pyx_kp_u__129); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 8, __pyx_kp_u__129)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__130); + __Pyx_GIVEREF(__pyx_kp_u__130); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 9, __pyx_kp_u__130)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__131); + __Pyx_GIVEREF(__pyx_kp_u__131); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 10, __pyx_kp_u__131)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__132); + __Pyx_GIVEREF(__pyx_kp_u__132); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 11, __pyx_kp_u__132)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__133); + __Pyx_GIVEREF(__pyx_kp_u__133); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 12, __pyx_kp_u__133)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__134); + __Pyx_GIVEREF(__pyx_kp_u__134); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 13, __pyx_kp_u__134)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__135); + __Pyx_GIVEREF(__pyx_kp_u__135); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 14, __pyx_kp_u__135)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__136); + __Pyx_GIVEREF(__pyx_kp_u__136); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 15, __pyx_kp_u__136)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_kp_u__137); + __Pyx_GIVEREF(__pyx_kp_u__137); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 16, __pyx_kp_u__137)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_INCREF(__pyx_n_u__138); + __Pyx_GIVEREF(__pyx_n_u__138); + if (__Pyx_PyList_SET_ITEM(__pyx_t_23, 17, 
__pyx_n_u__138)) __PYX_ERR(0, 201, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_23); + __Pyx_GOTREF(__pyx_v_self->_INDIC_FAILURE_CASES); + __Pyx_DECREF(__pyx_v_self->_INDIC_FAILURE_CASES); + __pyx_v_self->_INDIC_FAILURE_CASES = ((PyObject*)__pyx_t_23); + __pyx_t_23 = 0; + + /* "IndicTransToolkit/processor.pyx":50 + * cdef object _xliterator + * + * def __cinit__(self, bint inference=True): # <<<<<<<<<<<<<< + * """ + * Constructor for IndicProcessor. Initializes all necessary components. + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_12); + __Pyx_XDECREF(__pyx_t_13); + __Pyx_XDECREF(__pyx_t_14); + __Pyx_XDECREF(__pyx_t_15); + __Pyx_XDECREF(__pyx_t_16); + __Pyx_XDECREF(__pyx_t_17); + __Pyx_XDECREF(__pyx_t_18); + __Pyx_XDECREF(__pyx_t_19); + __Pyx_XDECREF(__pyx_t_20); + __Pyx_XDECREF(__pyx_t_21); + __Pyx_XDECREF(__pyx_t_22); + __Pyx_XDECREF(__pyx_t_23); + __Pyx_XDECREF(__pyx_t_24); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_digits_dict); + __Pyx_XDECREF(__pyx_v_k); + __Pyx_XDECREF(__pyx_v_v); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":223 + * + * # Internal Method: Apply punctuation replacements + * cdef str _apply_punc_replacements(self, str text, list replacements) except *: # <<<<<<<<<<<<<< + * """ + * Apply a list of (pattern, replacement) in sequence to text. 
+ */ + +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__apply_punc_replacements(CYTHON_UNUSED struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text, PyObject *__pyx_v_replacements) { /* Generated C body of the cdef method IndicProcessor._apply_punc_replacements (processor.pyx:223). For every index i of the list replacements it reads pair = replacements[i] and rebinds text to pair[0].sub(pair[1], text). Returns a new reference to the final text, or NULL after recording a traceback entry when any step fails. The self argument is not used (CYTHON_UNUSED). */ + int __pyx_v_i; + PyObject *__pyx_v_pair = 0; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + Py_ssize_t __pyx_t_1; + Py_ssize_t __pyx_t_2; + int __pyx_t_3; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + unsigned int __pyx_t_7; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_apply_punc_replacements", 0); + __Pyx_INCREF(__pyx_v_text); /* Own a reference to text: the local is rebound inside the loop and released at the common exit. */ + + /* "IndicTransToolkit/processor.pyx":229 + * cdef int i + * cdef tuple pair + * for i in range(len(replacements)): # <<<<<<<<<<<<<< + * pair = replacements[i] + * text = pair[0].sub(pair[1], text) + */ + if (unlikely(__pyx_v_replacements == Py_None)) { /* A None argument is rejected with the same TypeError message that len(None) produces. */ + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); + __PYX_ERR(0, 229, __pyx_L1_error) + } + __pyx_t_1 = __Pyx_PyList_GET_SIZE(__pyx_v_replacements); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(0, 229, __pyx_L1_error) + /* The length is read once here; the loop bound below is not re-evaluated. */ __pyx_t_2 = __pyx_t_1; + for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { + __pyx_v_i = __pyx_t_3; + + /* "IndicTransToolkit/processor.pyx":230 + * cdef tuple pair + * for i in range(len(replacements)): + * pair = replacements[i] # <<<<<<<<<<<<<< + * text = pair[0].sub(pair[1], text) + * return text + */ + if (unlikely(__pyx_v_replacements == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); + __PYX_ERR(0, 230, __pyx_L1_error) + } + /* pair is declared as tuple: the item must be an exact tuple or None, otherwise __Pyx_RaiseUnexpectedTypeError is invoked and the error path is taken. NOTE(review): the item is fetched with the PyList_GET_ITEM macro without a visible bounds check, so this appears to rely on the list not shrinking while the loop runs - confirm. */ if (!(likely(PyTuple_CheckExact(PyList_GET_ITEM(__pyx_v_replacements, __pyx_v_i)))||((PyList_GET_ITEM(__pyx_v_replacements, __pyx_v_i)) == Py_None) || __Pyx_RaiseUnexpectedTypeError("tuple", PyList_GET_ITEM(__pyx_v_replacements, __pyx_v_i)))) __PYX_ERR(0, 230, __pyx_L1_error) + 
__pyx_t_4 = PyList_GET_ITEM(__pyx_v_replacements, __pyx_v_i); + __Pyx_INCREF(__pyx_t_4); /* Take a reference to the item before it is stored in pair. */ + __Pyx_XDECREF_SET(__pyx_v_pair, ((PyObject*)__pyx_t_4)); + __pyx_t_4 = 0; + + /* "IndicTransToolkit/processor.pyx":231 + * for i in range(len(replacements)): + * pair = replacements[i] + * text = pair[0].sub(pair[1], text) # <<<<<<<<<<<<<< + * return text + * + */ + if (unlikely(__pyx_v_pair == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); + __PYX_ERR(0, 231, __pyx_L1_error) + } + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(PyTuple_GET_ITEM(__pyx_v_pair, 0), __pyx_n_s_sub); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 231, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + if (unlikely(__pyx_v_pair == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); + __PYX_ERR(0, 231, __pyx_L1_error) + } + /* If the looked-up sub attribute is a bound method, it is split into (function, self) so that self can be passed as the first positional argument of the fast call below; __pyx_t_7 records whether that extra argument is present. */ __pyx_t_6 = NULL; + __pyx_t_7 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_5))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); + __Pyx_INCREF(__pyx_t_6); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_7 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_6, PyTuple_GET_ITEM(__pyx_v_pair, 1), __pyx_v_text}; + __pyx_t_4 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_7, 2+__pyx_t_7); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 231, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + /* text is declared as str: the value returned by sub must be an exact unicode object or None. */ if (!(likely(PyUnicode_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_4))) __PYX_ERR(0, 231, __pyx_L1_error) + __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_4)); + __pyx_t_4 = 0; + } + + /* "IndicTransToolkit/processor.pyx":232 + * pair = replacements[i] + * text = pair[0].sub(pair[1], text) + * return text # <<<<<<<<<<<<<< + * + * # Internal 
Method: Punctuation Normalization + */ + /* The result gets its own reference because the text local is released at the common exit. */ __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_text); + __pyx_r = __pyx_v_text; + goto __pyx_L0; + + /* "IndicTransToolkit/processor.pyx":223 + * + * # Internal Method: Apply punctuation replacements + * cdef str _apply_punc_replacements(self, str text, list replacements) except *: # <<<<<<<<<<<<<< + * """ + * Apply a list of (pattern, replacement) in sequence to text. + */ + + /* function exit code */ + __pyx_L1_error:; /* Error path: drop any live temporaries, add a traceback entry and make the result NULL. */ + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._apply_punc_replacements", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; /* Common exit (success and error): release the references held in pair and text. */ + __Pyx_XDECREF(__pyx_v_pair); + __Pyx_XDECREF(__pyx_v_text); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":235 + * + * # Internal Method: Punctuation Normalization + * cdef str _punc_norm(self, str text) except *: # <<<<<<<<<<<<<< + * """ + * Consolidate punctuation normalization in fewer passes. 
+ */ + +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__punc_norm(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text) { /* Generated C body of the cdef method IndicProcessor._punc_norm (processor.pyx:235). Step 1 passes text and self->_PUNC_REPLACEMENTS to _apply_punc_replacements. Step 2 calls sub(replacement, text) on the attributes _MULTISPACE_REGEX, _END_BRACKET_SPACE_PUNC_REGEX, _DIGIT_SPACE_PERCENT, _DOUBLE_QUOT_PUNC and _DIGIT_NBSP_DIGIT, in that order, rebinding text after each call. Step 3 returns text.strip(). Returns a new reference, or NULL after recording a traceback entry when any step fails. */ + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + unsigned int __pyx_t_4; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_punc_norm", 0); + __Pyx_INCREF(__pyx_v_text); /* Own a reference to text: the local is rebound after every step and released at the common exit. */ + + /* "IndicTransToolkit/processor.pyx":240 + * """ + * # 1) Apply replacements + * text = self._apply_punc_replacements(text, self._PUNC_REPLACEMENTS) # <<<<<<<<<<<<<< + * + * # 2) Additional patterns + */ + /* Step 1: the call is dispatched through the instance vtable; a temporary reference keeps the _PUNC_REPLACEMENTS list alive for the duration of the call. */ __pyx_t_1 = __pyx_v_self->_PUNC_REPLACEMENTS; + __Pyx_INCREF(__pyx_t_1); + __pyx_t_2 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_apply_punc_replacements(__pyx_v_self, __pyx_v_text, ((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 240, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":243 + * + * # 2) Additional patterns + * text = self._MULTISPACE_REGEX.sub(" ", text) # <<<<<<<<<<<<<< + * text = self._END_BRACKET_SPACE_PUNC_REGEX.sub(r")\1", text) + * text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text) + */ + /* Step 2, substitution 1 of 5. Each of the five sections has the same shape: look up sub on the attribute, unpack a bound method into (function, self) when possible, call it with a constant replacement string and the current text, require the result to be an exact unicode object or None, then rebind text. */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_MULTISPACE_REGEX, __pyx_n_s_sub); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 243, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = NULL; + __pyx_t_4 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_4 = 
1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_3, __pyx_kp_u__139, __pyx_v_text}; + __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_4, 2+__pyx_t_4); + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 243, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 243, __pyx_L1_error) + __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":244 + * # 2) Additional patterns + * text = self._MULTISPACE_REGEX.sub(" ", text) + * text = self._END_BRACKET_SPACE_PUNC_REGEX.sub(r")\1", text) # <<<<<<<<<<<<<< + * text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text) + * text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text) + */ + /* Step 2, substitution 2 of 5: _END_BRACKET_SPACE_PUNC_REGEX. */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_END_BRACKET_SPACE_PUNC_REGEX, __pyx_n_s_sub); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 244, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = NULL; + __pyx_t_4 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_4 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_3, __pyx_kp_u_1_2, __pyx_v_text}; + __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_4, 2+__pyx_t_4); + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 244, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 244, __pyx_L1_error) + 
__Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":245 + * text = self._MULTISPACE_REGEX.sub(" ", text) + * text = self._END_BRACKET_SPACE_PUNC_REGEX.sub(r")\1", text) + * text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text) # <<<<<<<<<<<<<< + * text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text) + * text = self._DIGIT_NBSP_DIGIT.sub(r"\1.\2", text) + */ + /* Step 2, substitution 3 of 5: _DIGIT_SPACE_PERCENT. */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_DIGIT_SPACE_PERCENT, __pyx_n_s_sub); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 245, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = NULL; + __pyx_t_4 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_4 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_3, __pyx_kp_u_1_3, __pyx_v_text}; + __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_4, 2+__pyx_t_4); + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 245, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 245, __pyx_L1_error) + __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":246 + * text = self._END_BRACKET_SPACE_PUNC_REGEX.sub(r")\1", text) + * text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text) + * text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text) # <<<<<<<<<<<<<< + * text = self._DIGIT_NBSP_DIGIT.sub(r"\1.\2", text) + * return text.strip() + */ + /* Step 2, substitution 4 of 5: _DOUBLE_QUOT_PUNC. */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_DOUBLE_QUOT_PUNC, __pyx_n_s_sub); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 246, 
__pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = NULL; + __pyx_t_4 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_4 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_3, __pyx_kp_u_1_4, __pyx_v_text}; + __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_4, 2+__pyx_t_4); + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 246, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 246, __pyx_L1_error) + __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":247 + * text = self._DIGIT_SPACE_PERCENT.sub(r"\1%", text) + * text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text) + * text = self._DIGIT_NBSP_DIGIT.sub(r"\1.\2", text) # <<<<<<<<<<<<<< + * return text.strip() + * + */ + /* Step 2, substitution 5 of 5: _DIGIT_NBSP_DIGIT. */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_DIGIT_NBSP_DIGIT, __pyx_n_s_sub); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 247, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = NULL; + __pyx_t_4 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_4 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_3, __pyx_kp_u_1_2_2, __pyx_v_text}; + __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_4, 2+__pyx_t_4); + __Pyx_XDECREF(__pyx_t_3); 
__pyx_t_3 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 247, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 247, __pyx_L1_error) + __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":248 + * text = self._DOUBLE_QUOT_PUNC.sub(r'\1"', text) + * text = self._DIGIT_NBSP_DIGIT.sub(r"\1.\2", text) + * return text.strip() # <<<<<<<<<<<<<< + * + * # Internal Method: Wrap Text with Placeholders + */ + /* Step 3: call strip on text; the result must be an exact unicode object or None and its reference is handed to the caller as the return value. */ __Pyx_XDECREF(__pyx_r); + __pyx_t_2 = __Pyx_CallUnboundCMethod0(&__pyx_umethod_PyUnicode_Type_strip, __pyx_v_text); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 248, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 248, __pyx_L1_error) + __pyx_r = ((PyObject*)__pyx_t_2); + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "IndicTransToolkit/processor.pyx":235 + * + * # Internal Method: Punctuation Normalization + * cdef str _punc_norm(self, str text) except *: # <<<<<<<<<<<<<< + * """ + * Consolidate punctuation normalization in fewer passes. + */ + + /* function exit code */ + __pyx_L1_error:; /* Error path: drop any live temporaries, add a traceback entry and make the result NULL. */ + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._punc_norm", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; /* Common exit (success and error): release the reference held in text. */ + __Pyx_XDECREF(__pyx_v_text); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":251 + * + * # Internal Method: Wrap Text with Placeholders + * cdef str _wrap_with_placeholders(self, str text) except *: # <<<<<<<<<<<<<< + * """ + * Wrap substrings with matched patterns in the text with placeholders. 
+ */ + +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__wrap_with_placeholders(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text) { + int __pyx_v_serial_no; + PyObject *__pyx_v_placeholder_entity_map = 0; + PyObject *__pyx_v_patterns = 0; + PyObject *__pyx_v_pattern = 0; + PyObject *__pyx_v_matches = 0; + PyObject *__pyx_v_match = 0; + PyObject *__pyx_v_base_placeholder = 0; + int __pyx_v_i; + PyObject *__pyx_v_indic_case = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + Py_ssize_t __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + unsigned int __pyx_t_6; + Py_ssize_t __pyx_t_7; + Py_ssize_t __pyx_t_8; + int __pyx_t_9; + int __pyx_t_10; + int __pyx_t_11; + Py_ssize_t __pyx_t_12; + Py_UCS4 __pyx_t_13; + Py_ssize_t __pyx_t_14; + Py_ssize_t __pyx_t_15; + PyObject *__pyx_t_16 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_wrap_with_placeholders", 0); + __Pyx_INCREF(__pyx_v_text); + + /* "IndicTransToolkit/processor.pyx":256 + * Store the placeholder map in the queue for retrieval in postprocessing. 
+ * """ + * cdef int serial_no = 1 # <<<<<<<<<<<<<< + * cdef dict placeholder_entity_map = {} + * cdef list patterns = [ + */ + __pyx_v_serial_no = 1; + + /* "IndicTransToolkit/processor.pyx":257 + * """ + * cdef int serial_no = 1 + * cdef dict placeholder_entity_map = {} # <<<<<<<<<<<<<< + * cdef list patterns = [ + * self._EMAIL_PATTERN, + */ + __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 257, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_placeholder_entity_map = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":258 + * cdef int serial_no = 1 + * cdef dict placeholder_entity_map = {} + * cdef list patterns = [ # <<<<<<<<<<<<<< + * self._EMAIL_PATTERN, + * self._URL_PATTERN, + */ + __pyx_t_1 = PyList_New(4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 258, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_INCREF(__pyx_v_self->_EMAIL_PATTERN); + __Pyx_GIVEREF(__pyx_v_self->_EMAIL_PATTERN); + if (__Pyx_PyList_SET_ITEM(__pyx_t_1, 0, __pyx_v_self->_EMAIL_PATTERN)) __PYX_ERR(0, 258, __pyx_L1_error); + __Pyx_INCREF(__pyx_v_self->_URL_PATTERN); + __Pyx_GIVEREF(__pyx_v_self->_URL_PATTERN); + if (__Pyx_PyList_SET_ITEM(__pyx_t_1, 1, __pyx_v_self->_URL_PATTERN)) __PYX_ERR(0, 258, __pyx_L1_error); + __Pyx_INCREF(__pyx_v_self->_NUMERAL_PATTERN); + __Pyx_GIVEREF(__pyx_v_self->_NUMERAL_PATTERN); + if (__Pyx_PyList_SET_ITEM(__pyx_t_1, 2, __pyx_v_self->_NUMERAL_PATTERN)) __PYX_ERR(0, 258, __pyx_L1_error); + __Pyx_INCREF(__pyx_v_self->_OTHER_PATTERN); + __Pyx_GIVEREF(__pyx_v_self->_OTHER_PATTERN); + if (__Pyx_PyList_SET_ITEM(__pyx_t_1, 3, __pyx_v_self->_OTHER_PATTERN)) __PYX_ERR(0, 258, __pyx_L1_error); + __pyx_v_patterns = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":270 + * cdef int i + * + * for pattern in patterns: # <<<<<<<<<<<<<< + * matches = set(pattern.findall(text)) + * for match in matches: + */ + __pyx_t_1 = __pyx_v_patterns; __Pyx_INCREF(__pyx_t_1); + __pyx_t_2 = 
0; + for (;;) { + { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 270, __pyx_L1_error) + #endif + if (__pyx_t_2 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely((0 < 0))) __PYX_ERR(0, 270, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 270, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + __Pyx_XDECREF_SET(__pyx_v_pattern, __pyx_t_3); + __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":271 + * + * for pattern in patterns: + * matches = set(pattern.findall(text)) # <<<<<<<<<<<<<< + * for match in matches: + * # Additional checks + */ + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_pattern, __pyx_n_s_findall); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 271, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_4))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_4, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, __pyx_v_text}; + __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 1+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 271, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + } + __pyx_t_4 = PySet_New(__pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 271, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_XDECREF_SET(__pyx_v_matches, ((PyObject*)__pyx_t_4)); + __pyx_t_4 = 0; + + /* 
"IndicTransToolkit/processor.pyx":272 + * for pattern in patterns: + * matches = set(pattern.findall(text)) + * for match in matches: # <<<<<<<<<<<<<< + * # Additional checks + * if pattern is self._URL_PATTERN: + */ + __pyx_t_7 = 0; + __pyx_t_3 = __Pyx_set_iterator(__pyx_v_matches, 1, (&__pyx_t_8), (&__pyx_t_9)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 272, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __pyx_t_4 = __pyx_t_3; + __pyx_t_3 = 0; + while (1) { + __pyx_t_10 = __Pyx_set_iter_next(__pyx_t_4, __pyx_t_8, &__pyx_t_7, &__pyx_t_3, __pyx_t_9); + if (unlikely(__pyx_t_10 == 0)) break; + if (unlikely(__pyx_t_10 == -1)) __PYX_ERR(0, 272, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(PyUnicode_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_3))) __PYX_ERR(0, 272, __pyx_L1_error) + __Pyx_XDECREF_SET(__pyx_v_match, ((PyObject*)__pyx_t_3)); + __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":274 + * for match in matches: + * # Additional checks + * if pattern is self._URL_PATTERN: # <<<<<<<<<<<<<< + * if len(match.replace(".", "")) < 4: + * continue + */ + __pyx_t_11 = (__pyx_v_pattern == __pyx_v_self->_URL_PATTERN); + if (__pyx_t_11) { + + /* "IndicTransToolkit/processor.pyx":275 + * # Additional checks + * if pattern is self._URL_PATTERN: + * if len(match.replace(".", "")) < 4: # <<<<<<<<<<<<<< + * continue + * if pattern is self._NUMERAL_PATTERN: + */ + if (unlikely(__pyx_v_match == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "replace"); + __PYX_ERR(0, 275, __pyx_L1_error) + } + __pyx_t_3 = PyUnicode_Replace(__pyx_v_match, __pyx_kp_u__140, __pyx_kp_u__103, -1L); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 275, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_12 = __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); if (unlikely(__pyx_t_12 == ((Py_ssize_t)-1))) __PYX_ERR(0, 275, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; 
+ __pyx_t_11 = (__pyx_t_12 < 4); + if (__pyx_t_11) { + + /* "IndicTransToolkit/processor.pyx":276 + * if pattern is self._URL_PATTERN: + * if len(match.replace(".", "")) < 4: + * continue # <<<<<<<<<<<<<< + * if pattern is self._NUMERAL_PATTERN: + * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4: + */ + goto __pyx_L5_continue; + + /* "IndicTransToolkit/processor.pyx":275 + * # Additional checks + * if pattern is self._URL_PATTERN: + * if len(match.replace(".", "")) < 4: # <<<<<<<<<<<<<< + * continue + * if pattern is self._NUMERAL_PATTERN: + */ + } + + /* "IndicTransToolkit/processor.pyx":274 + * for match in matches: + * # Additional checks + * if pattern is self._URL_PATTERN: # <<<<<<<<<<<<<< + * if len(match.replace(".", "")) < 4: + * continue + */ + } + + /* "IndicTransToolkit/processor.pyx":277 + * if len(match.replace(".", "")) < 4: + * continue + * if pattern is self._NUMERAL_PATTERN: # <<<<<<<<<<<<<< + * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4: + * continue + */ + __pyx_t_11 = (__pyx_v_pattern == __pyx_v_self->_NUMERAL_PATTERN); + if (__pyx_t_11) { + + /* "IndicTransToolkit/processor.pyx":278 + * continue + * if pattern is self._NUMERAL_PATTERN: + * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4: # <<<<<<<<<<<<<< + * continue + * + */ + if (unlikely(__pyx_v_match == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "replace"); + __PYX_ERR(0, 278, __pyx_L1_error) + } + __pyx_t_3 = PyUnicode_Replace(__pyx_v_match, __pyx_kp_u__139, __pyx_kp_u__103, -1L); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 278, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_5 = PyUnicode_Replace(((PyObject*)__pyx_t_3), __pyx_kp_u__140, __pyx_kp_u__103, -1L); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 278, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = PyUnicode_Replace(((PyObject*)__pyx_t_5), __pyx_kp_u__106, 
__pyx_kp_u__103, -1L); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 278, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_12 = __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); if (unlikely(__pyx_t_12 == ((Py_ssize_t)-1))) __PYX_ERR(0, 278, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_11 = (__pyx_t_12 < 4); + if (__pyx_t_11) { + + /* "IndicTransToolkit/processor.pyx":279 + * if pattern is self._NUMERAL_PATTERN: + * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4: + * continue # <<<<<<<<<<<<<< + * + * base_placeholder = f"" + */ + goto __pyx_L5_continue; + + /* "IndicTransToolkit/processor.pyx":278 + * continue + * if pattern is self._NUMERAL_PATTERN: + * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4: # <<<<<<<<<<<<<< + * continue + * + */ + } + + /* "IndicTransToolkit/processor.pyx":277 + * if len(match.replace(".", "")) < 4: + * continue + * if pattern is self._NUMERAL_PATTERN: # <<<<<<<<<<<<<< + * if len(match.replace(" ", "").replace(".", "").replace(":", "")) < 4: + * continue + */ + } + + /* "IndicTransToolkit/processor.pyx":281 + * continue + * + * base_placeholder = f"" # <<<<<<<<<<<<<< + * # Map various placeholder formats to the matched text + * placeholder_entity_map[f""] = match + */ + __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 281, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_12 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u_ID); + __pyx_t_12 += 3; + __Pyx_GIVEREF(__pyx_kp_u_ID); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u_ID); + __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 281, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_12 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5); + __pyx_t_5 = 0; + __Pyx_INCREF(__pyx_kp_u__141); + __pyx_t_12 += 1; + __Pyx_GIVEREF(__pyx_kp_u__141); + 
PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_kp_u__141); + __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 3, __pyx_t_12, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 281, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_XDECREF_SET(__pyx_v_base_placeholder, ((PyObject*)__pyx_t_5)); + __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":283 + * base_placeholder = f"" + * # Map various placeholder formats to the matched text + * placeholder_entity_map[f""] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"< ID{serial_no} >"] = match + * placeholder_entity_map[f"[ID{serial_no}]"] = match + */ + __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 283, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_12 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u_ID); + __pyx_t_12 += 3; + __Pyx_GIVEREF(__pyx_kp_u_ID); + PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u_ID); + __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 283, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_12 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__141); + __pyx_t_12 += 1; + __Pyx_GIVEREF(__pyx_kp_u__141); + PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__141); + __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 3, __pyx_t_12, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 283, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 283, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":284 + * # Map various placeholder formats to the matched text + * placeholder_entity_map[f""] = match + * placeholder_entity_map[f"< ID{serial_no} >"] = match # <<<<<<<<<<<<<< + * 
placeholder_entity_map[f"[ID{serial_no}]"] = match + * placeholder_entity_map[f"[ ID{serial_no} ]"] = match + */ + __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 284, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_12 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u_ID_2); + __pyx_t_12 += 4; + __Pyx_GIVEREF(__pyx_kp_u_ID_2); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u_ID_2); + __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 284, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_12 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5); + __pyx_t_5 = 0; + __Pyx_INCREF(__pyx_kp_u__142); + __pyx_t_12 += 2; + __Pyx_GIVEREF(__pyx_kp_u__142); + PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_kp_u__142); + __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 3, __pyx_t_12, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 284, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 284, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":285 + * placeholder_entity_map[f""] = match + * placeholder_entity_map[f"< ID{serial_no} >"] = match + * placeholder_entity_map[f"[ID{serial_no}]"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"[ ID{serial_no} ]"] = match + * placeholder_entity_map[f"[ID {serial_no}]"] = match + */ + __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 285, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_12 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u_ID_3); + __pyx_t_12 += 3; + __Pyx_GIVEREF(__pyx_kp_u_ID_3); + PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u_ID_3); + __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 285, __pyx_L1_error) + 
__Pyx_GOTREF(__pyx_t_3); + __pyx_t_12 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__143); + __pyx_t_12 += 1; + __Pyx_GIVEREF(__pyx_kp_u__143); + PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__143); + __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 3, __pyx_t_12, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 285, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 285, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":286 + * placeholder_entity_map[f"< ID{serial_no} >"] = match + * placeholder_entity_map[f"[ID{serial_no}]"] = match + * placeholder_entity_map[f"[ ID{serial_no} ]"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"[ID {serial_no}]"] = match + * placeholder_entity_map[f""] = match + */ + __pyx_t_5 = __pyx_v_self->_INDIC_FAILURE_CASES; + __Pyx_INCREF(__pyx_t_5); + if (unlikely(__pyx_t_5 == Py_None)) { + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); + __PYX_ERR(0, 293, __pyx_L1_error) + } + __pyx_t_12 = __Pyx_PyList_GET_SIZE(__pyx_t_5); if (unlikely(__pyx_t_12 == ((Py_ssize_t)-1))) __PYX_ERR(0, 293, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_14 = __pyx_t_12; + for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_14; __pyx_t_10+=1) { + __pyx_v_i = __pyx_t_10; + + /* "IndicTransToolkit/processor.pyx":294 + * # Handle Indic failure cases + * for i in range(len(self._INDIC_FAILURE_CASES)): + * indic_case = self._INDIC_FAILURE_CASES[i] # <<<<<<<<<<<<<< + * placeholder_entity_map[f"<{indic_case}{serial_no}>"] = match + * placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match + */ + if (unlikely(__pyx_v_self->_INDIC_FAILURE_CASES == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not 
subscriptable"); + __PYX_ERR(0, 294, __pyx_L1_error) + } + __pyx_t_5 = PyList_GET_ITEM(__pyx_v_self->_INDIC_FAILURE_CASES, __pyx_v_i); + __Pyx_INCREF(__pyx_t_5); + __Pyx_XDECREF_SET(__pyx_v_indic_case, __pyx_t_5); + __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":295 + * for i in range(len(self._INDIC_FAILURE_CASES)): + * indic_case = self._INDIC_FAILURE_CASES[i] + * placeholder_entity_map[f"<{indic_case}{serial_no}>"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match + * placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match + */ + __pyx_t_5 = PyTuple_New(4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 295, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u__145); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__145); + PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u__145); + __pyx_t_3 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 295, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) > __pyx_t_13) ? 
__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) : __pyx_t_13; + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 295, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__141); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__141); + PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_kp_u__141); + __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 4, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 295, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 295, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":296 + * indic_case = self._INDIC_FAILURE_CASES[i] + * placeholder_entity_map[f"<{indic_case}{serial_no}>"] = match + * placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match + * placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match + */ + __pyx_t_3 = PyTuple_New(4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 296, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_15 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u__146); + __pyx_t_15 += 2; + __Pyx_GIVEREF(__pyx_kp_u__146); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u__146); + __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 296, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) > __pyx_t_13) ? 
__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) : __pyx_t_13; + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5); + __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 296, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_5); + __pyx_t_5 = 0; + __Pyx_INCREF(__pyx_kp_u__142); + __pyx_t_15 += 2; + __Pyx_GIVEREF(__pyx_kp_u__142); + PyTuple_SET_ITEM(__pyx_t_3, 3, __pyx_kp_u__142); + __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 4, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 296, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 296, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":297 + * placeholder_entity_map[f"<{indic_case}{serial_no}>"] = match + * placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match + * placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match + * placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match + */ + __pyx_t_5 = PyTuple_New(5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 297, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u__146); + __pyx_t_15 += 2; + __Pyx_GIVEREF(__pyx_kp_u__146); + PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u__146); + __pyx_t_3 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 297, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) > __pyx_t_13) ? 
__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) : __pyx_t_13; + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__139); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__139); + PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__139); + __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 297, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__142); + __pyx_t_15 += 2; + __Pyx_GIVEREF(__pyx_kp_u__142); + PyTuple_SET_ITEM(__pyx_t_5, 4, __pyx_kp_u__142); + __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 5, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 297, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 297, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":298 + * placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match + * placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match + * placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match + * placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match + */ + __pyx_t_3 = PyTuple_New(5); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 298, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_15 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u__145); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__145); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u__145); + __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 298, __pyx_L1_error) + 
__Pyx_GOTREF(__pyx_t_5); + __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) : __pyx_t_13; + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5); + __pyx_t_5 = 0; + __Pyx_INCREF(__pyx_kp_u__139); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__139); + PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_kp_u__139); + __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 298, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 3, __pyx_t_5); + __pyx_t_5 = 0; + __Pyx_INCREF(__pyx_kp_u__143); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__143); + PyTuple_SET_ITEM(__pyx_t_3, 4, __pyx_kp_u__143); + __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 5, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 298, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 298, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":299 + * placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match + * placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match + * placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match + * placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match + */ + __pyx_t_5 = PyTuple_New(5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 299, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u__146); + __pyx_t_15 += 2; + __Pyx_GIVEREF(__pyx_kp_u__146); + PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u__146); + __pyx_t_3 = 
__Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) : __pyx_t_13; + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__139); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__139); + PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__139); + __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__144); + __pyx_t_15 += 2; + __Pyx_GIVEREF(__pyx_kp_u__144); + PyTuple_SET_ITEM(__pyx_t_5, 4, __pyx_kp_u__144); + __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 5, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 299, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":300 + * placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match + * placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match + * placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match + * placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match + */ + __pyx_t_3 = PyTuple_New(4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 300, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_15 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u__147); + __pyx_t_15 += 1; + 
__Pyx_GIVEREF(__pyx_kp_u__147); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u__147); + __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 300, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) : __pyx_t_13; + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5); + __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 300, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_5); + __pyx_t_5 = 0; + __Pyx_INCREF(__pyx_kp_u__143); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__143); + PyTuple_SET_ITEM(__pyx_t_3, 3, __pyx_kp_u__143); + __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 4, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 300, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 300, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":301 + * placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match + * placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match + * placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match + * placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match + */ + __pyx_t_5 = PyTuple_New(5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 301, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u__147); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__147); + 
PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u__147); + __pyx_t_3 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 301, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) : __pyx_t_13; + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__139); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__139); + PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__139); + __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 301, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__143); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__143); + PyTuple_SET_ITEM(__pyx_t_5, 4, __pyx_kp_u__143); + __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 5, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 301, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 301, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":302 + * placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match + * placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match + * placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match + * placeholder_entity_map[f"{indic_case} {serial_no}"] = match + */ + __pyx_t_3 = PyTuple_New(4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 302, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_15 = 0; + __pyx_t_13 = 127; 
+ __Pyx_INCREF(__pyx_kp_u__148); + __pyx_t_15 += 2; + __Pyx_GIVEREF(__pyx_kp_u__148); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u__148); + __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 302, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) : __pyx_t_13; + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5); + __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 302, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_5); + __pyx_t_5 = 0; + __Pyx_INCREF(__pyx_kp_u__144); + __pyx_t_15 += 2; + __Pyx_GIVEREF(__pyx_kp_u__144); + PyTuple_SET_ITEM(__pyx_t_3, 3, __pyx_kp_u__144); + __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 4, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 302, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 302, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":303 + * placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match + * placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match + * placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"{indic_case} {serial_no}"] = match + * placeholder_entity_map[f"{indic_case}{serial_no}"] = match + */ + __pyx_t_5 = PyTuple_New(5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 303, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 = 0; + __pyx_t_13 = 127; + __Pyx_INCREF(__pyx_kp_u__148); + __pyx_t_15 += 
2; + __Pyx_GIVEREF(__pyx_kp_u__148); + PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u__148); + __pyx_t_3 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 303, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_3) : __pyx_t_13; + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__139); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__139); + PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__139); + __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 303, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_3); + __pyx_t_3 = 0; + __Pyx_INCREF(__pyx_kp_u__144); + __pyx_t_15 += 2; + __Pyx_GIVEREF(__pyx_kp_u__144); + PyTuple_SET_ITEM(__pyx_t_5, 4, __pyx_kp_u__144); + __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 5, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 303, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_3, __pyx_v_match) < 0))) __PYX_ERR(0, 303, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":304 + * placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match + * placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match + * placeholder_entity_map[f"{indic_case} {serial_no}"] = match # <<<<<<<<<<<<<< + * placeholder_entity_map[f"{indic_case}{serial_no}"] = match + * + */ + __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 304, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_15 = 0; + __pyx_t_13 = 127; + __pyx_t_5 = 
__Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 304, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_13 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) > __pyx_t_13) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_5) : __pyx_t_13; + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_5); + __pyx_t_5 = 0; + __Pyx_INCREF(__pyx_kp_u__139); + __pyx_t_15 += 1; + __Pyx_GIVEREF(__pyx_kp_u__139); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_kp_u__139); + __pyx_t_5 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 304, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_15 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_5); + __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 3, __pyx_t_15, __pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 304, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_5, __pyx_v_match) < 0))) __PYX_ERR(0, 304, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":305 + * placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match + * placeholder_entity_map[f"{indic_case} {serial_no}"] = match + * placeholder_entity_map[f"{indic_case}{serial_no}"] = match # <<<<<<<<<<<<<< + * + * # Replace the match with the base placeholder + */ + __pyx_t_5 = __Pyx_PyObject_FormatSimple(__pyx_v_indic_case, __pyx_empty_unicode); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 305, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_3 = __Pyx_PyUnicode_From_int(__pyx_v_serial_no, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 305, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_16 = __Pyx_PyUnicode_ConcatInPlace(__pyx_t_5, __pyx_t_3); if (unlikely(!__pyx_t_16)) 
__PYX_ERR(0, 305, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely((PyDict_SetItem(__pyx_v_placeholder_entity_map, __pyx_t_16, __pyx_v_match) < 0))) __PYX_ERR(0, 305, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + } + + /* "IndicTransToolkit/processor.pyx":308 + * + * # Replace the match with the base placeholder + * text = text.replace(match, base_placeholder) # <<<<<<<<<<<<<< + * serial_no += 1 + * + */ + if (unlikely(__pyx_v_text == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "replace"); + __PYX_ERR(0, 308, __pyx_L1_error) + } + __pyx_t_16 = PyUnicode_Replace(__pyx_v_text, __pyx_v_match, __pyx_v_base_placeholder, -1L); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 308, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_16)); + __pyx_t_16 = 0; + + /* "IndicTransToolkit/processor.pyx":309 + * # Replace the match with the base placeholder + * text = text.replace(match, base_placeholder) + * serial_no += 1 # <<<<<<<<<<<<<< + * + * # Clean up any remaining placeholder artifacts + */ + __pyx_v_serial_no = (__pyx_v_serial_no + 1); + __pyx_L5_continue:; + } + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + + /* "IndicTransToolkit/processor.pyx":270 + * cdef int i + * + * for pattern in patterns: # <<<<<<<<<<<<<< + * matches = set(pattern.findall(text)) + * for match in matches: + */ + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":312 + * + * # Clean up any remaining placeholder artifacts + * text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]") # <<<<<<<<<<<<<< + * self._placeholder_entity_maps.put(placeholder_entity_map) + * return text + */ + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_re); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_16 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_sub); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_16))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_16); + if (likely(__pyx_t_4)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_16); + __Pyx_INCREF(__pyx_t_4); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_16, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[4] = {__pyx_t_4, __pyx_kp_u_s_3, __pyx_kp_u__139, __pyx_v_text}; + __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_16, __pyx_callargs+1-__pyx_t_6, 3+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + } + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_replace); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_16, __pyx_tuple__150, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_replace); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_16, __pyx_tuple__152, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_1)); + __pyx_t_1 = 0; + + /* 
"IndicTransToolkit/processor.pyx":313 + * # Clean up any remaining placeholder artifacts + * text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]") + * self._placeholder_entity_maps.put(placeholder_entity_map) # <<<<<<<<<<<<<< + * return text + * + */ + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_placeholder_entity_maps, __pyx_n_s_put); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 313, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __pyx_t_4 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_16))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_16); + if (likely(__pyx_t_4)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_16); + __Pyx_INCREF(__pyx_t_4); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_16, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_4, __pyx_v_placeholder_entity_map}; + __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_16, __pyx_callargs+1-__pyx_t_6, 1+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 313, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":314 + * text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]") + * self._placeholder_entity_maps.put(placeholder_entity_map) + * return text # <<<<<<<<<<<<<< + * + * # Internal Method: Normalize Text + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_text); + __pyx_r = __pyx_v_text; + goto __pyx_L0; + + /* "IndicTransToolkit/processor.pyx":251 + * + * # Internal Method: Wrap Text with Placeholders + * cdef str _wrap_with_placeholders(self, str text) except *: # <<<<<<<<<<<<<< + * """ + * Wrap substrings with matched patterns in the text with placeholders. 
+ */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_16); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._wrap_with_placeholders", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_placeholder_entity_map); + __Pyx_XDECREF(__pyx_v_patterns); + __Pyx_XDECREF(__pyx_v_pattern); + __Pyx_XDECREF(__pyx_v_matches); + __Pyx_XDECREF(__pyx_v_match); + __Pyx_XDECREF(__pyx_v_base_placeholder); + __Pyx_XDECREF(__pyx_v_indic_case); + __Pyx_XDECREF(__pyx_v_text); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":317 + * + * # Internal Method: Normalize Text + * cdef str _normalize(self, str text) except *: # <<<<<<<<<<<<<< + * """ + * Normalizes numerals and optionally wraps placeholders. + */ + +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__normalize(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_text) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_normalize", 0); + __Pyx_INCREF(__pyx_v_text); + + /* "IndicTransToolkit/processor.pyx":322 + * """ + * # Single-pass digit translation + * text = text.translate(self._digits_translation_table) # <<<<<<<<<<<<<< + * + * if self.inference: + */ + __pyx_t_1 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PyUnicode_Type_translate, __pyx_v_text, __pyx_v_self->_digits_translation_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 322, __pyx_L1_error) + 
__Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_1)); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":324 + * text = text.translate(self._digits_translation_table) + * + * if self.inference: # <<<<<<<<<<<<<< + * text = self._wrap_with_placeholders(text) + * return text + */ + if (__pyx_v_self->inference) { + + /* "IndicTransToolkit/processor.pyx":325 + * + * if self.inference: + * text = self._wrap_with_placeholders(text) # <<<<<<<<<<<<<< + * return text + * + */ + __pyx_t_1 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_wrap_with_placeholders(__pyx_v_self, __pyx_v_text); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 325, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_text, ((PyObject*)__pyx_t_1)); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":324 + * text = text.translate(self._digits_translation_table) + * + * if self.inference: # <<<<<<<<<<<<<< + * text = self._wrap_with_placeholders(text) + * return text + */ + } + + /* "IndicTransToolkit/processor.pyx":326 + * if self.inference: + * text = self._wrap_with_placeholders(text) + * return text # <<<<<<<<<<<<<< + * + * # Internal Method: Indic Tokenize and Transliterate + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_text); + __pyx_r = __pyx_v_text; + goto __pyx_L0; + + /* "IndicTransToolkit/processor.pyx":317 + * + * # Internal Method: Normalize Text + * cdef str _normalize(self, str text) except *: # <<<<<<<<<<<<<< + * """ + * Normalizes numerals and optionally wraps placeholders. 
+ */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._normalize", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_text); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":329 + * + * # Internal Method: Indic Tokenize and Transliterate + * cdef str _do_indic_tokenize_and_transliterate( # <<<<<<<<<<<<<< + * self, + * str sentence, + */ + +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__do_indic_tokenize_and_transliterate(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sentence, PyObject *__pyx_v_normalizer, PyObject *__pyx_v_iso_lang, int __pyx_v_transliterate) { + PyObject *__pyx_v_normed = 0; + PyObject *__pyx_v_tokens = 0; + PyObject *__pyx_v_joined = 0; + PyObject *__pyx_v_xlated = 0; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + unsigned int __pyx_t_5; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_do_indic_tokenize_and_transliterate", 1); + + /* "IndicTransToolkit/processor.pyx":344 + * cdef str xlated + * + * normed = normalizer.normalize(sentence.strip()) # <<<<<<<<<<<<<< + * tokens = indic_tokenize.trivial_tokenize(normed, iso_lang) + * joined = " ".join(tokens) + */ + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_normalizer, __pyx_n_s_normalize); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 344, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_CallUnboundCMethod0(&__pyx_umethod_PyUnicode_Type_strip, __pyx_v_sentence); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 344, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = NULL; + __pyx_t_5 = 0; + #if CYTHON_UNPACK_METHODS 
+ if (likely(PyMethod_Check(__pyx_t_2))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2); + if (likely(__pyx_t_4)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2); + __Pyx_INCREF(__pyx_t_4); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_2, function); + __pyx_t_5 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_4, __pyx_t_3}; + __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_2, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5); + __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 344, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 344, __pyx_L1_error) + __pyx_v_normed = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":345 + * + * normed = normalizer.normalize(sentence.strip()) + * tokens = indic_tokenize.trivial_tokenize(normed, iso_lang) # <<<<<<<<<<<<<< + * joined = " ".join(tokens) + * xlated = joined + */ + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_indic_tokenize); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 345, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_trivial_tokenize); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 345, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = NULL; + __pyx_t_5 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_3))) { + __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3); + if (likely(__pyx_t_2)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); + __Pyx_INCREF(__pyx_t_2); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_3, function); + __pyx_t_5 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_2, __pyx_v_normed, __pyx_v_iso_lang}; + __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_3, 
__pyx_callargs+1-__pyx_t_5, 2+__pyx_t_5); + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 345, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } + if (!(likely(PyList_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_1))) __PYX_ERR(0, 345, __pyx_L1_error) + __pyx_v_tokens = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":346 + * normed = normalizer.normalize(sentence.strip()) + * tokens = indic_tokenize.trivial_tokenize(normed, iso_lang) + * joined = " ".join(tokens) # <<<<<<<<<<<<<< + * xlated = joined + * if transliterate: + */ + __pyx_t_1 = PyUnicode_Join(__pyx_kp_u__139, __pyx_v_tokens); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 346, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_joined = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":347 + * tokens = indic_tokenize.trivial_tokenize(normed, iso_lang) + * joined = " ".join(tokens) + * xlated = joined # <<<<<<<<<<<<<< + * if transliterate: + * xlated = self._xliterator.transliterate(joined, iso_lang, "hi") + */ + __Pyx_INCREF(__pyx_v_joined); + __pyx_v_xlated = __pyx_v_joined; + + /* "IndicTransToolkit/processor.pyx":348 + * joined = " ".join(tokens) + * xlated = joined + * if transliterate: # <<<<<<<<<<<<<< + * xlated = self._xliterator.transliterate(joined, iso_lang, "hi") + * xlated = xlated.replace(" ", "") + */ + if (__pyx_v_transliterate) { + + /* "IndicTransToolkit/processor.pyx":349 + * xlated = joined + * if transliterate: + * xlated = self._xliterator.transliterate(joined, iso_lang, "hi") # <<<<<<<<<<<<<< + * xlated = xlated.replace(" ", "") + * return xlated + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_xliterator, __pyx_n_s_transliterate); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 349, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = NULL; + __pyx_t_5 = 0; + #if CYTHON_UNPACK_METHODS + if 
(likely(PyMethod_Check(__pyx_t_3))) { + __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3); + if (likely(__pyx_t_2)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); + __Pyx_INCREF(__pyx_t_2); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_3, function); + __pyx_t_5 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[4] = {__pyx_t_2, __pyx_v_joined, __pyx_v_iso_lang, __pyx_n_u_hi}; + __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_3, __pyx_callargs+1-__pyx_t_5, 3+__pyx_t_5); + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 349, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 349, __pyx_L1_error) + __Pyx_DECREF_SET(__pyx_v_xlated, ((PyObject*)__pyx_t_1)); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":350 + * if transliterate: + * xlated = self._xliterator.transliterate(joined, iso_lang, "hi") + * xlated = xlated.replace(" ", "") # <<<<<<<<<<<<<< + * return xlated + * + */ + if (unlikely(__pyx_v_xlated == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "replace"); + __PYX_ERR(0, 350, __pyx_L1_error) + } + __pyx_t_1 = PyUnicode_Replace(__pyx_v_xlated, __pyx_kp_u__153, __pyx_kp_u__154, -1L); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 350, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_xlated, ((PyObject*)__pyx_t_1)); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":348 + * joined = " ".join(tokens) + * xlated = joined + * if transliterate: # <<<<<<<<<<<<<< + * xlated = self._xliterator.transliterate(joined, iso_lang, "hi") + * xlated = xlated.replace(" ", "") + */ + } + + /* "IndicTransToolkit/processor.pyx":351 + * xlated = self._xliterator.transliterate(joined, iso_lang, "hi") + * xlated = xlated.replace(" ", "") + * return xlated # <<<<<<<<<<<<<< + * + * # 
Internal Method: Preprocess a Single Sentence + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_xlated); + __pyx_r = __pyx_v_xlated; + goto __pyx_L0; + + /* "IndicTransToolkit/processor.pyx":329 + * + * # Internal Method: Indic Tokenize and Transliterate + * cdef str _do_indic_tokenize_and_transliterate( # <<<<<<<<<<<<<< + * self, + * str sentence, + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._do_indic_tokenize_and_transliterate", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_normed); + __Pyx_XDECREF(__pyx_v_tokens); + __Pyx_XDECREF(__pyx_v_joined); + __Pyx_XDECREF(__pyx_v_xlated); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":354 + * + * # Internal Method: Preprocess a Single Sentence + * cdef str _preprocess( # <<<<<<<<<<<<<< + * self, + * str sent, + */ + +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__preprocess(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sent, PyObject *__pyx_v_src_lang, PyObject *__pyx_v_tgt_lang, PyObject *__pyx_v_normalizer, int __pyx_v_is_target) { + PyObject *__pyx_v_iso_lang = 0; + PyObject *__pyx_v_script_part = 0; + int __pyx_v_do_transliterate; + PyObject *__pyx_v_e_strip = 0; + PyObject *__pyx_v_e_norm = 0; + PyObject *__pyx_v_e_tokens = 0; + PyObject *__pyx_v_processed_sent = 0; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + int __pyx_t_5; + PyObject *__pyx_t_6 = NULL; + unsigned int __pyx_t_7; + PyObject *__pyx_t_8 = NULL; + Py_ssize_t __pyx_t_9; + Py_UCS4 __pyx_t_10; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; 
+ int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_preprocess", 0); + __Pyx_INCREF(__pyx_v_sent); + + /* "IndicTransToolkit/processor.pyx":366 + * tokenization, transliteration, and adding language tags if necessary. + * """ + * cdef str iso_lang = self._flores_codes.get(src_lang, "hi") # <<<<<<<<<<<<<< + * cdef str script_part = src_lang.split("_")[1] + * cdef bint do_transliterate = True + */ + if (unlikely(__pyx_v_self->_flores_codes == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "get"); + __PYX_ERR(0, 366, __pyx_L1_error) + } + __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_self->_flores_codes, __pyx_v_src_lang, __pyx_n_u_hi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 366, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 366, __pyx_L1_error) + __pyx_v_iso_lang = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":367 + * """ + * cdef str iso_lang = self._flores_codes.get(src_lang, "hi") + * cdef str script_part = src_lang.split("_")[1] # <<<<<<<<<<<<<< + * cdef bint do_transliterate = True + * cdef str e_strip + */ + if (unlikely(__pyx_v_src_lang == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "split"); + __PYX_ERR(0, 367, __pyx_L1_error) + } + __pyx_t_1 = PyUnicode_Split(__pyx_v_src_lang, __Pyx_NoneAsNull(__pyx_n_u__155), -1L); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 367, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_GetItemInt_List(__pyx_t_1, 1, long, 1, __Pyx_PyInt_From_long, 1, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 367, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 367, __pyx_L1_error) 
+ __pyx_v_script_part = ((PyObject*)__pyx_t_2); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":368 + * cdef str iso_lang = self._flores_codes.get(src_lang, "hi") + * cdef str script_part = src_lang.split("_")[1] + * cdef bint do_transliterate = True # <<<<<<<<<<<<<< + * cdef str e_strip + * cdef str e_norm + */ + __pyx_v_do_transliterate = 1; + + /* "IndicTransToolkit/processor.pyx":375 + * + * # 1) Punctuation normalization + * sent = self._punc_norm(sent) # <<<<<<<<<<<<<< + * + * # 2) Numerals & placeholders + */ + __pyx_t_2 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_punc_norm(__pyx_v_self, __pyx_v_sent); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 375, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF_SET(__pyx_v_sent, ((PyObject*)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":378 + * + * # 2) Numerals & placeholders + * sent = self._normalize(sent) # <<<<<<<<<<<<<< + * + * if script_part in ["Arab", "Aran", "Olck", "Mtei", "Latn"]: + */ + __pyx_t_2 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_normalize(__pyx_v_self, __pyx_v_sent); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 378, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF_SET(__pyx_v_sent, ((PyObject*)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":380 + * sent = self._normalize(sent) + * + * if script_part in ["Arab", "Aran", "Olck", "Mtei", "Latn"]: # <<<<<<<<<<<<<< + * do_transliterate = False + * + */ + __Pyx_INCREF(__pyx_v_script_part); + __pyx_t_3 = __pyx_v_script_part; + __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_t_3, __pyx_n_u_Arab, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 380, __pyx_L1_error) + if (!__pyx_t_5) { + } else { + __pyx_t_4 = __pyx_t_5; + goto __pyx_L4_bool_binop_done; + } + __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_t_3, __pyx_n_u_Aran, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 380, 
__pyx_L1_error) + if (!__pyx_t_5) { + } else { + __pyx_t_4 = __pyx_t_5; + goto __pyx_L4_bool_binop_done; + } + __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_t_3, __pyx_n_u_Olck, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 380, __pyx_L1_error) + if (!__pyx_t_5) { + } else { + __pyx_t_4 = __pyx_t_5; + goto __pyx_L4_bool_binop_done; + } + __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_t_3, __pyx_n_u_Mtei, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 380, __pyx_L1_error) + if (!__pyx_t_5) { + } else { + __pyx_t_4 = __pyx_t_5; + goto __pyx_L4_bool_binop_done; + } + __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_t_3, __pyx_n_u_Latn, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_t_4 = __pyx_t_5; + __pyx_L4_bool_binop_done:; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_5 = __pyx_t_4; + if (__pyx_t_5) { + + /* "IndicTransToolkit/processor.pyx":381 + * + * if script_part in ["Arab", "Aran", "Olck", "Mtei", "Latn"]: + * do_transliterate = False # <<<<<<<<<<<<<< + * + * if iso_lang == "en": + */ + __pyx_v_do_transliterate = 0; + + /* "IndicTransToolkit/processor.pyx":380 + * sent = self._normalize(sent) + * + * if script_part in ["Arab", "Aran", "Olck", "Mtei", "Latn"]: # <<<<<<<<<<<<<< + * do_transliterate = False + * + */ + } + + /* "IndicTransToolkit/processor.pyx":383 + * do_transliterate = False + * + * if iso_lang == "en": # <<<<<<<<<<<<<< + * # English path + * e_strip = sent.strip() + */ + __pyx_t_5 = (__Pyx_PyUnicode_Equals(__pyx_v_iso_lang, __pyx_n_u_en, Py_EQ)); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 383, __pyx_L1_error) + if (__pyx_t_5) { + + /* "IndicTransToolkit/processor.pyx":385 + * if iso_lang == "en": + * # English path + * e_strip = sent.strip() # <<<<<<<<<<<<<< + * e_norm = self._en_normalizer.normalize(e_strip) + * e_tokens = self._en_tok.tokenize(e_norm, escape=False) + */ + __pyx_t_2 = __Pyx_CallUnboundCMethod0(&__pyx_umethod_PyUnicode_Type_strip, __pyx_v_sent); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 
385, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 385, __pyx_L1_error) + __pyx_v_e_strip = ((PyObject*)__pyx_t_2); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":386 + * # English path + * e_strip = sent.strip() + * e_norm = self._en_normalizer.normalize(e_strip) # <<<<<<<<<<<<<< + * e_tokens = self._en_tok.tokenize(e_norm, escape=False) + * processed_sent = " ".join(e_tokens) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_en_normalizer, __pyx_n_s_normalize); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 386, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_6 = NULL; + __pyx_t_7 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_6); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_7 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_6, __pyx_v_e_strip}; + __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_1, __pyx_callargs+1-__pyx_t_7, 1+__pyx_t_7); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 386, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_2))) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_v_e_norm = ((PyObject*)__pyx_t_2); + __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":387 + * e_strip = sent.strip() + * e_norm = self._en_normalizer.normalize(e_strip) + * e_tokens = self._en_tok.tokenize(e_norm, escape=False) # <<<<<<<<<<<<<< + * processed_sent = " ".join(e_tokens) + * else: + */ + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_en_tok, __pyx_n_s_tokenize); if 
(unlikely(!__pyx_t_2)) __PYX_ERR(0, 387, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 387, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_INCREF(__pyx_v_e_norm); + __Pyx_GIVEREF(__pyx_v_e_norm); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_e_norm)) __PYX_ERR(0, 387, __pyx_L1_error); + __pyx_t_6 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 387, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + if (PyDict_SetItem(__pyx_t_6, __pyx_n_s_escape, Py_False) < 0) __PYX_ERR(0, 387, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_1, __pyx_t_6); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 387, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + if (!(likely(PyList_CheckExact(__pyx_t_8))||((__pyx_t_8) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_8))) __PYX_ERR(0, 387, __pyx_L1_error) + __pyx_v_e_tokens = ((PyObject*)__pyx_t_8); + __pyx_t_8 = 0; + + /* "IndicTransToolkit/processor.pyx":388 + * e_norm = self._en_normalizer.normalize(e_strip) + * e_tokens = self._en_tok.tokenize(e_norm, escape=False) + * processed_sent = " ".join(e_tokens) # <<<<<<<<<<<<<< + * else: + * # Indic path + */ + __pyx_t_8 = PyUnicode_Join(__pyx_kp_u__139, __pyx_v_e_tokens); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 388, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __pyx_v_processed_sent = ((PyObject*)__pyx_t_8); + __pyx_t_8 = 0; + + /* "IndicTransToolkit/processor.pyx":383 + * do_transliterate = False + * + * if iso_lang == "en": # <<<<<<<<<<<<<< + * # English path + * e_strip = sent.strip() + */ + goto __pyx_L9; + } + + /* "IndicTransToolkit/processor.pyx":391 + * else: + * # Indic path + * processed_sent = self._do_indic_tokenize_and_transliterate(sent, normalizer, iso_lang, do_transliterate) # <<<<<<<<<<<<<< + * + * processed_sent = processed_sent.strip() + 
*/ + /*else*/ { + __pyx_t_8 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_do_indic_tokenize_and_transliterate(__pyx_v_self, __pyx_v_sent, __pyx_v_normalizer, __pyx_v_iso_lang, __pyx_v_do_transliterate); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 391, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __pyx_v_processed_sent = ((PyObject*)__pyx_t_8); + __pyx_t_8 = 0; + } + __pyx_L9:; + + /* "IndicTransToolkit/processor.pyx":393 + * processed_sent = self._do_indic_tokenize_and_transliterate(sent, normalizer, iso_lang, do_transliterate) + * + * processed_sent = processed_sent.strip() # <<<<<<<<<<<<<< + * if not is_target: + * return f"{src_lang} {tgt_lang} {processed_sent}" + */ + __pyx_t_8 = __Pyx_CallUnboundCMethod0(&__pyx_umethod_PyUnicode_Type_strip, __pyx_v_processed_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 393, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + if (!(likely(PyUnicode_CheckExact(__pyx_t_8))||((__pyx_t_8) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_8))) __PYX_ERR(0, 393, __pyx_L1_error) + __Pyx_DECREF_SET(__pyx_v_processed_sent, ((PyObject*)__pyx_t_8)); + __pyx_t_8 = 0; + + /* "IndicTransToolkit/processor.pyx":394 + * + * processed_sent = processed_sent.strip() + * if not is_target: # <<<<<<<<<<<<<< + * return f"{src_lang} {tgt_lang} {processed_sent}" + * else: + */ + __pyx_t_5 = (!__pyx_v_is_target); + if (__pyx_t_5) { + + /* "IndicTransToolkit/processor.pyx":395 + * processed_sent = processed_sent.strip() + * if not is_target: + * return f"{src_lang} {tgt_lang} {processed_sent}" # <<<<<<<<<<<<<< + * else: + * return processed_sent + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_8 = PyTuple_New(5); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 395, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_9 = 0; + __pyx_t_10 = 127; + __pyx_t_6 = __Pyx_PyUnicode_Unicode(__pyx_v_src_lang); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 395, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_10 = 
(__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) > __pyx_t_10) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) : __pyx_t_10; + __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_6); + __Pyx_GIVEREF(__pyx_t_6); + PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_6); + __pyx_t_6 = 0; + __Pyx_INCREF(__pyx_kp_u__139); + __pyx_t_9 += 1; + __Pyx_GIVEREF(__pyx_kp_u__139); + PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_kp_u__139); + __pyx_t_6 = __Pyx_PyUnicode_Unicode(__pyx_v_tgt_lang); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 395, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_10 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) > __pyx_t_10) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) : __pyx_t_10; + __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_6); + __Pyx_GIVEREF(__pyx_t_6); + PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_6); + __pyx_t_6 = 0; + __Pyx_INCREF(__pyx_kp_u__139); + __pyx_t_9 += 1; + __Pyx_GIVEREF(__pyx_kp_u__139); + PyTuple_SET_ITEM(__pyx_t_8, 3, __pyx_kp_u__139); + __pyx_t_6 = __Pyx_PyUnicode_Unicode(__pyx_v_processed_sent); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 395, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_10 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) > __pyx_t_10) ? 
__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_6) : __pyx_t_10; + __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_6); + __Pyx_GIVEREF(__pyx_t_6); + PyTuple_SET_ITEM(__pyx_t_8, 4, __pyx_t_6); + __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyUnicode_Join(__pyx_t_8, 5, __pyx_t_9, __pyx_t_10); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 395, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_r = ((PyObject*)__pyx_t_6); + __pyx_t_6 = 0; + goto __pyx_L0; + + /* "IndicTransToolkit/processor.pyx":394 + * + * processed_sent = processed_sent.strip() + * if not is_target: # <<<<<<<<<<<<<< + * return f"{src_lang} {tgt_lang} {processed_sent}" + * else: + */ + } + + /* "IndicTransToolkit/processor.pyx":397 + * return f"{src_lang} {tgt_lang} {processed_sent}" + * else: + * return processed_sent # <<<<<<<<<<<<<< + * + * # Internal Method: Postprocess a Single Sentence + */ + /*else*/ { + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_processed_sent); + __pyx_r = __pyx_v_processed_sent; + goto __pyx_L0; + } + + /* "IndicTransToolkit/processor.pyx":354 + * + * # Internal Method: Preprocess a Single Sentence + * cdef str _preprocess( # <<<<<<<<<<<<<< + * self, + * str sent, + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._preprocess", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_iso_lang); + __Pyx_XDECREF(__pyx_v_script_part); + __Pyx_XDECREF(__pyx_v_e_strip); + __Pyx_XDECREF(__pyx_v_e_norm); + __Pyx_XDECREF(__pyx_v_e_tokens); + __Pyx_XDECREF(__pyx_v_processed_sent); + __Pyx_XDECREF(__pyx_v_sent); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":400 + * + * # Internal Method: Postprocess a Single Sentence + * cdef str _postprocess(self, 
object sent, str lang) except *: # <<<<<<<<<<<<<< + * """ + * Postprocess a single sentence: + */ + +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__postprocess(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sent, PyObject *__pyx_v_lang) { + PyObject *__pyx_v_placeholder_entity_map = 0; + PyObject *__pyx_v_lang_code = 0; + PyObject *__pyx_v_script_code = 0; + PyObject *__pyx_v_iso_lang = 0; + PyObject *__pyx_v_k = 0; + PyObject *__pyx_v_v = 0; + PyObject *__pyx_v_xlated = 0; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + int __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + unsigned int __pyx_t_6; + PyObject *__pyx_t_7 = NULL; + Py_ssize_t __pyx_t_8; + Py_ssize_t __pyx_t_9; + int __pyx_t_10; + int __pyx_t_11; + PyObject *__pyx_t_12 = NULL; + PyObject *__pyx_t_13 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_postprocess", 0); + __Pyx_INCREF(__pyx_v_sent); + + /* "IndicTransToolkit/processor.pyx":417 + * + * # Unwrap if sent is a tuple or list + * if isinstance(sent, (tuple, list)): # <<<<<<<<<<<<<< + * sent = sent[0] + * + */ + __pyx_t_2 = PyTuple_Check(__pyx_v_sent); + if (!__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L4_bool_binop_done; + } + __pyx_t_2 = PyList_Check(__pyx_v_sent); + __pyx_t_1 = __pyx_t_2; + __pyx_L4_bool_binop_done:; + if (__pyx_t_1) { + + /* "IndicTransToolkit/processor.pyx":418 + * # Unwrap if sent is a tuple or list + * if isinstance(sent, (tuple, list)): + * sent = sent[0] # <<<<<<<<<<<<<< + * + * placeholder_entity_map = self._placeholder_entity_maps.get() + */ + __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_sent, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 418, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF_SET(__pyx_v_sent, __pyx_t_3); + __pyx_t_3 = 
0; + + /* "IndicTransToolkit/processor.pyx":417 + * + * # Unwrap if sent is a tuple or list + * if isinstance(sent, (tuple, list)): # <<<<<<<<<<<<<< + * sent = sent[0] + * + */ + } + + /* "IndicTransToolkit/processor.pyx":420 + * sent = sent[0] + * + * placeholder_entity_map = self._placeholder_entity_maps.get() # <<<<<<<<<<<<<< + * lang_code, script_code = lang.split("_", 1) + * iso_lang = self._flores_codes.get(lang, "hi") + */ + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_placeholder_entity_maps, __pyx_n_s_get); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 420, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_4))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_4, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_5, NULL}; + __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 0+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 420, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + } + if (!(likely(PyDict_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("dict", __pyx_t_3))) __PYX_ERR(0, 420, __pyx_L1_error) + __pyx_v_placeholder_entity_map = ((PyObject*)__pyx_t_3); + __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":421 + * + * placeholder_entity_map = self._placeholder_entity_maps.get() + * lang_code, script_code = lang.split("_", 1) # <<<<<<<<<<<<<< + * iso_lang = self._flores_codes.get(lang, "hi") + * + */ + if (unlikely(__pyx_v_lang == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "split"); + __PYX_ERR(0, 421, __pyx_L1_error) + } + __pyx_t_3 = PyUnicode_Split(__pyx_v_lang, 
__Pyx_NoneAsNull(__pyx_n_u__155), 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 421, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (1) { + PyObject* sequence = __pyx_t_3; + Py_ssize_t size = __Pyx_PySequence_SIZE(sequence); + if (unlikely(size != 2)) { + if (size > 2) __Pyx_RaiseTooManyValuesError(2); + else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); + __PYX_ERR(0, 421, __pyx_L1_error) + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_4 = PyList_GET_ITEM(sequence, 0); + __pyx_t_5 = PyList_GET_ITEM(sequence, 1); + __Pyx_INCREF(__pyx_t_4); + __Pyx_INCREF(__pyx_t_5); + #else + __pyx_t_4 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 421, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 421, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + #endif + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_4))) __PYX_ERR(0, 421, __pyx_L1_error) + if (!(likely(PyUnicode_CheckExact(__pyx_t_5))||((__pyx_t_5) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_5))) __PYX_ERR(0, 421, __pyx_L1_error) + __pyx_v_lang_code = ((PyObject*)__pyx_t_4); + __pyx_t_4 = 0; + __pyx_v_script_code = ((PyObject*)__pyx_t_5); + __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":422 + * placeholder_entity_map = self._placeholder_entity_maps.get() + * lang_code, script_code = lang.split("_", 1) + * iso_lang = self._flores_codes.get(lang, "hi") # <<<<<<<<<<<<<< + * + * # Fix for Perso-Arabic scripts + */ + if (unlikely(__pyx_v_self->_flores_codes == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "get"); + __PYX_ERR(0, 422, __pyx_L1_error) + } + __pyx_t_3 = __Pyx_PyDict_GetItemDefault(__pyx_v_self->_flores_codes, __pyx_v_lang, __pyx_n_u_hi); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 422, 
__pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(PyUnicode_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_3))) __PYX_ERR(0, 422, __pyx_L1_error) + __pyx_v_iso_lang = ((PyObject*)__pyx_t_3); + __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":425 + * + * # Fix for Perso-Arabic scripts + * if script_code in ["Arab", "Aran"]: # <<<<<<<<<<<<<< + * sent = ( + * sent.replace(" ", "") + */ + __Pyx_INCREF(__pyx_v_script_code); + __pyx_t_7 = __pyx_v_script_code; + __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_t_7, __pyx_n_u_Arab, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 425, __pyx_L1_error) + if (!__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L7_bool_binop_done; + } + __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_t_7, __pyx_n_u_Aran, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 425, __pyx_L1_error) + __pyx_t_1 = __pyx_t_2; + __pyx_L7_bool_binop_done:; + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_2 = __pyx_t_1; + if (__pyx_t_2) { + + /* "IndicTransToolkit/processor.pyx":427 + * if script_code in ["Arab", "Aran"]: + * sent = ( + * sent.replace(" ", "") # <<<<<<<<<<<<<< + * .replace(" ", "") + * .replace(" ", "") + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_sent, __pyx_n_s_replace); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__158, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 427, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":428 + * sent = ( + * sent.replace(" ", "") + * .replace(" ", "") # <<<<<<<<<<<<<< + * .replace(" ", "") + * .replace("", "") + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_replace); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = 
__Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__161, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 428, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":429 + * sent.replace(" ", "") + * .replace(" ", "") + * .replace(" ", "") # <<<<<<<<<<<<<< + * .replace("", "") + * ) + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_replace); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__164, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 429, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":430 + * .replace(" ", "") + * .replace(" ", "") + * .replace("", "") # <<<<<<<<<<<<<< + * ) + * + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_replace); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__167, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 430, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF_SET(__pyx_v_sent, __pyx_t_5); + __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":425 + * + * # Fix for Perso-Arabic scripts + * if script_code in ["Arab", "Aran"]: # <<<<<<<<<<<<<< + * sent = ( + * sent.replace(" ", "") + */ + } + + /* "IndicTransToolkit/processor.pyx":434 + * + * # Oriya fix + * if lang_code == "ory": # <<<<<<<<<<<<<< + * sent = sent.replace("", "") + * + */ + __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_v_lang_code, __pyx_n_u_ory, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 434, __pyx_L1_error) + if (__pyx_t_2) { + + /* "IndicTransToolkit/processor.pyx":435 + * # Oriya fix + * if lang_code == "ory": + * sent = sent.replace("", "") # <<<<<<<<<<<<<< + * + * 
# Restore placeholders + */ + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_sent, __pyx_n_s_replace); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 435, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_tuple__170, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 435, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_DECREF_SET(__pyx_v_sent, __pyx_t_3); + __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":434 + * + * # Oriya fix + * if lang_code == "ory": # <<<<<<<<<<<<<< + * sent = sent.replace("", "") + * + */ + } + + /* "IndicTransToolkit/processor.pyx":438 + * + * # Restore placeholders + * for k, v in placeholder_entity_map.items(): # <<<<<<<<<<<<<< + * sent = sent.replace(k, v) + * + */ + __pyx_t_8 = 0; + if (unlikely(__pyx_v_placeholder_entity_map == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "items"); + __PYX_ERR(0, 438, __pyx_L1_error) + } + __pyx_t_5 = __Pyx_dict_iterator(__pyx_v_placeholder_entity_map, 1, __pyx_n_s_items, (&__pyx_t_9), (&__pyx_t_10)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 438, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_3); + __pyx_t_3 = __pyx_t_5; + __pyx_t_5 = 0; + while (1) { + __pyx_t_11 = __Pyx_dict_iter_next(__pyx_t_3, __pyx_t_9, &__pyx_t_8, &__pyx_t_5, &__pyx_t_4, NULL, __pyx_t_10); + if (unlikely(__pyx_t_11 == 0)) break; + if (unlikely(__pyx_t_11 == -1)) __PYX_ERR(0, 438, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_4); + if (!(likely(PyUnicode_CheckExact(__pyx_t_5))||((__pyx_t_5) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_5))) __PYX_ERR(0, 438, __pyx_L1_error) + if (!(likely(PyUnicode_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_4))) __PYX_ERR(0, 438, __pyx_L1_error) + __Pyx_XDECREF_SET(__pyx_v_k, ((PyObject*)__pyx_t_5)); + __pyx_t_5 = 0; + __Pyx_XDECREF_SET(__pyx_v_v, 
((PyObject*)__pyx_t_4)); + __pyx_t_4 = 0; + + /* "IndicTransToolkit/processor.pyx":439 + * # Restore placeholders + * for k, v in placeholder_entity_map.items(): + * sent = sent.replace(k, v) # <<<<<<<<<<<<<< + * + * # Detokenize + */ + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_sent, __pyx_n_s_replace); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 439, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_12 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_5))) { + __pyx_t_12 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_12)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); + __Pyx_INCREF(__pyx_t_12); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_12, __pyx_v_k, __pyx_v_v}; + __pyx_t_4 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_12); __pyx_t_12 = 0; + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 439, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + __Pyx_DECREF_SET(__pyx_v_sent, __pyx_t_4); + __pyx_t_4 = 0; + } + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":442 + * + * # Detokenize + * if lang == "eng_Latn": # <<<<<<<<<<<<<< + * return self._en_detok.detokenize(sent.split(" ")) + * else: + */ + __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_v_lang, __pyx_n_u_eng_Latn, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 442, __pyx_L1_error) + if (__pyx_t_2) { + + /* "IndicTransToolkit/processor.pyx":443 + * # Detokenize + * if lang == "eng_Latn": + * return self._en_detok.detokenize(sent.split(" ")) # <<<<<<<<<<<<<< + * else: + * xlated = self._xliterator.transliterate(sent, "hi", iso_lang) + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_en_detok, __pyx_n_s_detokenize); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 443, __pyx_L1_error) + 
__Pyx_GOTREF(__pyx_t_4); + __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_sent, __pyx_n_s_split); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 443, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_12); + __pyx_t_13 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_12))) { + __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_12); + if (likely(__pyx_t_13)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_12); + __Pyx_INCREF(__pyx_t_13); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_12, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_13, __pyx_kp_u__139}; + __pyx_t_5 = __Pyx_PyObject_FastCall(__pyx_t_12, __pyx_callargs+1-__pyx_t_6, 1+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 443, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; + } + __pyx_t_12 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_4))) { + __pyx_t_12 = PyMethod_GET_SELF(__pyx_t_4); + if (likely(__pyx_t_12)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); + __Pyx_INCREF(__pyx_t_12); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_4, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_12, __pyx_t_5}; + __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 1+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_12); __pyx_t_12 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 443, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_3))) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_r = ((PyObject*)__pyx_t_3); + __pyx_t_3 = 0; + goto __pyx_L0; + + /* "IndicTransToolkit/processor.pyx":442 + * + * # Detokenize + * if lang == "eng_Latn": # <<<<<<<<<<<<<< + * return 
self._en_detok.detokenize(sent.split(" ")) + * else: + */ + } + + /* "IndicTransToolkit/processor.pyx":445 + * return self._en_detok.detokenize(sent.split(" ")) + * else: + * xlated = self._xliterator.transliterate(sent, "hi", iso_lang) # <<<<<<<<<<<<<< + * return indic_detokenize.trivial_detokenize(xlated, iso_lang) + * + */ + /*else*/ { + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_xliterator, __pyx_n_s_transliterate); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 445, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_4))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_4, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[4] = {__pyx_t_5, __pyx_v_sent, __pyx_n_u_hi, __pyx_v_iso_lang}; + __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 3+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 445, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_3))) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_v_xlated = ((PyObject*)__pyx_t_3); + __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":446 + * else: + * xlated = self._xliterator.transliterate(sent, "hi", iso_lang) + * return indic_detokenize.trivial_detokenize(xlated, iso_lang) # <<<<<<<<<<<<<< + * + * # Exposed Method: Preprocess a Batch of Sentences + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_indic_detokenize); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 446, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, 
__pyx_n_s_trivial_detokenize); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 446, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_5))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_4)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); + __Pyx_INCREF(__pyx_t_4); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_4, __pyx_v_xlated, __pyx_v_iso_lang}; + __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; + if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 446, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + if (!(likely(PyUnicode_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_3))) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_r = ((PyObject*)__pyx_t_3); + __pyx_t_3 = 0; + goto __pyx_L0; + } + + /* "IndicTransToolkit/processor.pyx":400 + * + * # Internal Method: Postprocess a Single Sentence + * cdef str _postprocess(self, object sent, str lang) except *: # <<<<<<<<<<<<<< + * """ + * Postprocess a single sentence: + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_12); + __Pyx_XDECREF(__pyx_t_13); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor._postprocess", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_placeholder_entity_map); + __Pyx_XDECREF(__pyx_v_lang_code); + __Pyx_XDECREF(__pyx_v_script_code); + __Pyx_XDECREF(__pyx_v_iso_lang); + __Pyx_XDECREF(__pyx_v_k); + __Pyx_XDECREF(__pyx_v_v); + __Pyx_XDECREF(__pyx_v_xlated); + __Pyx_XDECREF(__pyx_v_sent); 
+ __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":449 + * + * # Exposed Method: Preprocess a Batch of Sentences + * cpdef list preprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] batch, + */ + +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_batch, PyObject *__pyx_v_src_lang, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch *__pyx_optional_args) { + + /* "IndicTransToolkit/processor.pyx":453 + * List[str] batch, + * str src_lang, + * str tgt_lang=None, # <<<<<<<<<<<<<< + * bint is_target=False, + * bint visualize=False + */ + PyObject *__pyx_v_tgt_lang = ((PyObject*)Py_None); + + /* "IndicTransToolkit/processor.pyx":454 + * str src_lang, + * str tgt_lang=None, + * bint is_target=False, # <<<<<<<<<<<<<< + * bint visualize=False + * ): + */ + int __pyx_v_is_target = ((int)0); + + /* "IndicTransToolkit/processor.pyx":455 + * str tgt_lang=None, + * bint is_target=False, + * bint visualize=False # <<<<<<<<<<<<<< + * ): + * """ + */ + int __pyx_v_visualize = ((int)0); + PyObject *__pyx_v_normalizer = 0; + PyObject *__pyx_v_iso_code = 0; + PyObject *__pyx_v_iterator = 0; + int __pyx_v_n; + PyObject *__pyx_7genexpr__pyx_v_s = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + unsigned int __pyx_t_7; + Py_ssize_t __pyx_t_8; 
+ int __pyx_t_9; + PyObject *(*__pyx_t_10)(PyObject *); + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("preprocess_batch", 1); + if (__pyx_optional_args) { + if (__pyx_optional_args->__pyx_n > 0) { + __pyx_v_tgt_lang = __pyx_optional_args->tgt_lang; + if (__pyx_optional_args->__pyx_n > 1) { + __pyx_v_is_target = __pyx_optional_args->is_target; + if (__pyx_optional_args->__pyx_n > 2) { + __pyx_v_visualize = __pyx_optional_args->visualize; + } + } + } + } + + /* "IndicTransToolkit/processor.pyx":449 + * + * # Exposed Method: Preprocess a Batch of Sentences + * cpdef list preprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] batch, + */ + /* Check if called by wrapper */ + if (unlikely(__pyx_skip_dispatch)) ; + /* Check if overridden in Python */ + else if (unlikely((Py_TYPE(((PyObject *)__pyx_v_self))->tp_dictoffset != 0) || __Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))) { + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS + static PY_UINT64_T __pyx_tp_dict_version = __PYX_DICT_VERSION_INIT, __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT; + if (unlikely(!__Pyx_object_dict_version_matches(((PyObject *)__pyx_v_self), __pyx_tp_dict_version, __pyx_obj_dict_version))) { + PY_UINT64_T __pyx_typedict_guard = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self)); + #endif + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_preprocess_batch); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!__Pyx_IsSameCFunction(__pyx_t_1, (void*) __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch)) { + __Pyx_XDECREF(__pyx_r); + __pyx_t_3 = __Pyx_PyBool_FromLong(__pyx_v_is_target); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = __Pyx_PyBool_FromLong(__pyx_v_visualize); if 
(unlikely(!__pyx_t_4)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_INCREF(__pyx_t_1); + __pyx_t_5 = __pyx_t_1; __pyx_t_6 = NULL; + __pyx_t_7 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_5))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); + __Pyx_INCREF(__pyx_t_6); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_7 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[6] = {__pyx_t_6, __pyx_v_batch, __pyx_v_src_lang, __pyx_v_tgt_lang, __pyx_t_3, __pyx_t_4}; + __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_7, 5+__pyx_t_7); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + if (!(likely(PyList_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_2))) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_r = ((PyObject*)__pyx_t_2); + __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + goto __pyx_L0; + } + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS + __pyx_tp_dict_version = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self)); + __pyx_obj_dict_version = __Pyx_get_object_dict_version(((PyObject *)__pyx_v_self)); + if (unlikely(__pyx_typedict_guard != __pyx_tp_dict_version)) { + __pyx_tp_dict_version = __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT; + } + #endif + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS + } + #endif + } + + /* "IndicTransToolkit/processor.pyx":461 + * This is exposed for external use. 
+ * """ + * cdef object normalizer = None # <<<<<<<<<<<<<< + * cdef str iso_code = self._flores_codes.get(src_lang, "hi") + * cdef object iterator + */ + __Pyx_INCREF(Py_None); + __pyx_v_normalizer = Py_None; + + /* "IndicTransToolkit/processor.pyx":462 + * """ + * cdef object normalizer = None + * cdef str iso_code = self._flores_codes.get(src_lang, "hi") # <<<<<<<<<<<<<< + * cdef object iterator + * cdef list results + */ + if (unlikely(__pyx_v_self->_flores_codes == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "get"); + __PYX_ERR(0, 462, __pyx_L1_error) + } + __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_self->_flores_codes, __pyx_v_src_lang, __pyx_n_u_hi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 462, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_t_1))) __PYX_ERR(0, 462, __pyx_L1_error) + __pyx_v_iso_code = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":466 + * cdef list results + * cdef int i + * cdef int n = len(batch) # <<<<<<<<<<<<<< + * + * if src_lang != "eng_Latn": + */ + if (unlikely(__pyx_v_batch == Py_None)) { + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); + __PYX_ERR(0, 466, __pyx_L1_error) + } + __pyx_t_8 = __Pyx_PyList_GET_SIZE(__pyx_v_batch); if (unlikely(__pyx_t_8 == ((Py_ssize_t)-1))) __PYX_ERR(0, 466, __pyx_L1_error) + __pyx_v_n = __pyx_t_8; + + /* "IndicTransToolkit/processor.pyx":468 + * cdef int n = len(batch) + * + * if src_lang != "eng_Latn": # <<<<<<<<<<<<<< + * normalizer = IndicNormalizerFactory().get_normalizer(iso_code) + * + */ + __pyx_t_9 = (__Pyx_PyUnicode_Equals(__pyx_v_src_lang, __pyx_n_u_eng_Latn, Py_NE)); if (unlikely((__pyx_t_9 < 0))) __PYX_ERR(0, 468, __pyx_L1_error) + if (__pyx_t_9) { + + /* "IndicTransToolkit/processor.pyx":469 + * + * if src_lang != "eng_Latn": + * normalizer = 
IndicNormalizerFactory().get_normalizer(iso_code) # <<<<<<<<<<<<<< + * + * if visualize: + */ + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_IndicNormalizerFactory); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 469, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_4 = NULL; + __pyx_t_7 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_5))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_4)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); + __Pyx_INCREF(__pyx_t_4); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_7 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_4, NULL}; + __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_7, 0+__pyx_t_7); + __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 469, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_get_normalizer); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 469, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = NULL; + __pyx_t_7 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_5))) { + __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_2)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); + __Pyx_INCREF(__pyx_t_2); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_7 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_2, __pyx_v_iso_code}; + __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_7, 1+__pyx_t_7); + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 469, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + __Pyx_DECREF_SET(__pyx_v_normalizer, __pyx_t_1); + __pyx_t_1 = 0; + + /* "IndicTransToolkit/processor.pyx":468 + * cdef int n = len(batch) + * + 
* if src_lang != "eng_Latn": # <<<<<<<<<<<<<< + * normalizer = IndicNormalizerFactory().get_normalizer(iso_code) + * + */ + } + + /* "IndicTransToolkit/processor.pyx":471 + * normalizer = IndicNormalizerFactory().get_normalizer(iso_code) + * + * if visualize: # <<<<<<<<<<<<<< + * iterator = tqdm(batch, total=n, desc=f" | > Pre-processing {src_lang}", unit="line") + * else: + */ + if (__pyx_v_visualize) { + + /* "IndicTransToolkit/processor.pyx":472 + * + * if visualize: + * iterator = tqdm(batch, total=n, desc=f" | > Pre-processing {src_lang}", unit="line") # <<<<<<<<<<<<<< + * else: + * iterator = batch + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_tqdm); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 472, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 472, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_INCREF(__pyx_v_batch); + __Pyx_GIVEREF(__pyx_v_batch); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_v_batch)) __PYX_ERR(0, 472, __pyx_L1_error); + __pyx_t_2 = __Pyx_PyDict_NewPresized(3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 472, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 472, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_total, __pyx_t_4) < 0) __PYX_ERR(0, 472, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_PyUnicode_Unicode(__pyx_v_src_lang); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 472, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = __Pyx_PyUnicode_Concat(__pyx_kp_u_Pre_processing, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 472, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_desc, __pyx_t_3) < 0) __PYX_ERR(0, 472, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_unit, __pyx_n_u_line) < 0) 
__PYX_ERR(0, 472, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_5, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 472, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_v_iterator = __pyx_t_3; + __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":471 + * normalizer = IndicNormalizerFactory().get_normalizer(iso_code) + * + * if visualize: # <<<<<<<<<<<<<< + * iterator = tqdm(batch, total=n, desc=f" | > Pre-processing {src_lang}", unit="line") + * else: + */ + goto __pyx_L4; + } + + /* "IndicTransToolkit/processor.pyx":474 + * iterator = tqdm(batch, total=n, desc=f" | > Pre-processing {src_lang}", unit="line") + * else: + * iterator = batch # <<<<<<<<<<<<<< + * + * return [self._preprocess(s, src_lang, tgt_lang, normalizer, is_target) for s in iterator] + */ + /*else*/ { + __Pyx_INCREF(__pyx_v_batch); + __pyx_v_iterator = __pyx_v_batch; + } + __pyx_L4:; + + /* "IndicTransToolkit/processor.pyx":476 + * iterator = batch + * + * return [self._preprocess(s, src_lang, tgt_lang, normalizer, is_target) for s in iterator] # <<<<<<<<<<<<<< + * + * # Exposed Method: Postprocess a Batch of Sentences + */ + __Pyx_XDECREF(__pyx_r); + { /* enter inner scope */ + __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 476, __pyx_L7_error) + __Pyx_GOTREF(__pyx_t_3); + if (likely(PyList_CheckExact(__pyx_v_iterator)) || PyTuple_CheckExact(__pyx_v_iterator)) { + __pyx_t_2 = __pyx_v_iterator; __Pyx_INCREF(__pyx_t_2); + __pyx_t_8 = 0; + __pyx_t_10 = NULL; + } else { + __pyx_t_8 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_iterator); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 476, __pyx_L7_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_10 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 476, __pyx_L7_error) + } + for (;;) { + if (likely(!__pyx_t_10)) { + if (likely(PyList_CheckExact(__pyx_t_2))) { 
+ { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 476, __pyx_L7_error) + #endif + if (__pyx_t_8 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(0, 476, __pyx_L7_error) + #else + __pyx_t_5 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 476, __pyx_L7_error) + __Pyx_GOTREF(__pyx_t_5); + #endif + } else { + { + Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 476, __pyx_L7_error) + #endif + if (__pyx_t_8 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(0, 476, __pyx_L7_error) + #else + __pyx_t_5 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 476, __pyx_L7_error) + __Pyx_GOTREF(__pyx_t_5); + #endif + } + } else { + __pyx_t_5 = __pyx_t_10(__pyx_t_2); + if (unlikely(!__pyx_t_5)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 476, __pyx_L7_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_5); + } + __Pyx_XDECREF_SET(__pyx_7genexpr__pyx_v_s, __pyx_t_5); + __pyx_t_5 = 0; + if (!(likely(PyUnicode_CheckExact(__pyx_7genexpr__pyx_v_s))||((__pyx_7genexpr__pyx_v_s) == Py_None) || __Pyx_RaiseUnexpectedTypeError("unicode", __pyx_7genexpr__pyx_v_s))) __PYX_ERR(0, 476, __pyx_L7_error) + __pyx_t_5 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_preprocess(__pyx_v_self, 
((PyObject*)__pyx_7genexpr__pyx_v_s), __pyx_v_src_lang, __pyx_v_tgt_lang, __pyx_v_normalizer, __pyx_v_is_target); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 476, __pyx_L7_error) + __Pyx_GOTREF(__pyx_t_5); + if (unlikely(__Pyx_ListComp_Append(__pyx_t_3, (PyObject*)__pyx_t_5))) __PYX_ERR(0, 476, __pyx_L7_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_7genexpr__pyx_v_s); __pyx_7genexpr__pyx_v_s = 0; + goto __pyx_L11_exit_scope; + __pyx_L7_error:; + __Pyx_XDECREF(__pyx_7genexpr__pyx_v_s); __pyx_7genexpr__pyx_v_s = 0; + goto __pyx_L1_error; + __pyx_L11_exit_scope:; + } /* exit inner scope */ + __pyx_r = ((PyObject*)__pyx_t_3); + __pyx_t_3 = 0; + goto __pyx_L0; + + /* "IndicTransToolkit/processor.pyx":449 + * + * # Exposed Method: Preprocess a Batch of Sentences + * cpdef list preprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] batch, + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.preprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_normalizer); + __Pyx_XDECREF(__pyx_v_iso_code); + __Pyx_XDECREF(__pyx_v_iterator); + __Pyx_XDECREF(__pyx_7genexpr__pyx_v_s); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +PyDoc_STRVAR(__pyx_doc_17IndicTransToolkit_9processor_14IndicProcessor_2preprocess_batch, "\n Preprocess an array of sentences (normalize, tokenize, 
transliterate).\n This is exposed for external use.\n "); +static PyMethodDef __pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch = {"preprocess_batch", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_17IndicTransToolkit_9processor_14IndicProcessor_2preprocess_batch}; +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + PyObject *__pyx_v_batch = 0; + PyObject *__pyx_v_src_lang = 0; + PyObject *__pyx_v_tgt_lang = 0; + int __pyx_v_is_target; + int __pyx_v_visualize; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[5] = {0,0,0,0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("preprocess_batch (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_batch,&__pyx_n_s_src_lang,&__pyx_n_s_tgt_lang,&__pyx_n_s_is_target,&__pyx_n_s_visualize,0}; + + /* "IndicTransToolkit/processor.pyx":453 + * List[str] batch, + * str src_lang, + * str tgt_lang=None, # <<<<<<<<<<<<<< + * bint is_target=False, + * bint visualize=False + */ + values[2] = __Pyx_Arg_NewRef_FASTCALL(((PyObject*)Py_None)); + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 5: values[4] = 
__Pyx_Arg_FASTCALL(__pyx_args, 4); + CYTHON_FALLTHROUGH; + case 4: values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3); + CYTHON_FALLTHROUGH; + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_batch)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_src_lang)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[1]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("preprocess_batch", 0, 2, 5, 1); __PYX_ERR(0, 449, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tgt_lang); + if (value) { values[2] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 3: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_is_target); + if (value) { values[3] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 4: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_visualize); + if (value) { values[4] = __Pyx_Arg_NewRef_FASTCALL(value); 
kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "preprocess_batch") < 0)) __PYX_ERR(0, 449, __pyx_L3_error) + } + } else { + switch (__pyx_nargs) { + case 5: values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4); + CYTHON_FALLTHROUGH; + case 4: values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3); + CYTHON_FALLTHROUGH; + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } + } + __pyx_v_batch = ((PyObject*)values[0]); + __pyx_v_src_lang = ((PyObject*)values[1]); + __pyx_v_tgt_lang = ((PyObject*)values[2]); + if (values[3]) { + __pyx_v_is_target = __Pyx_PyObject_IsTrue(values[3]); if (unlikely((__pyx_v_is_target == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 454, __pyx_L3_error) + } else { + + /* "IndicTransToolkit/processor.pyx":454 + * str src_lang, + * str tgt_lang=None, + * bint is_target=False, # <<<<<<<<<<<<<< + * bint visualize=False + * ): + */ + __pyx_v_is_target = ((int)0); + } + if (values[4]) { + __pyx_v_visualize = __Pyx_PyObject_IsTrue(values[4]); if (unlikely((__pyx_v_visualize == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 455, __pyx_L3_error) + } else { + + /* "IndicTransToolkit/processor.pyx":455 + * str tgt_lang=None, + * bint is_target=False, + * bint visualize=False # <<<<<<<<<<<<<< + * ): + * """ + */ + __pyx_v_visualize = ((int)0); + } + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("preprocess_batch", 0, 2, 5, __pyx_nargs); __PYX_ERR(0, 449, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < 
(Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.preprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_batch), (&PyList_Type), 1, "batch", 1))) __PYX_ERR(0, 451, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_src_lang), (&PyUnicode_Type), 1, "src_lang", 1))) __PYX_ERR(0, 452, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tgt_lang), (&PyUnicode_Type), 1, "tgt_lang", 1))) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_2preprocess_batch(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self), __pyx_v_batch, __pyx_v_src_lang, __pyx_v_tgt_lang, __pyx_v_is_target, __pyx_v_visualize); + + /* "IndicTransToolkit/processor.pyx":449 + * + * # Exposed Method: Preprocess a Batch of Sentences + * cpdef list preprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] batch, + */ + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_2preprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_batch, PyObject *__pyx_v_src_lang, PyObject *__pyx_v_tgt_lang, int __pyx_v_is_target, int __pyx_v_visualize) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + struct 
__pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch __pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("preprocess_batch", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_2.__pyx_n = 3; + __pyx_t_2.tgt_lang = __pyx_v_tgt_lang; + __pyx_t_2.is_target = __pyx_v_is_target; + __pyx_t_2.visualize = __pyx_v_visualize; + __pyx_t_1 = __pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor->preprocess_batch(__pyx_v_self, __pyx_v_batch, __pyx_v_src_lang, 1, &__pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.preprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":479 + * + * # Exposed Method: Postprocess a Batch of Sentences + * cpdef list postprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] sents, + */ + +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyObject *__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sents, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch *__pyx_optional_args) { + PyObject *__pyx_v_lang = ((PyObject*)__pyx_n_u_hin_Deva); + + /* "IndicTransToolkit/processor.pyx":483 + * List[str] sents, + * str lang="hin_Deva", + * 
bint visualize=False # <<<<<<<<<<<<<< + * ): + * """ + */ + int __pyx_v_visualize = ((int)0); + PyObject *__pyx_v_iterator = 0; + PyObject *__pyx_v_results = 0; + int __pyx_v_n; + PyObject *__pyx_8genexpr1__pyx_v_s = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + unsigned int __pyx_t_6; + Py_ssize_t __pyx_t_7; + PyObject *(*__pyx_t_8)(PyObject *); + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("postprocess_batch", 1); + if (__pyx_optional_args) { + if (__pyx_optional_args->__pyx_n > 0) { + __pyx_v_lang = __pyx_optional_args->lang; + if (__pyx_optional_args->__pyx_n > 1) { + __pyx_v_visualize = __pyx_optional_args->visualize; + } + } + } + + /* "IndicTransToolkit/processor.pyx":479 + * + * # Exposed Method: Postprocess a Batch of Sentences + * cpdef list postprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] sents, + */ + /* Check if called by wrapper */ + if (unlikely(__pyx_skip_dispatch)) ; + /* Check if overridden in Python */ + else if (unlikely((Py_TYPE(((PyObject *)__pyx_v_self))->tp_dictoffset != 0) || __Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))) { + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS + static PY_UINT64_T __pyx_tp_dict_version = __PYX_DICT_VERSION_INIT, __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT; + if (unlikely(!__Pyx_object_dict_version_matches(((PyObject *)__pyx_v_self), __pyx_tp_dict_version, __pyx_obj_dict_version))) { + PY_UINT64_T __pyx_typedict_guard = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self)); + #endif + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_postprocess_batch); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 479, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + 
if (!__Pyx_IsSameCFunction(__pyx_t_1, (void*) __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch)) { + __Pyx_XDECREF(__pyx_r); + __pyx_t_3 = __Pyx_PyBool_FromLong(__pyx_v_visualize); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 479, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_INCREF(__pyx_t_1); + __pyx_t_4 = __pyx_t_1; __pyx_t_5 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_4))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4); + if (likely(__pyx_t_5)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_4, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[4] = {__pyx_t_5, __pyx_v_sents, __pyx_v_lang, __pyx_t_3}; + __pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_6, 3+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 479, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + } + if (!(likely(PyList_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_2))) __PYX_ERR(0, 479, __pyx_L1_error) + __pyx_r = ((PyObject*)__pyx_t_2); + __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + goto __pyx_L0; + } + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS + __pyx_tp_dict_version = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self)); + __pyx_obj_dict_version = __Pyx_get_object_dict_version(((PyObject *)__pyx_v_self)); + if (unlikely(__pyx_typedict_guard != __pyx_tp_dict_version)) { + __pyx_tp_dict_version = __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT; + } + #endif + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS + } + #endif + } + + /* "IndicTransToolkit/processor.pyx":493 + * cdef list 
results + * cdef int i + * cdef int n = len(sents) # <<<<<<<<<<<<<< + * + * if visualize: + */ + if (unlikely(__pyx_v_sents == Py_None)) { + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); + __PYX_ERR(0, 493, __pyx_L1_error) + } + __pyx_t_7 = __Pyx_PyList_GET_SIZE(__pyx_v_sents); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 493, __pyx_L1_error) + __pyx_v_n = __pyx_t_7; + + /* "IndicTransToolkit/processor.pyx":495 + * cdef int n = len(sents) + * + * if visualize: # <<<<<<<<<<<<<< + * iterator = tqdm(sents, total=n, desc=f" | > Post-processing {lang}", unit="line") + * else: + */ + if (__pyx_v_visualize) { + + /* "IndicTransToolkit/processor.pyx":496 + * + * if visualize: + * iterator = tqdm(sents, total=n, desc=f" | > Post-processing {lang}", unit="line") # <<<<<<<<<<<<<< + * else: + * iterator = sents + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_tqdm); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 496, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 496, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_INCREF(__pyx_v_sents); + __Pyx_GIVEREF(__pyx_v_sents); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_sents)) __PYX_ERR(0, 496, __pyx_L1_error); + __pyx_t_4 = __Pyx_PyDict_NewPresized(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 496, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 496, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_total, __pyx_t_3) < 0) __PYX_ERR(0, 496, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyUnicode_Unicode(__pyx_v_lang); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 496, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_5 = __Pyx_PyUnicode_Concat(__pyx_kp_u_Post_processing, __pyx_t_3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 496, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + 
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_desc, __pyx_t_5) < 0) __PYX_ERR(0, 496, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_unit, __pyx_n_u_line) < 0) __PYX_ERR(0, 496, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_2, __pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 496, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_v_iterator = __pyx_t_5; + __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":495 + * cdef int n = len(sents) + * + * if visualize: # <<<<<<<<<<<<<< + * iterator = tqdm(sents, total=n, desc=f" | > Post-processing {lang}", unit="line") + * else: + */ + goto __pyx_L3; + } + + /* "IndicTransToolkit/processor.pyx":498 + * iterator = tqdm(sents, total=n, desc=f" | > Post-processing {lang}", unit="line") + * else: + * iterator = sents # <<<<<<<<<<<<<< + * + * results = [self._postprocess(s, lang) for s in iterator] + */ + /*else*/ { + __Pyx_INCREF(__pyx_v_sents); + __pyx_v_iterator = __pyx_v_sents; + } + __pyx_L3:; + + /* "IndicTransToolkit/processor.pyx":500 + * iterator = sents + * + * results = [self._postprocess(s, lang) for s in iterator] # <<<<<<<<<<<<<< + * self._placeholder_entity_maps.queue.clear() + * + */ + { /* enter inner scope */ + __pyx_t_5 = PyList_New(0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 500, __pyx_L6_error) + __Pyx_GOTREF(__pyx_t_5); + if (likely(PyList_CheckExact(__pyx_v_iterator)) || PyTuple_CheckExact(__pyx_v_iterator)) { + __pyx_t_4 = __pyx_v_iterator; __Pyx_INCREF(__pyx_t_4); + __pyx_t_7 = 0; + __pyx_t_8 = NULL; + } else { + __pyx_t_7 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_iterator); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 500, __pyx_L6_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_8 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_4); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 500, 
__pyx_L6_error) + } + for (;;) { + if (likely(!__pyx_t_8)) { + if (likely(PyList_CheckExact(__pyx_t_4))) { + { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_4); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 500, __pyx_L6_error) + #endif + if (__pyx_t_7 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_2 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_7); __Pyx_INCREF(__pyx_t_2); __pyx_t_7++; if (unlikely((0 < 0))) __PYX_ERR(0, 500, __pyx_L6_error) + #else + __pyx_t_2 = __Pyx_PySequence_ITEM(__pyx_t_4, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 500, __pyx_L6_error) + __Pyx_GOTREF(__pyx_t_2); + #endif + } else { + { + Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_4); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 500, __pyx_L6_error) + #endif + if (__pyx_t_7 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_2 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_7); __Pyx_INCREF(__pyx_t_2); __pyx_t_7++; if (unlikely((0 < 0))) __PYX_ERR(0, 500, __pyx_L6_error) + #else + __pyx_t_2 = __Pyx_PySequence_ITEM(__pyx_t_4, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 500, __pyx_L6_error) + __Pyx_GOTREF(__pyx_t_2); + #endif + } + } else { + __pyx_t_2 = __pyx_t_8(__pyx_t_4); + if (unlikely(!__pyx_t_2)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 500, __pyx_L6_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_2); + } + __Pyx_XDECREF_SET(__pyx_8genexpr1__pyx_v_s, __pyx_t_2); + __pyx_t_2 = 0; + __pyx_t_2 = ((struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self->__pyx_vtab)->_postprocess(__pyx_v_self, __pyx_8genexpr1__pyx_v_s, __pyx_v_lang); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 500, __pyx_L6_error) + __Pyx_GOTREF(__pyx_t_2); + if 
(unlikely(__Pyx_ListComp_Append(__pyx_t_5, (PyObject*)__pyx_t_2))) __PYX_ERR(0, 500, __pyx_L6_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + } + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_XDECREF(__pyx_8genexpr1__pyx_v_s); __pyx_8genexpr1__pyx_v_s = 0; + goto __pyx_L10_exit_scope; + __pyx_L6_error:; + __Pyx_XDECREF(__pyx_8genexpr1__pyx_v_s); __pyx_8genexpr1__pyx_v_s = 0; + goto __pyx_L1_error; + __pyx_L10_exit_scope:; + } /* exit inner scope */ + __pyx_v_results = ((PyObject*)__pyx_t_5); + __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":501 + * + * results = [self._postprocess(s, lang) for s in iterator] + * self._placeholder_entity_maps.queue.clear() # <<<<<<<<<<<<<< + * + * return results + */ + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self->_placeholder_entity_maps, __pyx_n_s_queue); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 501, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_clear); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 501, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = NULL; + __pyx_t_6 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_2))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2); + if (likely(__pyx_t_4)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2); + __Pyx_INCREF(__pyx_t_4); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_2, function); + __pyx_t_6 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_4, NULL}; + __pyx_t_5 = __Pyx_PyObject_FastCall(__pyx_t_2, __pyx_callargs+1-__pyx_t_6, 0+__pyx_t_6); + __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 501, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + } + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "IndicTransToolkit/processor.pyx":503 + * self._placeholder_entity_maps.queue.clear() + * + * return results # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF(__pyx_r); 
+ __Pyx_INCREF(__pyx_v_results); + __pyx_r = __pyx_v_results; + goto __pyx_L0; + + /* "IndicTransToolkit/processor.pyx":479 + * + * # Exposed Method: Postprocess a Batch of Sentences + * cpdef list postprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] sents, + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.postprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_iterator); + __Pyx_XDECREF(__pyx_v_results); + __Pyx_XDECREF(__pyx_8genexpr1__pyx_v_s); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +PyDoc_STRVAR(__pyx_doc_17IndicTransToolkit_9processor_14IndicProcessor_4postprocess_batch, "\n Postprocess a batch of sentences:\n Restore placeholders, fix script issues, and detokenize.\n This is exposed for external use.\n "); +static PyMethodDef __pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch = {"postprocess_batch", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_17IndicTransToolkit_9processor_14IndicProcessor_4postprocess_batch}; +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds 
+#endif +) { + PyObject *__pyx_v_sents = 0; + PyObject *__pyx_v_lang = 0; + int __pyx_v_visualize; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[3] = {0,0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("postprocess_batch (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_sents,&__pyx_n_s_lang,&__pyx_n_s_visualize,0}; + values[1] = __Pyx_Arg_NewRef_FASTCALL(((PyObject*)__pyx_n_u_hin_Deva)); + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_sents)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 479, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_lang); + if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 479, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (kw_args > 0) { 
+ PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_visualize); + if (value) { values[2] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 479, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "postprocess_batch") < 0)) __PYX_ERR(0, 479, __pyx_L3_error) + } + } else { + switch (__pyx_nargs) { + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } + } + __pyx_v_sents = ((PyObject*)values[0]); + __pyx_v_lang = ((PyObject*)values[1]); + if (values[2]) { + __pyx_v_visualize = __Pyx_PyObject_IsTrue(values[2]); if (unlikely((__pyx_v_visualize == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 483, __pyx_L3_error) + } else { + + /* "IndicTransToolkit/processor.pyx":483 + * List[str] sents, + * str lang="hin_Deva", + * bint visualize=False # <<<<<<<<<<<<<< + * ): + * """ + */ + __pyx_v_visualize = ((int)0); + } + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("postprocess_batch", 0, 1, 3, __pyx_nargs); __PYX_ERR(0, 479, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.postprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_sents), (&PyList_Type), 
1, "sents", 1))) __PYX_ERR(0, 481, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_lang), (&PyUnicode_Type), 1, "lang", 1))) __PYX_ERR(0, 482, __pyx_L1_error) + __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_4postprocess_batch(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self), __pyx_v_sents, __pyx_v_lang, __pyx_v_visualize); + + /* "IndicTransToolkit/processor.pyx":479 + * + * # Exposed Method: Postprocess a Batch of Sentences + * cpdef list postprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] sents, + */ + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_4postprocess_batch(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_sents, PyObject *__pyx_v_lang, int __pyx_v_visualize) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch __pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("postprocess_batch", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_2.__pyx_n = 2; + __pyx_t_2.lang = __pyx_v_lang; + __pyx_t_2.visualize = __pyx_v_visualize; + __pyx_t_1 = __pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor->postprocess_batch(__pyx_v_self, __pyx_v_sents, 1, &__pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 479, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + 
__Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.postprocess_batch", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "IndicTransToolkit/processor.pyx":21 + * + * cdef class IndicProcessor: + * cdef public bint inference # <<<<<<<<<<<<<< + * + * # Precompiled regex patterns and placeholders + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_9inference___get__(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_9inference___get__(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyBool_FromLong(__pyx_v_self->inference); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 21, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.inference.__get__", __pyx_clineno, __pyx_lineno, 
__pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_9inference_2__set__(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_9inference_2__set__(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 21, __pyx_L1_error) + __pyx_v_self->inference = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.inference.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "(tree fragment)":1 + * def __reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + +/* Python wrapper */ +static PyObject 
*__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__ = {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + if (unlikely(__pyx_nargs > 0)) { + __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} + if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; + __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_6__reduce_cython__(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject 
*__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_6__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__reduce_cython__", 1); + + /* "(tree fragment)":2 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< + * def __setstate_cython__(self, __pyx_state): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); + __PYX_ERR(1, 2, __pyx_L1_error) + + /* "(tree fragment)":1 + * def __reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__ = {"__setstate_cython__", 
(PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[1] = {0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, 
kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 1)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + } + __pyx_v___pyx_state = values[0]; + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_8__setstate_cython__(((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)__pyx_v_self), __pyx_v___pyx_state); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_17IndicTransToolkit_9processor_14IndicProcessor_8__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__setstate_cython__", 1); + + /* "(tree fragment)":4 + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + * raise TypeError, "no default __reduce__ due to 
non-trivial __cinit__" # <<<<<<<<<<<<<< + */ + __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); + __PYX_ERR(1, 4, __pyx_L1_error) + + /* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("IndicTransToolkit.processor.IndicProcessor.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} +static struct __pyx_vtabstruct_17IndicTransToolkit_9processor_IndicProcessor __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor; + +static PyObject *__pyx_tp_new_17IndicTransToolkit_9processor_IndicProcessor(PyTypeObject *t, PyObject *a, PyObject *k) { + struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *p; + PyObject *o; + #if CYTHON_COMPILING_IN_LIMITED_API + allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); + o = alloc_func(t, 0); + #else + if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { + o = (*t->tp_alloc)(t, 0); + } else { + o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); + } + if (unlikely(!o)) return 0; + #endif + p = ((struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)o); + p->__pyx_vtab = __pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor; + p->_MULTISPACE_REGEX = Py_None; Py_INCREF(Py_None); + p->_DIGIT_SPACE_PERCENT = Py_None; Py_INCREF(Py_None); + p->_DOUBLE_QUOT_PUNC = Py_None; Py_INCREF(Py_None); + p->_DIGIT_NBSP_DIGIT = Py_None; Py_INCREF(Py_None); + p->_END_BRACKET_SPACE_PUNC_REGEX = Py_None; Py_INCREF(Py_None); + p->_URL_PATTERN = Py_None; Py_INCREF(Py_None); + p->_NUMERAL_PATTERN = Py_None; Py_INCREF(Py_None); + p->_EMAIL_PATTERN = 
Py_None; Py_INCREF(Py_None); + p->_OTHER_PATTERN = Py_None; Py_INCREF(Py_None); + p->_PUNC_REPLACEMENTS = ((PyObject*)Py_None); Py_INCREF(Py_None); + p->_INDIC_FAILURE_CASES = ((PyObject*)Py_None); Py_INCREF(Py_None); + p->_flores_codes = ((PyObject*)Py_None); Py_INCREF(Py_None); + p->_digits_translation_table = ((PyObject*)Py_None); Py_INCREF(Py_None); + p->_placeholder_entity_maps = Py_None; Py_INCREF(Py_None); + p->_en_tok = Py_None; Py_INCREF(Py_None); + p->_en_normalizer = Py_None; Py_INCREF(Py_None); + p->_en_detok = Py_None; Py_INCREF(Py_None); + p->_xliterator = Py_None; Py_INCREF(Py_None); + if (unlikely(__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_1__cinit__(o, a, k) < 0)) goto bad; + return o; + bad: + Py_DECREF(o); o = 0; + return NULL; +} + +static void __pyx_tp_dealloc_17IndicTransToolkit_9processor_IndicProcessor(PyObject *o) { + struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *p = (struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)o; + #if CYTHON_USE_TP_FINALIZE + if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { + if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_17IndicTransToolkit_9processor_IndicProcessor) { + if (PyObject_CallFinalizerFromDealloc(o)) return; + } + } + #endif + PyObject_GC_UnTrack(o); + Py_CLEAR(p->_MULTISPACE_REGEX); + Py_CLEAR(p->_DIGIT_SPACE_PERCENT); + Py_CLEAR(p->_DOUBLE_QUOT_PUNC); + Py_CLEAR(p->_DIGIT_NBSP_DIGIT); + Py_CLEAR(p->_END_BRACKET_SPACE_PUNC_REGEX); + Py_CLEAR(p->_URL_PATTERN); + Py_CLEAR(p->_NUMERAL_PATTERN); + Py_CLEAR(p->_EMAIL_PATTERN); + Py_CLEAR(p->_OTHER_PATTERN); + Py_CLEAR(p->_PUNC_REPLACEMENTS); + Py_CLEAR(p->_INDIC_FAILURE_CASES); + Py_CLEAR(p->_flores_codes); + Py_CLEAR(p->_digits_translation_table); + Py_CLEAR(p->_placeholder_entity_maps); + Py_CLEAR(p->_en_tok); + 
Py_CLEAR(p->_en_normalizer); + Py_CLEAR(p->_en_detok); + Py_CLEAR(p->_xliterator); + #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + (*Py_TYPE(o)->tp_free)(o); + #else + { + freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); + if (tp_free) tp_free(o); + } + #endif +} + +static int __pyx_tp_traverse_17IndicTransToolkit_9processor_IndicProcessor(PyObject *o, visitproc v, void *a) { + int e; + struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *p = (struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)o; + if (p->_MULTISPACE_REGEX) { + e = (*v)(p->_MULTISPACE_REGEX, a); if (e) return e; + } + if (p->_DIGIT_SPACE_PERCENT) { + e = (*v)(p->_DIGIT_SPACE_PERCENT, a); if (e) return e; + } + if (p->_DOUBLE_QUOT_PUNC) { + e = (*v)(p->_DOUBLE_QUOT_PUNC, a); if (e) return e; + } + if (p->_DIGIT_NBSP_DIGIT) { + e = (*v)(p->_DIGIT_NBSP_DIGIT, a); if (e) return e; + } + if (p->_END_BRACKET_SPACE_PUNC_REGEX) { + e = (*v)(p->_END_BRACKET_SPACE_PUNC_REGEX, a); if (e) return e; + } + if (p->_URL_PATTERN) { + e = (*v)(p->_URL_PATTERN, a); if (e) return e; + } + if (p->_NUMERAL_PATTERN) { + e = (*v)(p->_NUMERAL_PATTERN, a); if (e) return e; + } + if (p->_EMAIL_PATTERN) { + e = (*v)(p->_EMAIL_PATTERN, a); if (e) return e; + } + if (p->_OTHER_PATTERN) { + e = (*v)(p->_OTHER_PATTERN, a); if (e) return e; + } + if (p->_PUNC_REPLACEMENTS) { + e = (*v)(p->_PUNC_REPLACEMENTS, a); if (e) return e; + } + if (p->_INDIC_FAILURE_CASES) { + e = (*v)(p->_INDIC_FAILURE_CASES, a); if (e) return e; + } + if (p->_flores_codes) { + e = (*v)(p->_flores_codes, a); if (e) return e; + } + if (p->_digits_translation_table) { + e = (*v)(p->_digits_translation_table, a); if (e) return e; + } + if (p->_placeholder_entity_maps) { + e = (*v)(p->_placeholder_entity_maps, a); if (e) return e; + } + if (p->_en_tok) { + e = (*v)(p->_en_tok, a); if (e) return e; + } + if (p->_en_normalizer) { + e = (*v)(p->_en_normalizer, a); if (e) return e; + } + if (p->_en_detok) { + 
e = (*v)(p->_en_detok, a); if (e) return e; + } + if (p->_xliterator) { + e = (*v)(p->_xliterator, a); if (e) return e; + } + return 0; +} + +static int __pyx_tp_clear_17IndicTransToolkit_9processor_IndicProcessor(PyObject *o) { + PyObject* tmp; + struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *p = (struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *)o; + tmp = ((PyObject*)p->_MULTISPACE_REGEX); + p->_MULTISPACE_REGEX = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_DIGIT_SPACE_PERCENT); + p->_DIGIT_SPACE_PERCENT = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_DOUBLE_QUOT_PUNC); + p->_DOUBLE_QUOT_PUNC = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_DIGIT_NBSP_DIGIT); + p->_DIGIT_NBSP_DIGIT = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_END_BRACKET_SPACE_PUNC_REGEX); + p->_END_BRACKET_SPACE_PUNC_REGEX = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_URL_PATTERN); + p->_URL_PATTERN = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_NUMERAL_PATTERN); + p->_NUMERAL_PATTERN = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_EMAIL_PATTERN); + p->_EMAIL_PATTERN = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_OTHER_PATTERN); + p->_OTHER_PATTERN = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_PUNC_REPLACEMENTS); + p->_PUNC_REPLACEMENTS = ((PyObject*)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_INDIC_FAILURE_CASES); + p->_INDIC_FAILURE_CASES = ((PyObject*)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_flores_codes); + p->_flores_codes = ((PyObject*)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_digits_translation_table); + p->_digits_translation_table = ((PyObject*)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = 
((PyObject*)p->_placeholder_entity_maps); + p->_placeholder_entity_maps = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_en_tok); + p->_en_tok = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_en_normalizer); + p->_en_normalizer = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_en_detok); + p->_en_detok = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->_xliterator); + p->_xliterator = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + return 0; +} + +static PyObject *__pyx_getprop_17IndicTransToolkit_9processor_14IndicProcessor_inference(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_1__get__(o); +} + +static int __pyx_setprop_17IndicTransToolkit_9processor_14IndicProcessor_inference(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9inference_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyMethodDef __pyx_methods_17IndicTransToolkit_9processor_IndicProcessor[] = { + {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, + {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, + {0, 0, 0, 0} +}; + +static struct PyGetSetDef __pyx_getsets_17IndicTransToolkit_9processor_IndicProcessor[] = { + {(char *)"inference", __pyx_getprop_17IndicTransToolkit_9processor_14IndicProcessor_inference, __pyx_setprop_17IndicTransToolkit_9processor_14IndicProcessor_inference, (char *)0, 0}, + {0, 0, 0, 0, 0} +}; +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot 
__pyx_type_17IndicTransToolkit_9processor_IndicProcessor_slots[] = { + {Py_tp_dealloc, (void *)__pyx_tp_dealloc_17IndicTransToolkit_9processor_IndicProcessor}, + {Py_tp_traverse, (void *)__pyx_tp_traverse_17IndicTransToolkit_9processor_IndicProcessor}, + {Py_tp_clear, (void *)__pyx_tp_clear_17IndicTransToolkit_9processor_IndicProcessor}, + {Py_tp_methods, (void *)__pyx_methods_17IndicTransToolkit_9processor_IndicProcessor}, + {Py_tp_getset, (void *)__pyx_getsets_17IndicTransToolkit_9processor_IndicProcessor}, + {Py_tp_new, (void *)__pyx_tp_new_17IndicTransToolkit_9processor_IndicProcessor}, + {0, 0}, +}; +static PyType_Spec __pyx_type_17IndicTransToolkit_9processor_IndicProcessor_spec = { + "IndicTransToolkit.processor.IndicProcessor", + sizeof(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor), + 0, + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, + __pyx_type_17IndicTransToolkit_9processor_IndicProcessor_slots, +}; +#else + +static PyTypeObject __pyx_type_17IndicTransToolkit_9processor_IndicProcessor = { + PyVarObject_HEAD_INIT(0, 0) + "IndicTransToolkit.processor.""IndicProcessor", /*tp_name*/ + sizeof(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_17IndicTransToolkit_9processor_IndicProcessor, /*tp_dealloc*/ + #if PY_VERSION_HEX < 0x030800b4 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030800b4 + 0, /*tp_vectorcall_offset*/ + #endif + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #endif + #if PY_MAJOR_VERSION >= 3 + 0, /*tp_as_async*/ + #endif + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + 
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /*tp_doc*/ + __pyx_tp_traverse_17IndicTransToolkit_9processor_IndicProcessor, /*tp_traverse*/ + __pyx_tp_clear_17IndicTransToolkit_9processor_IndicProcessor, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + __pyx_methods_17IndicTransToolkit_9processor_IndicProcessor, /*tp_methods*/ + 0, /*tp_members*/ + __pyx_getsets_17IndicTransToolkit_9processor_IndicProcessor, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + #if !CYTHON_USE_TYPE_SPECS + 0, /*tp_dictoffset*/ + #endif + 0, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_17IndicTransToolkit_9processor_IndicProcessor, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0, /*tp_version_tag*/ + #if PY_VERSION_HEX >= 0x030400a1 + #if CYTHON_USE_TP_FINALIZE + 0, /*tp_finalize*/ + #else + NULL, /*tp_finalize*/ + #endif + #endif + #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, /*tp_vectorcall*/ + #endif + #if __PYX_NEED_TP_PRINT_SLOT == 1 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030C0000 + 0, /*tp_watched*/ + #endif + #if PY_VERSION_HEX >= 0x030d00A4 + 0, /*tp_versions_used*/ + #endif + #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, /*tp_pypy_flags*/ + #endif +}; +#endif + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; +#ifndef CYTHON_SMALL_CODE +#if defined(__clang__) + #define CYTHON_SMALL_CODE +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define CYTHON_SMALL_CODE __attribute__((cold)) +#else + #define CYTHON_SMALL_CODE +#endif +#endif +/* #### Code section: pystring_table ### */ + +static int 
__Pyx_CreateStringTabAndInitStrings(void) { + __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_u_, __pyx_k_, sizeof(__pyx_k_), 0, 1, 0, 1}, + {&__pyx_kp_u_0, __pyx_k_0, sizeof(__pyx_k_0), 0, 1, 0, 0}, + {&__pyx_kp_u_1, __pyx_k_1, sizeof(__pyx_k_1), 0, 1, 0, 0}, + {&__pyx_kp_u_1_2, __pyx_k_1_2, sizeof(__pyx_k_1_2), 0, 1, 0, 0}, + {&__pyx_kp_u_1_2_2, __pyx_k_1_2_2, sizeof(__pyx_k_1_2_2), 0, 1, 0, 0}, + {&__pyx_kp_u_1_3, __pyx_k_1_3, sizeof(__pyx_k_1_3), 0, 1, 0, 0}, + {&__pyx_kp_u_1_4, __pyx_k_1_4, sizeof(__pyx_k_1_4), 0, 1, 0, 0}, + {&__pyx_kp_u_2, __pyx_k_2, sizeof(__pyx_k_2), 0, 1, 0, 0}, + {&__pyx_kp_u_2_2, __pyx_k_2_2, sizeof(__pyx_k_2_2), 0, 1, 0, 0}, + {&__pyx_kp_u_3, __pyx_k_3, sizeof(__pyx_k_3), 0, 1, 0, 0}, + {&__pyx_kp_u_4, __pyx_k_4, sizeof(__pyx_k_4), 0, 1, 0, 0}, + {&__pyx_kp_u_5, __pyx_k_5, sizeof(__pyx_k_5), 0, 1, 0, 0}, + {&__pyx_kp_u_6, __pyx_k_6, sizeof(__pyx_k_6), 0, 1, 0, 0}, + {&__pyx_kp_u_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 1, 0, 0}, + {&__pyx_kp_u_8, __pyx_k_8, sizeof(__pyx_k_8), 0, 1, 0, 0}, + {&__pyx_kp_u_9, __pyx_k_9, sizeof(__pyx_k_9), 0, 1, 0, 0}, + {&__pyx_kp_u_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2, __pyx_k_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2, sizeof(__pyx_k_A_Za_z0_9___A_Za_z0_9_A_Z_a_z_2), 0, 1, 0, 0}, + {&__pyx_kp_u_A_Za_z0_9_w, __pyx_k_A_Za_z0_9_w, sizeof(__pyx_k_A_Za_z0_9_w), 0, 1, 0, 0}, + {&__pyx_n_u_Arab, __pyx_k_Arab, sizeof(__pyx_k_Arab), 0, 1, 0, 1}, + {&__pyx_n_u_Aran, __pyx_k_Aran, sizeof(__pyx_k_Aran), 0, 1, 0, 1}, + {&__pyx_kp_u_C, __pyx_k_C, sizeof(__pyx_k_C), 0, 1, 0, 0}, + {&__pyx_kp_u_C_2, __pyx_k_C_2, sizeof(__pyx_k_C_2), 0, 1, 0, 0}, + {&__pyx_n_s_Dict, __pyx_k_Dict, sizeof(__pyx_k_Dict), 0, 0, 1, 1}, + {&__pyx_kp_u_ID, __pyx_k_ID, sizeof(__pyx_k_ID), 0, 1, 0, 0}, + {&__pyx_kp_u_ID_2, __pyx_k_ID_2, sizeof(__pyx_k_ID_2), 0, 1, 0, 0}, + {&__pyx_kp_u_ID_3, __pyx_k_ID_3, sizeof(__pyx_k_ID_3), 0, 1, 0, 0}, + {&__pyx_kp_u_ID_4, __pyx_k_ID_4, sizeof(__pyx_k_ID_4), 0, 1, 0, 0}, + {&__pyx_kp_u_ID_5, __pyx_k_ID_5, 
sizeof(__pyx_k_ID_5), 0, 1, 0, 0}, + {&__pyx_n_s_IndicNormalizerFactory, __pyx_k_IndicNormalizerFactory, sizeof(__pyx_k_IndicNormalizerFactory), 0, 0, 1, 1}, + {&__pyx_n_s_IndicProcessor, __pyx_k_IndicProcessor, sizeof(__pyx_k_IndicProcessor), 0, 0, 1, 1}, + {&__pyx_n_s_IndicProcessor___reduce_cython, __pyx_k_IndicProcessor___reduce_cython, sizeof(__pyx_k_IndicProcessor___reduce_cython), 0, 0, 1, 1}, + {&__pyx_n_s_IndicProcessor___setstate_cython, __pyx_k_IndicProcessor___setstate_cython, sizeof(__pyx_k_IndicProcessor___setstate_cython), 0, 0, 1, 1}, + {&__pyx_n_s_IndicProcessor_postprocess_batch, __pyx_k_IndicProcessor_postprocess_batch, sizeof(__pyx_k_IndicProcessor_postprocess_batch), 0, 0, 1, 1}, + {&__pyx_n_s_IndicProcessor_preprocess_batch, __pyx_k_IndicProcessor_preprocess_batch, sizeof(__pyx_k_IndicProcessor_preprocess_batch), 0, 0, 1, 1}, + {&__pyx_n_s_IndicTransToolkit_processor, __pyx_k_IndicTransToolkit_processor, sizeof(__pyx_k_IndicTransToolkit_processor), 0, 0, 1, 1}, + {&__pyx_kp_s_IndicTransToolkit_processor_pyx, __pyx_k_IndicTransToolkit_processor_pyx, sizeof(__pyx_k_IndicTransToolkit_processor_pyx), 0, 0, 1, 0}, + {&__pyx_n_u_Latn, __pyx_k_Latn, sizeof(__pyx_k_Latn), 0, 1, 0, 1}, + {&__pyx_n_s_List, __pyx_k_List, sizeof(__pyx_k_List), 0, 0, 1, 1}, + {&__pyx_n_s_MosesDetokenizer, __pyx_k_MosesDetokenizer, sizeof(__pyx_k_MosesDetokenizer), 0, 0, 1, 1}, + {&__pyx_n_s_MosesPunctNormalizer, __pyx_k_MosesPunctNormalizer, sizeof(__pyx_k_MosesPunctNormalizer), 0, 0, 1, 1}, + {&__pyx_n_s_MosesTokenizer, __pyx_k_MosesTokenizer, sizeof(__pyx_k_MosesTokenizer), 0, 0, 1, 1}, + {&__pyx_n_u_Mtei, __pyx_k_Mtei, sizeof(__pyx_k_Mtei), 0, 1, 0, 1}, + {&__pyx_kp_u_None, __pyx_k_None, sizeof(__pyx_k_None), 0, 1, 0, 0}, + {&__pyx_n_u_Olck, __pyx_k_Olck, sizeof(__pyx_k_Olck), 0, 1, 0, 1}, + {&__pyx_kp_u_Post_processing, __pyx_k_Post_processing, sizeof(__pyx_k_Post_processing), 0, 1, 0, 0}, + {&__pyx_kp_u_Pre_processing, __pyx_k_Pre_processing, 
sizeof(__pyx_k_Pre_processing), 0, 1, 0, 0}, + {&__pyx_n_s_Queue, __pyx_k_Queue, sizeof(__pyx_k_Queue), 0, 0, 1, 1}, + {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1}, + {&__pyx_n_s_UnicodeIndicTransliterator, __pyx_k_UnicodeIndicTransliterator, sizeof(__pyx_k_UnicodeIndicTransliterator), 0, 0, 1, 1}, + {&__pyx_n_s_Union, __pyx_k_Union, sizeof(__pyx_k_Union), 0, 0, 1, 1}, + {&__pyx_n_u__10, __pyx_k__10, sizeof(__pyx_k__10), 0, 1, 0, 1}, + {&__pyx_n_u__100, __pyx_k__100, sizeof(__pyx_k__100), 0, 1, 0, 1}, + {&__pyx_kp_u__101, __pyx_k__101, sizeof(__pyx_k__101), 0, 1, 0, 0}, + {&__pyx_kp_u__102, __pyx_k__102, sizeof(__pyx_k__102), 0, 1, 0, 0}, + {&__pyx_kp_u__103, __pyx_k__103, sizeof(__pyx_k__103), 0, 1, 0, 0}, + {&__pyx_kp_u__104, __pyx_k__104, sizeof(__pyx_k__104), 0, 1, 0, 0}, + {&__pyx_kp_u__105, __pyx_k__105, sizeof(__pyx_k__105), 0, 1, 0, 0}, + {&__pyx_kp_u__106, __pyx_k__106, sizeof(__pyx_k__106), 0, 1, 0, 0}, + {&__pyx_kp_u__107, __pyx_k__107, sizeof(__pyx_k__107), 0, 1, 0, 0}, + {&__pyx_kp_u__108, __pyx_k__108, sizeof(__pyx_k__108), 0, 1, 0, 0}, + {&__pyx_kp_u__109, __pyx_k__109, sizeof(__pyx_k__109), 0, 1, 0, 0}, + {&__pyx_n_u__11, __pyx_k__11, sizeof(__pyx_k__11), 0, 1, 0, 1}, + {&__pyx_kp_u__110, __pyx_k__110, sizeof(__pyx_k__110), 0, 1, 0, 0}, + {&__pyx_kp_u__111, __pyx_k__111, sizeof(__pyx_k__111), 0, 1, 0, 0}, + {&__pyx_kp_u__112, __pyx_k__112, sizeof(__pyx_k__112), 0, 1, 0, 0}, + {&__pyx_kp_u__113, __pyx_k__113, sizeof(__pyx_k__113), 0, 1, 0, 0}, + {&__pyx_kp_u__114, __pyx_k__114, sizeof(__pyx_k__114), 0, 1, 0, 0}, + {&__pyx_kp_u__115, __pyx_k__115, sizeof(__pyx_k__115), 0, 1, 0, 0}, + {&__pyx_kp_u__116, __pyx_k__116, sizeof(__pyx_k__116), 0, 1, 0, 0}, + {&__pyx_kp_u__117, __pyx_k__117, sizeof(__pyx_k__117), 0, 1, 0, 0}, + {&__pyx_kp_u__118, __pyx_k__118, sizeof(__pyx_k__118), 0, 1, 0, 0}, + {&__pyx_kp_u__119, __pyx_k__119, sizeof(__pyx_k__119), 0, 1, 0, 0}, + {&__pyx_n_u__12, __pyx_k__12, sizeof(__pyx_k__12), 0, 1, 
0, 1}, + {&__pyx_kp_u__120, __pyx_k__120, sizeof(__pyx_k__120), 0, 1, 0, 0}, + {&__pyx_kp_u__121, __pyx_k__121, sizeof(__pyx_k__121), 0, 1, 0, 0}, + {&__pyx_kp_u__122, __pyx_k__122, sizeof(__pyx_k__122), 0, 1, 0, 0}, + {&__pyx_kp_u__123, __pyx_k__123, sizeof(__pyx_k__123), 0, 1, 0, 0}, + {&__pyx_kp_u__124, __pyx_k__124, sizeof(__pyx_k__124), 0, 1, 0, 0}, + {&__pyx_kp_u__125, __pyx_k__125, sizeof(__pyx_k__125), 0, 1, 0, 0}, + {&__pyx_kp_u__126, __pyx_k__126, sizeof(__pyx_k__126), 0, 1, 0, 0}, + {&__pyx_kp_u__127, __pyx_k__127, sizeof(__pyx_k__127), 0, 1, 0, 0}, + {&__pyx_kp_u__128, __pyx_k__128, sizeof(__pyx_k__128), 0, 1, 0, 0}, + {&__pyx_kp_u__129, __pyx_k__129, sizeof(__pyx_k__129), 0, 1, 0, 0}, + {&__pyx_n_u__13, __pyx_k__13, sizeof(__pyx_k__13), 0, 1, 0, 1}, + {&__pyx_kp_u__130, __pyx_k__130, sizeof(__pyx_k__130), 0, 1, 0, 0}, + {&__pyx_kp_u__131, __pyx_k__131, sizeof(__pyx_k__131), 0, 1, 0, 0}, + {&__pyx_kp_u__132, __pyx_k__132, sizeof(__pyx_k__132), 0, 1, 0, 0}, + {&__pyx_kp_u__133, __pyx_k__133, sizeof(__pyx_k__133), 0, 1, 0, 0}, + {&__pyx_kp_u__134, __pyx_k__134, sizeof(__pyx_k__134), 0, 1, 0, 0}, + {&__pyx_kp_u__135, __pyx_k__135, sizeof(__pyx_k__135), 0, 1, 0, 0}, + {&__pyx_kp_u__136, __pyx_k__136, sizeof(__pyx_k__136), 0, 1, 0, 0}, + {&__pyx_kp_u__137, __pyx_k__137, sizeof(__pyx_k__137), 0, 1, 0, 0}, + {&__pyx_n_u__138, __pyx_k__138, sizeof(__pyx_k__138), 0, 1, 0, 1}, + {&__pyx_kp_u__139, __pyx_k__139, sizeof(__pyx_k__139), 0, 1, 0, 0}, + {&__pyx_n_u__14, __pyx_k__14, sizeof(__pyx_k__14), 0, 1, 0, 1}, + {&__pyx_kp_u__140, __pyx_k__140, sizeof(__pyx_k__140), 0, 1, 0, 0}, + {&__pyx_kp_u__141, __pyx_k__141, sizeof(__pyx_k__141), 0, 1, 0, 0}, + {&__pyx_kp_u__142, __pyx_k__142, sizeof(__pyx_k__142), 0, 1, 0, 0}, + {&__pyx_kp_u__143, __pyx_k__143, sizeof(__pyx_k__143), 0, 1, 0, 0}, + {&__pyx_kp_u__144, __pyx_k__144, sizeof(__pyx_k__144), 0, 1, 0, 0}, + {&__pyx_kp_u__145, __pyx_k__145, sizeof(__pyx_k__145), 0, 1, 0, 0}, + {&__pyx_kp_u__146, __pyx_k__146, 
sizeof(__pyx_k__146), 0, 1, 0, 0}, + {&__pyx_kp_u__147, __pyx_k__147, sizeof(__pyx_k__147), 0, 1, 0, 0}, + {&__pyx_kp_u__148, __pyx_k__148, sizeof(__pyx_k__148), 0, 1, 0, 0}, + {&__pyx_kp_u__149, __pyx_k__149, sizeof(__pyx_k__149), 0, 1, 0, 0}, + {&__pyx_n_u__15, __pyx_k__15, sizeof(__pyx_k__15), 0, 1, 0, 1}, + {&__pyx_kp_u__151, __pyx_k__151, sizeof(__pyx_k__151), 0, 1, 0, 0}, + {&__pyx_kp_u__153, __pyx_k__153, sizeof(__pyx_k__153), 0, 1, 0, 0}, + {&__pyx_kp_u__154, __pyx_k__154, sizeof(__pyx_k__154), 0, 1, 0, 0}, + {&__pyx_n_u__155, __pyx_k__155, sizeof(__pyx_k__155), 0, 1, 0, 1}, + {&__pyx_kp_u__156, __pyx_k__156, sizeof(__pyx_k__156), 0, 1, 0, 0}, + {&__pyx_kp_u__157, __pyx_k__157, sizeof(__pyx_k__157), 0, 1, 0, 0}, + {&__pyx_kp_u__159, __pyx_k__159, sizeof(__pyx_k__159), 0, 1, 0, 0}, + {&__pyx_n_u__16, __pyx_k__16, sizeof(__pyx_k__16), 0, 1, 0, 1}, + {&__pyx_kp_u__160, __pyx_k__160, sizeof(__pyx_k__160), 0, 1, 0, 0}, + {&__pyx_kp_u__162, __pyx_k__162, sizeof(__pyx_k__162), 0, 1, 0, 0}, + {&__pyx_kp_u__163, __pyx_k__163, sizeof(__pyx_k__163), 0, 1, 0, 0}, + {&__pyx_kp_u__165, __pyx_k__165, sizeof(__pyx_k__165), 0, 1, 0, 0}, + {&__pyx_n_u__166, __pyx_k__166, sizeof(__pyx_k__166), 0, 1, 0, 1}, + {&__pyx_kp_u__168, __pyx_k__168, sizeof(__pyx_k__168), 0, 1, 0, 0}, + {&__pyx_n_u__169, __pyx_k__169, sizeof(__pyx_k__169), 0, 1, 0, 1}, + {&__pyx_n_u__17, __pyx_k__17, sizeof(__pyx_k__17), 0, 1, 0, 1}, + {&__pyx_n_s__171, __pyx_k__171, sizeof(__pyx_k__171), 0, 0, 1, 1}, + {&__pyx_n_u__18, __pyx_k__18, sizeof(__pyx_k__18), 0, 1, 0, 1}, + {&__pyx_n_s__182, __pyx_k__182, sizeof(__pyx_k__182), 0, 0, 1, 1}, + {&__pyx_n_u__19, __pyx_k__19, sizeof(__pyx_k__19), 0, 1, 0, 1}, + {&__pyx_n_u__2, __pyx_k__2, sizeof(__pyx_k__2), 0, 1, 0, 1}, + {&__pyx_n_u__20, __pyx_k__20, sizeof(__pyx_k__20), 0, 1, 0, 1}, + {&__pyx_n_u__21, __pyx_k__21, sizeof(__pyx_k__21), 0, 1, 0, 1}, + {&__pyx_n_u__22, __pyx_k__22, sizeof(__pyx_k__22), 0, 1, 0, 1}, + {&__pyx_n_u__23, __pyx_k__23, 
sizeof(__pyx_k__23), 0, 1, 0, 1}, + {&__pyx_n_u__24, __pyx_k__24, sizeof(__pyx_k__24), 0, 1, 0, 1}, + {&__pyx_n_u__25, __pyx_k__25, sizeof(__pyx_k__25), 0, 1, 0, 1}, + {&__pyx_n_u__26, __pyx_k__26, sizeof(__pyx_k__26), 0, 1, 0, 1}, + {&__pyx_n_u__27, __pyx_k__27, sizeof(__pyx_k__27), 0, 1, 0, 1}, + {&__pyx_n_u__28, __pyx_k__28, sizeof(__pyx_k__28), 0, 1, 0, 1}, + {&__pyx_n_u__29, __pyx_k__29, sizeof(__pyx_k__29), 0, 1, 0, 1}, + {&__pyx_n_u__3, __pyx_k__3, sizeof(__pyx_k__3), 0, 1, 0, 1}, + {&__pyx_n_u__30, __pyx_k__30, sizeof(__pyx_k__30), 0, 1, 0, 1}, + {&__pyx_n_u__31, __pyx_k__31, sizeof(__pyx_k__31), 0, 1, 0, 1}, + {&__pyx_n_u__32, __pyx_k__32, sizeof(__pyx_k__32), 0, 1, 0, 1}, + {&__pyx_n_u__33, __pyx_k__33, sizeof(__pyx_k__33), 0, 1, 0, 1}, + {&__pyx_n_u__34, __pyx_k__34, sizeof(__pyx_k__34), 0, 1, 0, 1}, + {&__pyx_n_u__35, __pyx_k__35, sizeof(__pyx_k__35), 0, 1, 0, 1}, + {&__pyx_n_u__36, __pyx_k__36, sizeof(__pyx_k__36), 0, 1, 0, 1}, + {&__pyx_n_u__37, __pyx_k__37, sizeof(__pyx_k__37), 0, 1, 0, 1}, + {&__pyx_n_u__38, __pyx_k__38, sizeof(__pyx_k__38), 0, 1, 0, 1}, + {&__pyx_n_u__39, __pyx_k__39, sizeof(__pyx_k__39), 0, 1, 0, 1}, + {&__pyx_n_u__4, __pyx_k__4, sizeof(__pyx_k__4), 0, 1, 0, 1}, + {&__pyx_n_u__40, __pyx_k__40, sizeof(__pyx_k__40), 0, 1, 0, 1}, + {&__pyx_n_u__41, __pyx_k__41, sizeof(__pyx_k__41), 0, 1, 0, 1}, + {&__pyx_n_u__42, __pyx_k__42, sizeof(__pyx_k__42), 0, 1, 0, 1}, + {&__pyx_n_u__43, __pyx_k__43, sizeof(__pyx_k__43), 0, 1, 0, 1}, + {&__pyx_n_u__44, __pyx_k__44, sizeof(__pyx_k__44), 0, 1, 0, 1}, + {&__pyx_n_u__45, __pyx_k__45, sizeof(__pyx_k__45), 0, 1, 0, 1}, + {&__pyx_n_u__46, __pyx_k__46, sizeof(__pyx_k__46), 0, 1, 0, 1}, + {&__pyx_n_u__47, __pyx_k__47, sizeof(__pyx_k__47), 0, 1, 0, 1}, + {&__pyx_n_u__48, __pyx_k__48, sizeof(__pyx_k__48), 0, 1, 0, 1}, + {&__pyx_n_u__49, __pyx_k__49, sizeof(__pyx_k__49), 0, 1, 0, 1}, + {&__pyx_n_u__5, __pyx_k__5, sizeof(__pyx_k__5), 0, 1, 0, 1}, + {&__pyx_n_u__50, __pyx_k__50, sizeof(__pyx_k__50), 0, 1, 
0, 1}, + {&__pyx_n_u__51, __pyx_k__51, sizeof(__pyx_k__51), 0, 1, 0, 1}, + {&__pyx_n_u__52, __pyx_k__52, sizeof(__pyx_k__52), 0, 1, 0, 1}, + {&__pyx_n_u__53, __pyx_k__53, sizeof(__pyx_k__53), 0, 1, 0, 1}, + {&__pyx_n_u__54, __pyx_k__54, sizeof(__pyx_k__54), 0, 1, 0, 1}, + {&__pyx_n_u__55, __pyx_k__55, sizeof(__pyx_k__55), 0, 1, 0, 1}, + {&__pyx_n_u__56, __pyx_k__56, sizeof(__pyx_k__56), 0, 1, 0, 1}, + {&__pyx_n_u__57, __pyx_k__57, sizeof(__pyx_k__57), 0, 1, 0, 1}, + {&__pyx_n_u__58, __pyx_k__58, sizeof(__pyx_k__58), 0, 1, 0, 1}, + {&__pyx_n_u__59, __pyx_k__59, sizeof(__pyx_k__59), 0, 1, 0, 1}, + {&__pyx_n_u__6, __pyx_k__6, sizeof(__pyx_k__6), 0, 1, 0, 1}, + {&__pyx_n_u__60, __pyx_k__60, sizeof(__pyx_k__60), 0, 1, 0, 1}, + {&__pyx_n_u__61, __pyx_k__61, sizeof(__pyx_k__61), 0, 1, 0, 1}, + {&__pyx_n_u__62, __pyx_k__62, sizeof(__pyx_k__62), 0, 1, 0, 1}, + {&__pyx_n_u__63, __pyx_k__63, sizeof(__pyx_k__63), 0, 1, 0, 1}, + {&__pyx_n_u__64, __pyx_k__64, sizeof(__pyx_k__64), 0, 1, 0, 1}, + {&__pyx_n_u__65, __pyx_k__65, sizeof(__pyx_k__65), 0, 1, 0, 1}, + {&__pyx_n_u__66, __pyx_k__66, sizeof(__pyx_k__66), 0, 1, 0, 1}, + {&__pyx_n_u__67, __pyx_k__67, sizeof(__pyx_k__67), 0, 1, 0, 1}, + {&__pyx_n_u__68, __pyx_k__68, sizeof(__pyx_k__68), 0, 1, 0, 1}, + {&__pyx_n_u__69, __pyx_k__69, sizeof(__pyx_k__69), 0, 1, 0, 1}, + {&__pyx_n_u__7, __pyx_k__7, sizeof(__pyx_k__7), 0, 1, 0, 1}, + {&__pyx_n_u__70, __pyx_k__70, sizeof(__pyx_k__70), 0, 1, 0, 1}, + {&__pyx_n_u__71, __pyx_k__71, sizeof(__pyx_k__71), 0, 1, 0, 1}, + {&__pyx_n_u__72, __pyx_k__72, sizeof(__pyx_k__72), 0, 1, 0, 1}, + {&__pyx_n_u__73, __pyx_k__73, sizeof(__pyx_k__73), 0, 1, 0, 1}, + {&__pyx_n_u__74, __pyx_k__74, sizeof(__pyx_k__74), 0, 1, 0, 1}, + {&__pyx_n_u__75, __pyx_k__75, sizeof(__pyx_k__75), 0, 1, 0, 1}, + {&__pyx_n_u__76, __pyx_k__76, sizeof(__pyx_k__76), 0, 1, 0, 1}, + {&__pyx_n_u__77, __pyx_k__77, sizeof(__pyx_k__77), 0, 1, 0, 1}, + {&__pyx_n_u__78, __pyx_k__78, sizeof(__pyx_k__78), 0, 1, 0, 1}, + {&__pyx_n_u__79, 
__pyx_k__79, sizeof(__pyx_k__79), 0, 1, 0, 1}, + {&__pyx_n_u__8, __pyx_k__8, sizeof(__pyx_k__8), 0, 1, 0, 1}, + {&__pyx_n_u__80, __pyx_k__80, sizeof(__pyx_k__80), 0, 1, 0, 1}, + {&__pyx_n_u__81, __pyx_k__81, sizeof(__pyx_k__81), 0, 1, 0, 1}, + {&__pyx_n_u__82, __pyx_k__82, sizeof(__pyx_k__82), 0, 1, 0, 1}, + {&__pyx_n_u__83, __pyx_k__83, sizeof(__pyx_k__83), 0, 1, 0, 1}, + {&__pyx_n_u__84, __pyx_k__84, sizeof(__pyx_k__84), 0, 1, 0, 1}, + {&__pyx_n_u__85, __pyx_k__85, sizeof(__pyx_k__85), 0, 1, 0, 1}, + {&__pyx_n_u__86, __pyx_k__86, sizeof(__pyx_k__86), 0, 1, 0, 1}, + {&__pyx_n_u__87, __pyx_k__87, sizeof(__pyx_k__87), 0, 1, 0, 1}, + {&__pyx_n_u__88, __pyx_k__88, sizeof(__pyx_k__88), 0, 1, 0, 1}, + {&__pyx_n_u__89, __pyx_k__89, sizeof(__pyx_k__89), 0, 1, 0, 1}, + {&__pyx_n_u__9, __pyx_k__9, sizeof(__pyx_k__9), 0, 1, 0, 1}, + {&__pyx_n_u__90, __pyx_k__90, sizeof(__pyx_k__90), 0, 1, 0, 1}, + {&__pyx_n_u__91, __pyx_k__91, sizeof(__pyx_k__91), 0, 1, 0, 1}, + {&__pyx_n_u__92, __pyx_k__92, sizeof(__pyx_k__92), 0, 1, 0, 1}, + {&__pyx_n_u__93, __pyx_k__93, sizeof(__pyx_k__93), 0, 1, 0, 1}, + {&__pyx_n_u__94, __pyx_k__94, sizeof(__pyx_k__94), 0, 1, 0, 1}, + {&__pyx_n_u__95, __pyx_k__95, sizeof(__pyx_k__95), 0, 1, 0, 1}, + {&__pyx_n_u__96, __pyx_k__96, sizeof(__pyx_k__96), 0, 1, 0, 1}, + {&__pyx_n_u__97, __pyx_k__97, sizeof(__pyx_k__97), 0, 1, 0, 1}, + {&__pyx_n_u__98, __pyx_k__98, sizeof(__pyx_k__98), 0, 1, 0, 1}, + {&__pyx_n_u__99, __pyx_k__99, sizeof(__pyx_k__99), 0, 1, 0, 1}, + {&__pyx_n_u_as, __pyx_k_as, sizeof(__pyx_k_as), 0, 1, 0, 1}, + {&__pyx_n_u_asm_Beng, __pyx_k_asm_Beng, sizeof(__pyx_k_asm_Beng), 0, 1, 0, 1}, + {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1}, + {&__pyx_n_u_awa_Deva, __pyx_k_awa_Deva, sizeof(__pyx_k_awa_Deva), 0, 1, 0, 1}, + {&__pyx_kp_u_b_w_https_ftp_w_w_w_b, __pyx_k_b_w_https_ftp_w_w_w_b, sizeof(__pyx_k_b_w_https_ftp_w_w_w_b), 0, 1, 0, 0}, + {&__pyx_n_s_batch, __pyx_k_batch, 
sizeof(__pyx_k_batch), 0, 0, 1, 1}, + {&__pyx_n_u_ben_Beng, __pyx_k_ben_Beng, sizeof(__pyx_k_ben_Beng), 0, 1, 0, 1}, + {&__pyx_n_u_bho_Deva, __pyx_k_bho_Deva, sizeof(__pyx_k_bho_Deva), 0, 1, 0, 1}, + {&__pyx_n_u_bn, __pyx_k_bn, sizeof(__pyx_k_bn), 0, 1, 0, 1}, + {&__pyx_n_u_brx_Deva, __pyx_k_brx_Deva, sizeof(__pyx_k_brx_Deva), 0, 1, 0, 1}, + {&__pyx_n_s_chr, __pyx_k_chr, sizeof(__pyx_k_chr), 0, 0, 1, 1}, + {&__pyx_n_s_cinit___locals_lambda, __pyx_k_cinit___locals_lambda, sizeof(__pyx_k_cinit___locals_lambda), 0, 0, 1, 1}, + {&__pyx_n_s_clear, __pyx_k_clear, sizeof(__pyx_k_clear), 0, 0, 1, 1}, + {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, + {&__pyx_n_s_compile, __pyx_k_compile, sizeof(__pyx_k_compile), 0, 0, 1, 1}, + {&__pyx_kp_u_d, __pyx_k_d, sizeof(__pyx_k_d), 0, 1, 0, 0}, + {&__pyx_kp_u_d_d, __pyx_k_d_d, sizeof(__pyx_k_d_d), 0, 1, 0, 0}, + {&__pyx_kp_u_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d, __pyx_k_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d, sizeof(__pyx_k_d_d_s_s_s_d_d_s_d_d_d_d_d_d_d_d), 0, 1, 0, 0}, + {&__pyx_n_s_desc, __pyx_k_desc, sizeof(__pyx_k_desc), 0, 0, 1, 1}, + {&__pyx_n_s_detokenize, __pyx_k_detokenize, sizeof(__pyx_k_detokenize), 0, 0, 1, 1}, + {&__pyx_kp_u_disable, __pyx_k_disable, sizeof(__pyx_k_disable), 0, 1, 0, 0}, + {&__pyx_n_u_doi_Deva, __pyx_k_doi_Deva, sizeof(__pyx_k_doi_Deva), 0, 1, 0, 1}, + {&__pyx_n_u_en, __pyx_k_en, sizeof(__pyx_k_en), 0, 1, 0, 1}, + {&__pyx_kp_u_enable, __pyx_k_enable, sizeof(__pyx_k_enable), 0, 1, 0, 0}, + {&__pyx_n_u_eng_Latn, __pyx_k_eng_Latn, sizeof(__pyx_k_eng_Latn), 0, 1, 0, 1}, + {&__pyx_n_s_escape, __pyx_k_escape, sizeof(__pyx_k_escape), 0, 0, 1, 1}, + {&__pyx_n_s_findall, __pyx_k_findall, sizeof(__pyx_k_findall), 0, 0, 1, 1}, + {&__pyx_kp_u_gc, __pyx_k_gc, sizeof(__pyx_k_gc), 0, 1, 0, 0}, + {&__pyx_n_s_get, __pyx_k_get, sizeof(__pyx_k_get), 0, 0, 1, 1}, + {&__pyx_n_s_get_normalizer, __pyx_k_get_normalizer, sizeof(__pyx_k_get_normalizer), 0, 0, 1, 1}, + 
{&__pyx_n_s_getstate, __pyx_k_getstate, sizeof(__pyx_k_getstate), 0, 0, 1, 1}, + {&__pyx_n_u_gom_Deva, __pyx_k_gom_Deva, sizeof(__pyx_k_gom_Deva), 0, 1, 0, 1}, + {&__pyx_n_u_gon_Deva, __pyx_k_gon_Deva, sizeof(__pyx_k_gon_Deva), 0, 1, 0, 1}, + {&__pyx_n_s_group, __pyx_k_group, sizeof(__pyx_k_group), 0, 0, 1, 1}, + {&__pyx_n_u_gu, __pyx_k_gu, sizeof(__pyx_k_gu), 0, 1, 0, 1}, + {&__pyx_n_u_guj_Gujr, __pyx_k_guj_Gujr, sizeof(__pyx_k_guj_Gujr), 0, 1, 0, 1}, + {&__pyx_n_u_hi, __pyx_k_hi, sizeof(__pyx_k_hi), 0, 1, 0, 1}, + {&__pyx_n_u_hin_Deva, __pyx_k_hin_Deva, sizeof(__pyx_k_hin_Deva), 0, 1, 0, 1}, + {&__pyx_n_u_hne_Deva, __pyx_k_hne_Deva, sizeof(__pyx_k_hne_Deva), 0, 1, 0, 1}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_indic_detokenize, __pyx_k_indic_detokenize, sizeof(__pyx_k_indic_detokenize), 0, 0, 1, 1}, + {&__pyx_n_s_indic_tokenize, __pyx_k_indic_tokenize, sizeof(__pyx_k_indic_tokenize), 0, 0, 1, 1}, + {&__pyx_n_s_indicnlp_normalize_indic_normali, __pyx_k_indicnlp_normalize_indic_normali, sizeof(__pyx_k_indicnlp_normalize_indic_normali), 0, 0, 1, 1}, + {&__pyx_n_s_indicnlp_tokenize, __pyx_k_indicnlp_tokenize, sizeof(__pyx_k_indicnlp_tokenize), 0, 0, 1, 1}, + {&__pyx_n_s_indicnlp_transliterate_unicode_t, __pyx_k_indicnlp_transliterate_unicode_t, sizeof(__pyx_k_indicnlp_transliterate_unicode_t), 0, 0, 1, 1}, + {&__pyx_n_s_inference, __pyx_k_inference, sizeof(__pyx_k_inference), 0, 0, 1, 1}, + {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1}, + {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1}, + {&__pyx_n_s_is_target, __pyx_k_is_target, sizeof(__pyx_k_is_target), 0, 0, 1, 1}, + {&__pyx_kp_u_isenabled, __pyx_k_isenabled, sizeof(__pyx_k_isenabled), 0, 1, 0, 0}, + {&__pyx_n_s_items, __pyx_k_items, sizeof(__pyx_k_items), 0, 0, 1, 1}, + {&__pyx_n_u_kK, __pyx_k_kK, sizeof(__pyx_k_kK), 0, 1, 0, 1}, + {&__pyx_n_u_kan_Knda, __pyx_k_kan_Knda, 
sizeof(__pyx_k_kan_Knda), 0, 1, 0, 1}, + {&__pyx_n_u_kas_Arab, __pyx_k_kas_Arab, sizeof(__pyx_k_kas_Arab), 0, 1, 0, 1}, + {&__pyx_n_u_kas_Deva, __pyx_k_kas_Deva, sizeof(__pyx_k_kas_Deva), 0, 1, 0, 1}, + {&__pyx_n_u_kha_Latn, __pyx_k_kha_Latn, sizeof(__pyx_k_kha_Latn), 0, 1, 0, 1}, + {&__pyx_n_u_kn, __pyx_k_kn, sizeof(__pyx_k_kn), 0, 1, 0, 1}, + {&__pyx_n_s_lang, __pyx_k_lang, sizeof(__pyx_k_lang), 0, 0, 1, 1}, + {&__pyx_n_u_line, __pyx_k_line, sizeof(__pyx_k_line), 0, 1, 0, 1}, + {&__pyx_n_u_lus_Latn, __pyx_k_lus_Latn, sizeof(__pyx_k_lus_Latn), 0, 1, 0, 1}, + {&__pyx_n_s_m, __pyx_k_m, sizeof(__pyx_k_m), 0, 0, 1, 1}, + {&__pyx_n_u_mag_Deva, __pyx_k_mag_Deva, sizeof(__pyx_k_mag_Deva), 0, 1, 0, 1}, + {&__pyx_n_u_mai_Deva, __pyx_k_mai_Deva, sizeof(__pyx_k_mai_Deva), 0, 1, 0, 1}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_u_mal_Mlym, __pyx_k_mal_Mlym, sizeof(__pyx_k_mal_Mlym), 0, 1, 0, 1}, + {&__pyx_n_u_mar_Deva, __pyx_k_mar_Deva, sizeof(__pyx_k_mar_Deva), 0, 1, 0, 1}, + {&__pyx_n_u_ml, __pyx_k_ml, sizeof(__pyx_k_ml), 0, 1, 0, 1}, + {&__pyx_n_u_mni_Beng, __pyx_k_mni_Beng, sizeof(__pyx_k_mni_Beng), 0, 1, 0, 1}, + {&__pyx_n_u_mni_Mtei, __pyx_k_mni_Mtei, sizeof(__pyx_k_mni_Mtei), 0, 1, 0, 1}, + {&__pyx_n_u_mr, __pyx_k_mr, sizeof(__pyx_k_mr), 0, 1, 0, 1}, + {&__pyx_kp_u_n, __pyx_k_n, sizeof(__pyx_k_n), 0, 1, 0, 0}, + {&__pyx_kp_u_n_2, __pyx_k_n_2, sizeof(__pyx_k_n_2), 0, 1, 0, 0}, + {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, + {&__pyx_n_u_ne, __pyx_k_ne, sizeof(__pyx_k_ne), 0, 1, 0, 1}, + {&__pyx_kp_s_no_default___reduce___due_to_non, __pyx_k_no_default___reduce___due_to_non, sizeof(__pyx_k_no_default___reduce___due_to_non), 0, 0, 1, 0}, + {&__pyx_n_s_normalize, __pyx_k_normalize, sizeof(__pyx_k_normalize), 0, 0, 1, 1}, + {&__pyx_n_u_npi_Deva, __pyx_k_npi_Deva, sizeof(__pyx_k_npi_Deva), 0, 1, 0, 1}, + {&__pyx_n_u_or, __pyx_k_or, sizeof(__pyx_k_or), 0, 1, 0, 1}, + {&__pyx_n_u_ory, __pyx_k_ory, sizeof(__pyx_k_ory), 
0, 1, 0, 1}, + {&__pyx_n_u_ory_Orya, __pyx_k_ory_Orya, sizeof(__pyx_k_ory_Orya), 0, 1, 0, 1}, + {&__pyx_n_u_pa, __pyx_k_pa, sizeof(__pyx_k_pa), 0, 1, 0, 1}, + {&__pyx_n_u_pan_Guru, __pyx_k_pan_Guru, sizeof(__pyx_k_pan_Guru), 0, 1, 0, 1}, + {&__pyx_n_s_postprocess_batch, __pyx_k_postprocess_batch, sizeof(__pyx_k_postprocess_batch), 0, 0, 1, 1}, + {&__pyx_n_s_preprocess_batch, __pyx_k_preprocess_batch, sizeof(__pyx_k_preprocess_batch), 0, 0, 1, 1}, + {&__pyx_n_s_put, __pyx_k_put, sizeof(__pyx_k_put), 0, 0, 1, 1}, + {&__pyx_n_s_pyx_state, __pyx_k_pyx_state, sizeof(__pyx_k_pyx_state), 0, 0, 1, 1}, + {&__pyx_n_s_pyx_vtable, __pyx_k_pyx_vtable, sizeof(__pyx_k_pyx_vtable), 0, 0, 1, 1}, + {&__pyx_n_s_queue, __pyx_k_queue, sizeof(__pyx_k_queue), 0, 0, 1, 1}, + {&__pyx_kp_u_r, __pyx_k_r, sizeof(__pyx_k_r), 0, 1, 0, 0}, + {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, + {&__pyx_n_s_re, __pyx_k_re, sizeof(__pyx_k_re), 0, 0, 1, 1}, + {&__pyx_n_s_reduce, __pyx_k_reduce, sizeof(__pyx_k_reduce), 0, 0, 1, 1}, + {&__pyx_n_s_reduce_cython, __pyx_k_reduce_cython, sizeof(__pyx_k_reduce_cython), 0, 0, 1, 1}, + {&__pyx_n_s_reduce_ex, __pyx_k_reduce_ex, sizeof(__pyx_k_reduce_ex), 0, 0, 1, 1}, + {&__pyx_n_s_regex, __pyx_k_regex, sizeof(__pyx_k_regex), 0, 0, 1, 1}, + {&__pyx_n_s_replace, __pyx_k_replace, sizeof(__pyx_k_replace), 0, 0, 1, 1}, + {&__pyx_kp_u_s, __pyx_k_s, sizeof(__pyx_k_s), 0, 1, 0, 0}, + {&__pyx_kp_u_s_2, __pyx_k_s_2, sizeof(__pyx_k_s_2), 0, 1, 0, 0}, + {&__pyx_kp_u_s_3, __pyx_k_s_3, sizeof(__pyx_k_s_3), 0, 1, 0, 0}, + {&__pyx_kp_u_s_s, __pyx_k_s_s, sizeof(__pyx_k_s_s), 0, 1, 0, 0}, + {&__pyx_kp_u_s_s_2, __pyx_k_s_s_2, sizeof(__pyx_k_s_s_2), 0, 1, 0, 0}, + {&__pyx_n_s_sacremoses, __pyx_k_sacremoses, sizeof(__pyx_k_sacremoses), 0, 0, 1, 1}, + {&__pyx_n_u_san_Deva, __pyx_k_san_Deva, sizeof(__pyx_k_san_Deva), 0, 1, 0, 1}, + {&__pyx_n_u_sat_Olck, __pyx_k_sat_Olck, sizeof(__pyx_k_sat_Olck), 0, 1, 0, 1}, + {&__pyx_n_s_self, __pyx_k_self, 
sizeof(__pyx_k_self), 0, 0, 1, 1}, + {&__pyx_n_s_sents, __pyx_k_sents, sizeof(__pyx_k_sents), 0, 0, 1, 1}, + {&__pyx_n_s_setstate, __pyx_k_setstate, sizeof(__pyx_k_setstate), 0, 0, 1, 1}, + {&__pyx_n_s_setstate_cython, __pyx_k_setstate_cython, sizeof(__pyx_k_setstate_cython), 0, 0, 1, 1}, + {&__pyx_n_u_snd_Arab, __pyx_k_snd_Arab, sizeof(__pyx_k_snd_Arab), 0, 1, 0, 1}, + {&__pyx_n_u_snd_Deva, __pyx_k_snd_Deva, sizeof(__pyx_k_snd_Deva), 0, 1, 0, 1}, + {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1}, + {&__pyx_n_s_split, __pyx_k_split, sizeof(__pyx_k_split), 0, 0, 1, 1}, + {&__pyx_n_s_src_lang, __pyx_k_src_lang, sizeof(__pyx_k_src_lang), 0, 0, 1, 1}, + {&__pyx_kp_s_stringsource, __pyx_k_stringsource, sizeof(__pyx_k_stringsource), 0, 0, 1, 0}, + {&__pyx_n_s_strip, __pyx_k_strip, sizeof(__pyx_k_strip), 0, 0, 1, 1}, + {&__pyx_n_s_sub, __pyx_k_sub, sizeof(__pyx_k_sub), 0, 0, 1, 1}, + {&__pyx_n_u_ta, __pyx_k_ta, sizeof(__pyx_k_ta), 0, 1, 0, 1}, + {&__pyx_n_u_tam_Taml, __pyx_k_tam_Taml, sizeof(__pyx_k_tam_Taml), 0, 1, 0, 1}, + {&__pyx_n_u_te, __pyx_k_te, sizeof(__pyx_k_te), 0, 1, 0, 1}, + {&__pyx_n_u_tel_Telu, __pyx_k_tel_Telu, sizeof(__pyx_k_tel_Telu), 0, 1, 0, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_tgt_lang, __pyx_k_tgt_lang, sizeof(__pyx_k_tgt_lang), 0, 0, 1, 1}, + {&__pyx_n_s_tokenize, __pyx_k_tokenize, sizeof(__pyx_k_tokenize), 0, 0, 1, 1}, + {&__pyx_n_s_total, __pyx_k_total, sizeof(__pyx_k_total), 0, 0, 1, 1}, + {&__pyx_n_s_tqdm, __pyx_k_tqdm, sizeof(__pyx_k_tqdm), 0, 0, 1, 1}, + {&__pyx_n_s_translate, __pyx_k_translate, sizeof(__pyx_k_translate), 0, 0, 1, 1}, + {&__pyx_n_s_transliterate, __pyx_k_transliterate, sizeof(__pyx_k_transliterate), 0, 0, 1, 1}, + {&__pyx_n_s_trivial_detokenize, __pyx_k_trivial_detokenize, sizeof(__pyx_k_trivial_detokenize), 0, 0, 1, 1}, + {&__pyx_n_s_trivial_tokenize, __pyx_k_trivial_tokenize, sizeof(__pyx_k_trivial_tokenize), 0, 0, 1, 1}, + {&__pyx_n_s_typing, __pyx_k_typing, 
sizeof(__pyx_k_typing), 0, 0, 1, 1}, + {&__pyx_n_s_unit, __pyx_k_unit, sizeof(__pyx_k_unit), 0, 0, 1, 1}, + {&__pyx_n_u_unr_Deva, __pyx_k_unr_Deva, sizeof(__pyx_k_unr_Deva), 0, 1, 0, 1}, + {&__pyx_n_u_ur, __pyx_k_ur, sizeof(__pyx_k_ur), 0, 1, 0, 1}, + {&__pyx_n_u_urd_Arab, __pyx_k_urd_Arab, sizeof(__pyx_k_urd_Arab), 0, 1, 0, 1}, + {&__pyx_n_s_visualize, __pyx_k_visualize, sizeof(__pyx_k_visualize), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} + }; + return __Pyx_InitStrings(__pyx_string_tab); +} +/* #### Code section: cached_builtins ### */ +static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 145, __pyx_L1_error) + __pyx_builtin_chr = __Pyx_GetBuiltinName(__pyx_n_s_chr); if (!__pyx_builtin_chr) __PYX_ERR(0, 146, __pyx_L1_error) + __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) __PYX_ERR(1, 2, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: cached_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "IndicTransToolkit/processor.pyx":312 + * + * # Clean up any remaining placeholder artifacts + * text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]") # <<<<<<<<<<<<<< + * self._placeholder_entity_maps.put(placeholder_entity_map) + * return text + */ + __pyx_tuple__150 = PyTuple_Pack(2, __pyx_kp_u__149, __pyx_kp_u__141); if (unlikely(!__pyx_tuple__150)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__150); + __Pyx_GIVEREF(__pyx_tuple__150); + __pyx_tuple__152 = PyTuple_Pack(2, __pyx_kp_u__151, __pyx_kp_u__143); if (unlikely(!__pyx_tuple__152)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__152); + __Pyx_GIVEREF(__pyx_tuple__152); + + /* "IndicTransToolkit/processor.pyx":427 + * if script_code in ["Arab", 
"Aran"]: + * sent = ( + * sent.replace(" ", "") # <<<<<<<<<<<<<< + * .replace(" ", "") + * .replace(" ", "") + */ + __pyx_tuple__158 = PyTuple_Pack(2, __pyx_kp_u__156, __pyx_kp_u__157); if (unlikely(!__pyx_tuple__158)) __PYX_ERR(0, 427, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__158); + __Pyx_GIVEREF(__pyx_tuple__158); + + /* "IndicTransToolkit/processor.pyx":428 + * sent = ( + * sent.replace(" ", "") + * .replace(" ", "") # <<<<<<<<<<<<<< + * .replace(" ", "") + * .replace("", "") + */ + __pyx_tuple__161 = PyTuple_Pack(2, __pyx_kp_u__159, __pyx_kp_u__160); if (unlikely(!__pyx_tuple__161)) __PYX_ERR(0, 428, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__161); + __Pyx_GIVEREF(__pyx_tuple__161); + + /* "IndicTransToolkit/processor.pyx":429 + * sent.replace(" ", "") + * .replace(" ", "") + * .replace(" ", "") # <<<<<<<<<<<<<< + * .replace("", "") + * ) + */ + __pyx_tuple__164 = PyTuple_Pack(2, __pyx_kp_u__162, __pyx_kp_u__163); if (unlikely(!__pyx_tuple__164)) __PYX_ERR(0, 429, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__164); + __Pyx_GIVEREF(__pyx_tuple__164); + + /* "IndicTransToolkit/processor.pyx":430 + * .replace(" ", "") + * .replace(" ", "") + * .replace("", "") # <<<<<<<<<<<<<< + * ) + * + */ + __pyx_tuple__167 = PyTuple_Pack(2, __pyx_kp_u__165, __pyx_n_u__166); if (unlikely(!__pyx_tuple__167)) __PYX_ERR(0, 430, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__167); + __Pyx_GIVEREF(__pyx_tuple__167); + + /* "IndicTransToolkit/processor.pyx":435 + * # Oriya fix + * if lang_code == "ory": + * sent = sent.replace("", "") # <<<<<<<<<<<<<< + * + * # Restore placeholders + */ + __pyx_tuple__170 = PyTuple_Pack(2, __pyx_kp_u__168, __pyx_n_u__169); if (unlikely(!__pyx_tuple__170)) __PYX_ERR(0, 435, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__170); + __Pyx_GIVEREF(__pyx_tuple__170); + + /* "IndicTransToolkit/processor.pyx":449 + * + * # Exposed Method: Preprocess a Batch of Sentences + * cpdef list preprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] batch, + */ + 
__pyx_tuple__172 = PyTuple_Pack(6, __pyx_n_s_self, __pyx_n_s_batch, __pyx_n_s_src_lang, __pyx_n_s_tgt_lang, __pyx_n_s_is_target, __pyx_n_s_visualize); if (unlikely(!__pyx_tuple__172)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__172); + __Pyx_GIVEREF(__pyx_tuple__172); + __pyx_codeobj__173 = (PyObject*)__Pyx_PyCode_New(6, 0, 0, 6, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__172, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_IndicTransToolkit_processor_pyx, __pyx_n_s_preprocess_batch, 449, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__173)) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_tuple__174 = PyTuple_Pack(3, Py_None, Py_False, Py_False); if (unlikely(!__pyx_tuple__174)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__174); + __Pyx_GIVEREF(__pyx_tuple__174); + + /* "IndicTransToolkit/processor.pyx":479 + * + * # Exposed Method: Postprocess a Batch of Sentences + * cpdef list postprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] sents, + */ + __pyx_tuple__175 = PyTuple_Pack(4, __pyx_n_s_self, __pyx_n_s_sents, __pyx_n_s_lang, __pyx_n_s_visualize); if (unlikely(!__pyx_tuple__175)) __PYX_ERR(0, 479, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__175); + __Pyx_GIVEREF(__pyx_tuple__175); + __pyx_codeobj__176 = (PyObject*)__Pyx_PyCode_New(4, 0, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__175, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_IndicTransToolkit_processor_pyx, __pyx_n_s_postprocess_batch, 479, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__176)) __PYX_ERR(0, 479, __pyx_L1_error) + __pyx_tuple__177 = PyTuple_Pack(2, __pyx_n_u_hin_Deva, Py_False); if (unlikely(!__pyx_tuple__177)) __PYX_ERR(0, 479, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__177); + __Pyx_GIVEREF(__pyx_tuple__177); + + /* "(tree fragment)":1 + * def __reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due 
to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + __pyx_tuple__178 = PyTuple_Pack(1, __pyx_n_s_self); if (unlikely(!__pyx_tuple__178)) __PYX_ERR(1, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__178); + __Pyx_GIVEREF(__pyx_tuple__178); + __pyx_codeobj__179 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__178, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_reduce_cython, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__179)) __PYX_ERR(1, 1, __pyx_L1_error) + + /* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + __pyx_tuple__180 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_pyx_state); if (unlikely(!__pyx_tuple__180)) __PYX_ERR(1, 3, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__180); + __Pyx_GIVEREF(__pyx_tuple__180); + __pyx_codeobj__181 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__180, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_setstate_cython, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__181)) __PYX_ERR(1, 3, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} +/* #### Code section: init_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) { + __pyx_umethod_PyDict_Type_get.type = (PyObject*)&PyDict_Type; + __pyx_umethod_PyDict_Type_get.method_name = &__pyx_n_s_get; + __pyx_umethod_PyUnicode_Type_strip.type = (PyObject*)&PyUnicode_Type; + __pyx_umethod_PyUnicode_Type_strip.method_name = &__pyx_n_s_strip; + __pyx_umethod_PyUnicode_Type_translate.type = 
(PyObject*)&PyUnicode_Type; + __pyx_umethod_PyUnicode_Type_translate.method_name = &__pyx_n_s_translate; + if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error); + __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: init_globals ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { + return 0; +} +/* #### Code section: init_module ### */ + +static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ + +static int __Pyx_modinit_global_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); + /*--- Global init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); + /*--- Variable export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); + /*--- Function export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_init_code(void) { + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); + /*--- 
Type init code ---*/ + __pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor = &__pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor; + __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._apply_punc_replacements = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__apply_punc_replacements; + __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._punc_norm = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__punc_norm; + __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._wrap_with_placeholders = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__wrap_with_placeholders; + __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._normalize = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__normalize; + __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._do_indic_tokenize_and_transliterate = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, PyObject *, int))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__do_indic_tokenize_and_transliterate; + __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._preprocess = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, PyObject *, PyObject *, int))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__preprocess; + __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor._postprocess = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor__postprocess; + 
__pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor.preprocess_batch = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, PyObject *, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch *__pyx_optional_args))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_preprocess_batch; + __pyx_vtable_17IndicTransToolkit_9processor_IndicProcessor.postprocess_batch = (PyObject *(*)(struct __pyx_obj_17IndicTransToolkit_9processor_IndicProcessor *, PyObject *, int __pyx_skip_dispatch, struct __pyx_opt_args_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch *__pyx_optional_args))__pyx_f_17IndicTransToolkit_9processor_14IndicProcessor_postprocess_batch; + #if CYTHON_USE_TYPE_SPECS + __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_17IndicTransToolkit_9processor_IndicProcessor_spec, NULL); if (unlikely(!__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor)) __PYX_ERR(0, 20, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_17IndicTransToolkit_9processor_IndicProcessor_spec, __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error) + #else + __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor = &__pyx_type_17IndicTransToolkit_9processor_IndicProcessor; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + #endif + #if !CYTHON_USE_TYPE_SPECS + if (__Pyx_PyType_Ready(__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error) + #endif + #if PY_MAJOR_VERSION < 3 + __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor->tp_print = 0; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor->tp_dictoffset && 
__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor->tp_getattro == PyObject_GenericGetAttr)) { + __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor->tp_getattro = __Pyx_PyObject_GenericGetAttr; + } + #endif + if (__Pyx_SetVtable(__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor, __pyx_vtabptr_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error) + #if !CYTHON_COMPILING_IN_LIMITED_API + if (__Pyx_MergeVtables(__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error) + #endif + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_IndicProcessor, (PyObject *) __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error) + #if !CYTHON_COMPILING_IN_LIMITED_API + if (__Pyx_setup_reduce((PyObject *) __pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor) < 0) __PYX_ERR(0, 20, __pyx_L1_error) + #endif + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_modinit_type_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); + /*--- Type import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); + /*--- Variable import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); + /*--- Function import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + + +#if PY_MAJOR_VERSION >= 3 +#if CYTHON_PEP489_MULTI_PHASE_INIT +static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ +static int __pyx_pymod_exec_processor(PyObject* module); /*proto*/ +static PyModuleDef_Slot 
__pyx_moduledef_slots[] = { + {Py_mod_create, (void*)__pyx_pymod_create}, + {Py_mod_exec, (void*)__pyx_pymod_exec_processor}, + {0, NULL} +}; +#endif + +#ifdef __cplusplus +namespace { + struct PyModuleDef __pyx_moduledef = + #else + static struct PyModuleDef __pyx_moduledef = + #endif + { + PyModuleDef_HEAD_INIT, + "processor", + __pyx_k_Cython_version_of_the_IndicProc, /* m_doc */ + #if CYTHON_PEP489_MULTI_PHASE_INIT + 0, /* m_size */ + #elif CYTHON_USE_MODULE_STATE + sizeof(__pyx_mstate), /* m_size */ + #else + -1, /* m_size */ + #endif + __pyx_methods /* m_methods */, + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_moduledef_slots, /* m_slots */ + #else + NULL, /* m_reload */ + #endif + #if CYTHON_USE_MODULE_STATE + __pyx_m_traverse, /* m_traverse */ + __pyx_m_clear, /* m_clear */ + NULL /* m_free */ + #else + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ + #endif + }; + #ifdef __cplusplus +} /* anonymous namespace */ +#endif +#endif + +#ifndef CYTHON_NO_PYINIT_EXPORT +#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC +#elif PY_MAJOR_VERSION < 3 +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" void +#else +#define __Pyx_PyMODINIT_FUNC void +#endif +#else +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * +#else +#define __Pyx_PyMODINIT_FUNC PyObject * +#endif +#endif + + +#if PY_MAJOR_VERSION < 3 +__Pyx_PyMODINIT_FUNC initprocessor(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC initprocessor(void) +#else +__Pyx_PyMODINIT_FUNC PyInit_processor(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC PyInit_processor(void) +#if CYTHON_PEP489_MULTI_PHASE_INIT +{ + return PyModuleDef_Init(&__pyx_moduledef); +} +static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { + #if PY_VERSION_HEX >= 0x030700A1 + static PY_INT64_T main_interpreter_id = -1; + PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); + if (main_interpreter_id == -1) { + main_interpreter_id = current_id; + 
return (unlikely(current_id == -1)) ? -1 : 0; + } else if (unlikely(main_interpreter_id != current_id)) + #else + static PyInterpreterState *main_interpreter = NULL; + PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; + if (!main_interpreter) { + main_interpreter = current_interpreter; + } else if (unlikely(main_interpreter != current_interpreter)) + #endif + { + PyErr_SetString( + PyExc_ImportError, + "Interpreter change detected - this module can only be loaded into one interpreter per process."); + return -1; + } + return 0; +} +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none) +#else +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) +#endif +{ + PyObject *value = PyObject_GetAttrString(spec, from_name); + int result = 0; + if (likely(value)) { + if (allow_none || value != Py_None) { +#if CYTHON_COMPILING_IN_LIMITED_API + result = PyModule_AddObject(module, to_name, value); +#else + result = PyDict_SetItemString(moddict, to_name, value); +#endif + } + Py_DECREF(value); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + result = -1; + } + return result; +} +static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) { + PyObject *module = NULL, *moddict, *modname; + CYTHON_UNUSED_VAR(def); + if (__Pyx_check_single_interpreter()) + return NULL; + if (__pyx_m) + return __Pyx_NewRef(__pyx_m); + modname = PyObject_GetAttrString(spec, "name"); + if (unlikely(!modname)) goto bad; + module = PyModule_NewObject(modname); + Py_DECREF(modname); + if (unlikely(!module)) goto bad; +#if CYTHON_COMPILING_IN_LIMITED_API + moddict = module; +#else + moddict = PyModule_GetDict(module); + if (unlikely(!moddict)) goto bad; +#endif + if 
(unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; + return module; +bad: + Py_XDECREF(module); + return NULL; +} + + +static CYTHON_SMALL_CODE int __pyx_pymod_exec_processor(PyObject *__pyx_pyinit_module) +#endif +#endif +{ + int stringtab_initialized = 0; + #if CYTHON_USE_MODULE_STATE + int pystate_addmodule_run = 0; + #endif + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + #if CYTHON_PEP489_MULTI_PHASE_INIT + if (__pyx_m) { + if (__pyx_m == __pyx_pyinit_module) return 0; + PyErr_SetString(PyExc_RuntimeError, "Module 'processor' has already been imported. 
Re-initialisation is not supported."); + return -1; + } + #elif PY_MAJOR_VERSION >= 3 + if (__pyx_m) return __Pyx_NewRef(__pyx_m); + #endif + /*--- Module creation code ---*/ + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_m = __pyx_pyinit_module; + Py_INCREF(__pyx_m); + #else + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("processor", __pyx_methods, __pyx_k_Cython_version_of_the_IndicProc, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #elif CYTHON_USE_MODULE_STATE + __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + { + int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); + __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "processor" pseudovariable */ + if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + pystate_addmodule_run = 1; + } + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #endif + CYTHON_UNUSED_VAR(__pyx_t_1); + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = __Pyx_PyImport_AddModuleRef(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_cython_runtime = __Pyx_PyImport_AddModuleRef((const char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if CYTHON_REFNANNY +__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); +if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); +} +#endif + __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_processor(void)", 0); + if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, 
__Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pxy_PyFrame_Initialize_Offsets + __Pxy_PyFrame_Initialize_Offsets(); + #endif + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + PyEval_InitThreads(); + #endif + /*--- Initialize various global constants etc. 
---*/ + if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + stringtab_initialized = 1; + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_IndicTransToolkit__processor) { + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "IndicTransToolkit.processor")) { + if (unlikely((PyDict_SetItemString(modules, "IndicTransToolkit.processor", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global type/function init code ---*/ + (void)__Pyx_modinit_global_init_code(); + (void)__Pyx_modinit_variable_export_code(); + (void)__Pyx_modinit_function_export_code(); + if (unlikely((__Pyx_modinit_type_init_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + (void)__Pyx_modinit_type_import_code(); + (void)__Pyx_modinit_variable_import_code(); + (void)__Pyx_modinit_function_import_code(); + /*--- Execution code ---*/ + #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "IndicTransToolkit/processor.pyx":8 + * """ + * + * import regex as re # <<<<<<<<<<<<<< + * from tqdm import tqdm + * from queue import Queue + */ + __pyx_t_2 = __Pyx_ImportDottedModule(__pyx_n_s_regex, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 8, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, 
__pyx_n_s_re, __pyx_t_2) < 0) __PYX_ERR(0, 8, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":9 + * + * import regex as re + * from tqdm import tqdm # <<<<<<<<<<<<<< + * from queue import Queue + * from typing import List, Dict, Union + */ + __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_INCREF(__pyx_n_s_tqdm); + __Pyx_GIVEREF(__pyx_n_s_tqdm); + if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_tqdm)) __PYX_ERR(0, 9, __pyx_L1_error); + __pyx_t_3 = __Pyx_Import(__pyx_n_s_tqdm, __pyx_t_2, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 9, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_tqdm); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_tqdm, __pyx_t_2) < 0) __PYX_ERR(0, 9, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":10 + * import regex as re + * from tqdm import tqdm + * from queue import Queue # <<<<<<<<<<<<<< + * from typing import List, Dict, Union + * + */ + __pyx_t_3 = PyList_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_INCREF(__pyx_n_s_Queue); + __Pyx_GIVEREF(__pyx_n_s_Queue); + if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_Queue)) __PYX_ERR(0, 10, __pyx_L1_error); + __pyx_t_2 = __Pyx_Import(__pyx_n_s_queue, __pyx_t_3, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_Queue); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_Queue, __pyx_t_3) < 0) __PYX_ERR(0, 10, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 
= 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":11 + * from tqdm import tqdm + * from queue import Queue + * from typing import List, Dict, Union # <<<<<<<<<<<<<< + * + * # Importing Python objects since these libraries don't offer C-extensions + */ + __pyx_t_2 = PyList_New(3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_INCREF(__pyx_n_s_List); + __Pyx_GIVEREF(__pyx_n_s_List); + if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_List)) __PYX_ERR(0, 11, __pyx_L1_error); + __Pyx_INCREF(__pyx_n_s_Dict); + __Pyx_GIVEREF(__pyx_n_s_Dict); + if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 1, __pyx_n_s_Dict)) __PYX_ERR(0, 11, __pyx_L1_error); + __Pyx_INCREF(__pyx_n_s_Union); + __Pyx_GIVEREF(__pyx_n_s_Union); + if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 2, __pyx_n_s_Union)) __PYX_ERR(0, 11, __pyx_L1_error); + __pyx_t_3 = __Pyx_Import(__pyx_n_s_typing, __pyx_t_2, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 11, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_List); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_List, __pyx_t_2) < 0) __PYX_ERR(0, 11, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_Dict); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_Dict, __pyx_t_2) < 0) __PYX_ERR(0, 11, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_Union); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_Union, __pyx_t_2) < 0) __PYX_ERR(0, 11, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* 
"IndicTransToolkit/processor.pyx":14 + * + * # Importing Python objects since these libraries don't offer C-extensions + * from indicnlp.tokenize import indic_tokenize, indic_detokenize # <<<<<<<<<<<<<< + * from indicnlp.normalize.indic_normalize import IndicNormalizerFactory + * from sacremoses import MosesPunctNormalizer, MosesTokenizer, MosesDetokenizer + */ + __pyx_t_3 = PyList_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 14, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_INCREF(__pyx_n_s_indic_tokenize); + __Pyx_GIVEREF(__pyx_n_s_indic_tokenize); + if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_indic_tokenize)) __PYX_ERR(0, 14, __pyx_L1_error); + __Pyx_INCREF(__pyx_n_s_indic_detokenize); + __Pyx_GIVEREF(__pyx_n_s_indic_detokenize); + if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 1, __pyx_n_s_indic_detokenize)) __PYX_ERR(0, 14, __pyx_L1_error); + __pyx_t_2 = __Pyx_Import(__pyx_n_s_indicnlp_tokenize, __pyx_t_3, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 14, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_indic_tokenize); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 14, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_indic_tokenize, __pyx_t_3) < 0) __PYX_ERR(0, 14, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_indic_detokenize); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 14, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_indic_detokenize, __pyx_t_3) < 0) __PYX_ERR(0, 14, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":15 + * # Importing Python objects since these libraries don't offer C-extensions + * from indicnlp.tokenize import indic_tokenize, indic_detokenize + * from indicnlp.normalize.indic_normalize import IndicNormalizerFactory # <<<<<<<<<<<<<< + * from 
sacremoses import MosesPunctNormalizer, MosesTokenizer, MosesDetokenizer + * from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator + */ + __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 15, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_INCREF(__pyx_n_s_IndicNormalizerFactory); + __Pyx_GIVEREF(__pyx_n_s_IndicNormalizerFactory); + if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_IndicNormalizerFactory)) __PYX_ERR(0, 15, __pyx_L1_error); + __pyx_t_3 = __Pyx_Import(__pyx_n_s_indicnlp_normalize_indic_normali, __pyx_t_2, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 15, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_IndicNormalizerFactory); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 15, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_IndicNormalizerFactory, __pyx_t_2) < 0) __PYX_ERR(0, 15, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":16 + * from indicnlp.tokenize import indic_tokenize, indic_detokenize + * from indicnlp.normalize.indic_normalize import IndicNormalizerFactory + * from sacremoses import MosesPunctNormalizer, MosesTokenizer, MosesDetokenizer # <<<<<<<<<<<<<< + * from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator + * + */ + __pyx_t_3 = PyList_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_INCREF(__pyx_n_s_MosesPunctNormalizer); + __Pyx_GIVEREF(__pyx_n_s_MosesPunctNormalizer); + if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_MosesPunctNormalizer)) __PYX_ERR(0, 16, __pyx_L1_error); + __Pyx_INCREF(__pyx_n_s_MosesTokenizer); + __Pyx_GIVEREF(__pyx_n_s_MosesTokenizer); + if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 1, __pyx_n_s_MosesTokenizer)) __PYX_ERR(0, 16, __pyx_L1_error); + 
__Pyx_INCREF(__pyx_n_s_MosesDetokenizer); + __Pyx_GIVEREF(__pyx_n_s_MosesDetokenizer); + if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 2, __pyx_n_s_MosesDetokenizer)) __PYX_ERR(0, 16, __pyx_L1_error); + __pyx_t_2 = __Pyx_Import(__pyx_n_s_sacremoses, __pyx_t_3, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_MosesPunctNormalizer); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MosesPunctNormalizer, __pyx_t_3) < 0) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_MosesTokenizer); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MosesTokenizer, __pyx_t_3) < 0) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_MosesDetokenizer); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MosesDetokenizer, __pyx_t_3) < 0) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "IndicTransToolkit/processor.pyx":17 + * from indicnlp.normalize.indic_normalize import IndicNormalizerFactory + * from sacremoses import MosesPunctNormalizer, MosesTokenizer, MosesDetokenizer + * from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator # <<<<<<<<<<<<<< + * + * + */ + __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_INCREF(__pyx_n_s_UnicodeIndicTransliterator); + __Pyx_GIVEREF(__pyx_n_s_UnicodeIndicTransliterator); + if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_UnicodeIndicTransliterator)) __PYX_ERR(0, 17, 
__pyx_L1_error); + __pyx_t_3 = __Pyx_Import(__pyx_n_s_indicnlp_transliterate_unicode_t, __pyx_t_2, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_UnicodeIndicTransliterator); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_UnicodeIndicTransliterator, __pyx_t_2) < 0) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":449 + * + * # Exposed Method: Preprocess a Batch of Sentences + * cpdef list preprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] batch, + */ + __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_3preprocess_batch, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_IndicProcessor_preprocess_batch, NULL, __pyx_n_s_IndicTransToolkit_processor, __pyx_d, ((PyObject *)__pyx_codeobj__173)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_3, __pyx_tuple__174); + if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor, __pyx_n_s_preprocess_batch, __pyx_t_3) < 0) __PYX_ERR(0, 449, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + PyType_Modified(__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor); + + /* "IndicTransToolkit/processor.pyx":479 + * + * # Exposed Method: Postprocess a Batch of Sentences + * cpdef list postprocess_batch( # <<<<<<<<<<<<<< + * self, + * List[str] sents, + */ + __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_5postprocess_batch, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_IndicProcessor_postprocess_batch, NULL, __pyx_n_s_IndicTransToolkit_processor, __pyx_d, ((PyObject *)__pyx_codeobj__176)); if 
(unlikely(!__pyx_t_3)) __PYX_ERR(0, 479, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_3, __pyx_tuple__177); + if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor, __pyx_n_s_postprocess_batch, __pyx_t_3) < 0) __PYX_ERR(0, 479, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + PyType_Modified(__pyx_ptype_17IndicTransToolkit_9processor_IndicProcessor); + + /* "(tree fragment)":1 + * def __reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_7__reduce_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_IndicProcessor___reduce_cython, NULL, __pyx_n_s_IndicTransToolkit_processor, __pyx_d, ((PyObject *)__pyx_codeobj__179)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_reduce_cython, __pyx_t_3) < 0) __PYX_ERR(1, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_17IndicTransToolkit_9processor_14IndicProcessor_9__setstate_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_IndicProcessor___setstate_cython, NULL, __pyx_n_s_IndicTransToolkit_processor, __pyx_d, ((PyObject *)__pyx_codeobj__181)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 3, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_setstate_cython, __pyx_t_3) < 0) __PYX_ERR(1, 3, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "IndicTransToolkit/processor.pyx":1 + * # cython: 
language_level=3, boundscheck=False, cdivision=True, wraparound=False # <<<<<<<<<<<<<< + * """ + * Cython version of the IndicProcessor class with optimizations for performance. + */ + __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_3) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + if (__pyx_m) { + if (__pyx_d && stringtab_initialized) { + __Pyx_AddTraceback("init IndicTransToolkit.processor", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + #if !CYTHON_USE_MODULE_STATE + Py_CLEAR(__pyx_m); + #else + Py_DECREF(__pyx_m); + if (pystate_addmodule_run) { + PyObject *tp, *value, *tb; + PyErr_Fetch(&tp, &value, &tb); + PyState_RemoveModule(&__pyx_moduledef); + PyErr_Restore(tp, value, tb); + } + #endif + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init IndicTransToolkit.processor"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if CYTHON_PEP489_MULTI_PHASE_INIT + return (__pyx_m != NULL) ? 
0 : -1; + #elif PY_MAJOR_VERSION >= 3 + return __pyx_m; + #else + return; + #endif +} +/* #### Code section: cleanup_globals ### */ +/* #### Code section: cleanup_module ### */ +/* #### Code section: main_method ### */ +/* #### Code section: utility_code_pragmas ### */ +#ifdef _MSC_VER +#pragma warning( push ) +/* Warning 4127: conditional expression is constant + * Cython uses constant conditional expressions to allow in inline functions to be optimized at + * compile-time, so this warning is not useful + */ +#pragma warning( disable : 4127 ) +#endif + + + +/* #### Code section: utility_code_def ### */ + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule(modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, "RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* PyErrExceptionMatches */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030C00A6 + PyObject *current_exception = tstate->current_exception; + if (unlikely(!current_exception)) return 0; + exc_type = (PyObject*) Py_TYPE(current_exception); + if (exc_type == err) return 1; +#else + exc_type = tstate->curexc_type; + if (exc_type == err) return 1; + if (unlikely(!exc_type)) return 0; +#endif + #if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(exc_type); + #endif + if (unlikely(PyTuple_Check(err))) { + result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); + } else { + result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err); + } + #if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(exc_type); + #endif + return result; +} +#endif + +/* PyErrFetchRestore */ +#if 
CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject *tmp_value; + assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value))); + if (value) { + #if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb)) + #endif + PyException_SetTraceback(value, tb); + } + tmp_value = tstate->current_exception; + tstate->current_exception = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#endif +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject* exc_value; + exc_value = tstate->current_exception; + tstate->current_exception = 0; + *value = exc_value; + *type = NULL; + *tb = NULL; + if (exc_value) { + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + #if CYTHON_COMPILING_IN_CPYTHON + *tb = ((PyBaseExceptionObject*) exc_value)->traceback; + Py_XINCREF(*tb); + #else + *tb = PyException_GetTraceback(exc_value); + #endif + } +#else + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +#endif +} +#endif + +/* PyObjectGetAttrStr */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if 
(likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#endif + +/* PyObjectGetAttrStrNoError */ +#if __PYX_LIMITED_VERSION_HEX < 0x030d00A1 +static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) + __Pyx_PyErr_Clear(); +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) { + PyObject *result; +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + (void) PyObject_GetOptionalAttr(obj, attr_name, &result); + return result; +#else +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1 + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) { + return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1); + } +#endif + result = __Pyx_PyObject_GetAttrStr(obj, attr_name); + if (unlikely(!result)) { + __Pyx_PyObject_GetAttrStr_ClearAttributeError(); + } + return result; +#endif +} + +/* GetBuiltinName */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name); + if (unlikely(!result) && !PyErr_Occurred()) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +/* TupleAndListFromArray */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) { + PyObject *v; + Py_ssize_t i; + for (i = 0; i < length; i++) { + v = dest[i] = src[i]; + Py_INCREF(v); + } +} +static CYTHON_INLINE PyObject * +__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + 
Py_INCREF(__pyx_empty_tuple); + return __pyx_empty_tuple; + } + res = PyTuple_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n); + return res; +} +static CYTHON_INLINE PyObject * +__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + return PyList_New(0); + } + res = PyList_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n); + return res; +} +#endif + +/* BytesEquals */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else + if (s1 == s2) { + return (equals == Py_EQ); + } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { + const char *ps1, *ps2; + Py_ssize_t length = PyBytes_GET_SIZE(s1); + if (length != PyBytes_GET_SIZE(s2)) + return (equals == Py_NE); + ps1 = PyBytes_AS_STRING(s1); + ps2 = PyBytes_AS_STRING(s2); + if (ps1[0] != ps2[0]) { + return (equals == Py_NE); + } else if (length == 1) { + return (equals == Py_EQ); + } else { + int result; +#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000) + Py_hash_t hash1, hash2; + hash1 = ((PyBytesObject*)s1)->ob_shash; + hash2 = ((PyBytesObject*)s2)->ob_shash; + if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { + return (equals == Py_NE); + } +#endif + result = memcmp(ps1, ps2, (size_t)length); + return (equals == Py_EQ) ? 
(result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { + return (equals == Py_NE); + } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { + return (equals == Py_NE); + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +#endif +} + +/* UnicodeEquals */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else +#if PY_MAJOR_VERSION < 3 + PyObject* owned_ref = NULL; +#endif + int s1_is_unicode, s2_is_unicode; + if (s1 == s2) { + goto return_eq; + } + s1_is_unicode = PyUnicode_CheckExact(s1); + s2_is_unicode = PyUnicode_CheckExact(s2); +#if PY_MAJOR_VERSION < 3 + if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { + owned_ref = PyUnicode_FromObject(s2); + if (unlikely(!owned_ref)) + return -1; + s2 = owned_ref; + s2_is_unicode = 1; + } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { + owned_ref = PyUnicode_FromObject(s1); + if (unlikely(!owned_ref)) + return -1; + s1 = owned_ref; + s1_is_unicode = 1; + } else if (((!s2_is_unicode) & (!s1_is_unicode))) { + return __Pyx_PyBytes_Equals(s1, s2, equals); + } +#endif + if (s1_is_unicode & s2_is_unicode) { + Py_ssize_t length; + int kind; + void *data1, *data2; + if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0)) + return -1; + length = __Pyx_PyUnicode_GET_LENGTH(s1); + if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { + goto return_ne; + } +#if CYTHON_USE_UNICODE_INTERNALS + { + Py_hash_t hash1, hash2; + #if CYTHON_PEP393_ENABLED + hash1 = ((PyASCIIObject*)s1)->hash; + hash2 = ((PyASCIIObject*)s2)->hash; + #else + hash1 = ((PyUnicodeObject*)s1)->hash; + hash2 = ((PyUnicodeObject*)s2)->hash; + #endif + if 
(hash1 != hash2 && hash1 != -1 && hash2 != -1) { + goto return_ne; + } + } +#endif + kind = __Pyx_PyUnicode_KIND(s1); + if (kind != __Pyx_PyUnicode_KIND(s2)) { + goto return_ne; + } + data1 = __Pyx_PyUnicode_DATA(s1); + data2 = __Pyx_PyUnicode_DATA(s2); + if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { + goto return_ne; + } else if (length == 1) { + goto return_eq; + } else { + int result = memcmp(data1, data2, (size_t)(length * kind)); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ) ? (result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & s2_is_unicode) { + goto return_ne; + } else if ((s2 == Py_None) & s1_is_unicode) { + goto return_ne; + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +return_eq: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ); +return_ne: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_NE); +#endif +} + +/* fastcall */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s) +{ + Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames); + for (i = 0; i < n; i++) + { + if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i]; + } + for (i = 0; i < n; i++) + { + int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); + if (unlikely(eq != 0)) { + if (unlikely(eq < 0)) return NULL; + return kwvalues[i]; + } + } + return NULL; +} +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 +CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) { + Py_ssize_t i, nkwargs = PyTuple_GET_SIZE(kwnames); + PyObject *dict; + dict = 
PyDict_New(); + if (unlikely(!dict)) + return NULL; + for (i=0; i= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds)); + while (1) { + Py_XDECREF(key); key = NULL; + Py_XDECREF(value); value = NULL; + if (kwds_is_tuple) { + Py_ssize_t size; +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(kwds); +#else + size = PyTuple_Size(kwds); + if (size < 0) goto bad; +#endif + if (pos >= size) break; +#if CYTHON_AVOID_BORROWED_REFS + key = __Pyx_PySequence_ITEM(kwds, pos); + if (!key) goto bad; +#elif CYTHON_ASSUME_SAFE_MACROS + key = PyTuple_GET_ITEM(kwds, pos); +#else + key = PyTuple_GetItem(kwds, pos); + if (!key) goto bad; +#endif + value = kwvalues[pos]; + pos++; + } + else + { + if (!PyDict_Next(kwds, &pos, &key, &value)) break; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + } + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(value); + Py_DECREF(key); +#endif + key = NULL; + value = NULL; + continue; + } +#if !CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + Py_INCREF(value); + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + 
value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = ( + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key) + ); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 
1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + Py_XDECREF(key); + Py_XDECREF(value); + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + #if PY_MAJOR_VERSION < 3 + PyErr_Format(PyExc_TypeError, + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + PyErr_Format(PyExc_TypeError, + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + Py_XDECREF(key); + Py_XDECREF(value); + return -1; +} + +/* RaiseArgTupleInvalid */ +static void __Pyx_RaiseArgtupleInvalid( + const char* func_name, + int exact, + Py_ssize_t num_min, + Py_ssize_t num_max, + Py_ssize_t num_found) +{ + Py_ssize_t num_expected; + const char *more_or_less; + if (num_found < num_min) { + num_expected = num_min; + more_or_less = "at least"; + } else { + num_expected = num_max; + more_or_less = "at most"; + } + if (exact) { + more_or_less = "exactly"; + } + PyErr_Format(PyExc_TypeError, + "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", + func_name, more_or_less, num_expected, + (num_expected == 1) ? 
"" : "s", num_found); +} + +/* PyFunctionFastCall */ +#if CYTHON_FAST_PYCALL && !CYTHON_VECTORCALL +static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, + PyObject *globals) { + PyFrameObject *f; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject **fastlocals; + Py_ssize_t i; + PyObject *result; + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. + */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) { + return NULL; + } + fastlocals = __Pyx_PyFrame_GetLocalsplus(f); + for (i = 0; i < na; i++) { + Py_INCREF(*args); + fastlocals[i] = *args++; + } + result = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return result; +} +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *closure; +#if PY_MAJOR_VERSION >= 3 + PyObject *kwdefs; +#endif + PyObject *kwtuple, **k; + PyObject **d; + Py_ssize_t nd; + Py_ssize_t nk; + PyObject *result; + assert(kwargs == NULL || PyDict_Check(kwargs)); + nk = kwargs ? 
PyDict_Size(kwargs) : 0; + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) { + return NULL; + } + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) { + return NULL; + } + #endif + if ( +#if PY_MAJOR_VERSION >= 3 + co->co_kwonlyargcount == 0 && +#endif + likely(kwargs == NULL || nk == 0) && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + if (argdefs == NULL && co->co_argcount == nargs) { + result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); + goto done; + } + else if (nargs == 0 && argdefs != NULL + && co->co_argcount == Py_SIZE(argdefs)) { + /* function called with no arguments, but all parameters have + a default value: use default values as arguments .*/ + args = &PyTuple_GET_ITEM(argdefs, 0); + result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); + goto done; + } + } + if (kwargs != NULL) { + Py_ssize_t pos, i; + kwtuple = PyTuple_New(2 * nk); + if (kwtuple == NULL) { + result = NULL; + goto done; + } + k = &PyTuple_GET_ITEM(kwtuple, 0); + pos = i = 0; + while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { + Py_INCREF(k[i]); + Py_INCREF(k[i+1]); + i += 2; + } + nk = i / 2; + } + else { + kwtuple = NULL; + k = NULL; + } + closure = PyFunction_GET_CLOSURE(func); +#if PY_MAJOR_VERSION >= 3 + kwdefs = PyFunction_GET_KW_DEFAULTS(func); +#endif + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + else { + d = NULL; + nd = 0; + } +#if PY_MAJOR_VERSION >= 3 + result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, kwdefs, closure); +#else + result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, closure); +#endif + Py_XDECREF(kwtuple); +done: + Py_LeaveRecursiveCall(); + return result; +} +#endif + +/* PyObjectCall */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* 
__Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = Py_TYPE(func)->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectCallMethO */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = __Pyx_CyOrPyCFunction_GET_FUNCTION(func); + self = __Pyx_CyOrPyCFunction_GET_SELF(func); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectFastCall */ +#if PY_VERSION_HEX < 0x03090000 || CYTHON_COMPILING_IN_LIMITED_API +static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) { + PyObject *argstuple; + PyObject *result = 0; + size_t i; + argstuple = PyTuple_New((Py_ssize_t)nargs); + if (unlikely(!argstuple)) return NULL; + for (i = 0; i < nargs; i++) { + Py_INCREF(args[i]); + if (__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad; + } + result = __Pyx_PyObject_Call(func, argstuple, kwargs); + bad: + 
Py_DECREF(argstuple); + return result; +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) { + Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs); +#if CYTHON_COMPILING_IN_CPYTHON + if (nargs == 0 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_NOARGS)) + return __Pyx_PyObject_CallMethO(func, NULL); + } + else if (nargs == 1 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_O)) + return __Pyx_PyObject_CallMethO(func, args[0]); + } +#endif + #if PY_VERSION_HEX < 0x030800B1 + #if CYTHON_FAST_PYCCALL + if (PyCFunction_Check(func)) { + if (kwargs) { + return _PyCFunction_FastCallDict(func, args, nargs, kwargs); + } else { + return _PyCFunction_FastCallKeywords(func, args, nargs, NULL); + } + } + #if PY_VERSION_HEX >= 0x030700A1 + if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) { + return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL); + } + #endif + #endif + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs); + } + #endif + #endif + if (kwargs == NULL) { + #if CYTHON_VECTORCALL + #if PY_VERSION_HEX < 0x03090000 + vectorcallfunc f = _PyVectorcall_Function(func); + #else + vectorcallfunc f = PyVectorcall_Function(func); + #endif + if (f) { + return f(func, args, (size_t)nargs, NULL); + } + #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL + if (__Pyx_CyFunction_CheckExact(func)) { + __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func); + if (f) return f(func, args, (size_t)nargs, NULL); + } + #endif + } + if (nargs == 0) { + return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs); + } + #if PY_VERSION_HEX >= 0x03090000 && !CYTHON_COMPILING_IN_LIMITED_API + return PyObject_VectorcallDict(func, args, (size_t)nargs, kwargs); + #else + 
return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs); + #endif +} + +/* IterFinish */ +static CYTHON_INLINE int __Pyx_IterFinish(void) { + PyObject* exc_type; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + exc_type = __Pyx_PyErr_CurrentExceptionType(); + if (unlikely(exc_type)) { + if (unlikely(!__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) + return -1; + __Pyx_PyErr_Clear(); + return 0; + } + return 0; +} + +/* PyObjectCallNoArg */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { + PyObject *arg[2] = {NULL, NULL}; + return __Pyx_PyObject_FastCall(func, arg + 1, 0 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* PyObjectCallOneArg */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *args[2] = {NULL, arg}; + return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* PyObjectGetMethod */ +static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) { + PyObject *attr; +#if CYTHON_UNPACK_METHODS && CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_PYTYPE_LOOKUP + __Pyx_TypeName type_name; + PyTypeObject *tp = Py_TYPE(obj); + PyObject *descr; + descrgetfunc f = NULL; + PyObject **dictptr, *dict; + int meth_found = 0; + assert (*method == NULL); + if (unlikely(tp->tp_getattro != PyObject_GenericGetAttr)) { + attr = __Pyx_PyObject_GetAttrStr(obj, name); + goto try_unpack; + } + if (unlikely(tp->tp_dict == NULL) && unlikely(PyType_Ready(tp) < 0)) { + return 0; + } + descr = _PyType_Lookup(tp, name); + if (likely(descr != NULL)) { + Py_INCREF(descr); +#if defined(Py_TPFLAGS_METHOD_DESCRIPTOR) && Py_TPFLAGS_METHOD_DESCRIPTOR + if (__Pyx_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR)) +#elif PY_MAJOR_VERSION >= 3 + #ifdef __Pyx_CyFunction_USED + if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type) || __Pyx_CyFunction_Check(descr))) + #else + 
if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type))) + #endif +#else + #ifdef __Pyx_CyFunction_USED + if (likely(PyFunction_Check(descr) || __Pyx_CyFunction_Check(descr))) + #else + if (likely(PyFunction_Check(descr))) + #endif +#endif + { + meth_found = 1; + } else { + f = Py_TYPE(descr)->tp_descr_get; + if (f != NULL && PyDescr_IsData(descr)) { + attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); + Py_DECREF(descr); + goto try_unpack; + } + } + } + dictptr = _PyObject_GetDictPtr(obj); + if (dictptr != NULL && (dict = *dictptr) != NULL) { + Py_INCREF(dict); + attr = __Pyx_PyDict_GetItemStr(dict, name); + if (attr != NULL) { + Py_INCREF(attr); + Py_DECREF(dict); + Py_XDECREF(descr); + goto try_unpack; + } + Py_DECREF(dict); + } + if (meth_found) { + *method = descr; + return 1; + } + if (f != NULL) { + attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); + Py_DECREF(descr); + goto try_unpack; + } + if (likely(descr != NULL)) { + *method = descr; + return 0; + } + type_name = __Pyx_PyType_GetName(tp); + PyErr_Format(PyExc_AttributeError, +#if PY_MAJOR_VERSION >= 3 + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", + type_name, name); +#else + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", + type_name, PyString_AS_STRING(name)); +#endif + __Pyx_DECREF_TypeName(type_name); + return 0; +#else + attr = __Pyx_PyObject_GetAttrStr(obj, name); + goto try_unpack; +#endif +try_unpack: +#if CYTHON_UNPACK_METHODS + if (likely(attr) && PyMethod_Check(attr) && likely(PyMethod_GET_SELF(attr) == obj)) { + PyObject *function = PyMethod_GET_FUNCTION(attr); + Py_INCREF(function); + Py_DECREF(attr); + *method = function; + return 1; + } +#endif + *method = attr; + return 0; +} + +/* PyObjectCallMethod0 */ +static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) { + PyObject *method = NULL, *result = NULL; + int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); + if (likely(is_method)) { + result = 
__Pyx_PyObject_CallOneArg(method, obj); + Py_DECREF(method); + return result; + } + if (unlikely(!method)) goto bad; + result = __Pyx_PyObject_CallNoArg(method); + Py_DECREF(method); +bad: + return result; +} + +/* RaiseNeedMoreValuesToUnpack */ +static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { + PyErr_Format(PyExc_ValueError, + "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack", + index, (index == 1) ? "" : "s"); +} + +/* RaiseTooManyValuesToUnpack */ +static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { + PyErr_Format(PyExc_ValueError, + "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected); +} + +/* UnpackItemEndCheck */ +static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected) { + if (unlikely(retval)) { + Py_DECREF(retval); + __Pyx_RaiseTooManyValuesError(expected); + return -1; + } + return __Pyx_IterFinish(); +} + +/* RaiseNoneIterError */ +static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); +} + +/* UnpackTupleError */ +static void __Pyx_UnpackTupleError(PyObject *t, Py_ssize_t index) { + if (t == Py_None) { + __Pyx_RaiseNoneNotIterableError(); + } else if (PyTuple_GET_SIZE(t) < index) { + __Pyx_RaiseNeedMoreValuesError(PyTuple_GET_SIZE(t)); + } else { + __Pyx_RaiseTooManyValuesError(index); + } +} + +/* UnpackTuple2 */ +static CYTHON_INLINE int __Pyx_unpack_tuple2_exact( + PyObject* tuple, PyObject** pvalue1, PyObject** pvalue2, int decref_tuple) { + PyObject *value1 = NULL, *value2 = NULL; +#if CYTHON_COMPILING_IN_PYPY + value1 = PySequence_ITEM(tuple, 0); if (unlikely(!value1)) goto bad; + value2 = PySequence_ITEM(tuple, 1); if (unlikely(!value2)) goto bad; +#else + value1 = PyTuple_GET_ITEM(tuple, 0); Py_INCREF(value1); + value2 = PyTuple_GET_ITEM(tuple, 1); Py_INCREF(value2); +#endif + if (decref_tuple) { + Py_DECREF(tuple); + } + *pvalue1 = value1; + 
*pvalue2 = value2; + return 0; +#if CYTHON_COMPILING_IN_PYPY +bad: + Py_XDECREF(value1); + Py_XDECREF(value2); + if (decref_tuple) { Py_XDECREF(tuple); } + return -1; +#endif +} +static int __Pyx_unpack_tuple2_generic(PyObject* tuple, PyObject** pvalue1, PyObject** pvalue2, + int has_known_size, int decref_tuple) { + Py_ssize_t index; + PyObject *value1 = NULL, *value2 = NULL, *iter = NULL; + iternextfunc iternext; + iter = PyObject_GetIter(tuple); + if (unlikely(!iter)) goto bad; + if (decref_tuple) { Py_DECREF(tuple); tuple = NULL; } + iternext = __Pyx_PyObject_GetIterNextFunc(iter); + value1 = iternext(iter); if (unlikely(!value1)) { index = 0; goto unpacking_failed; } + value2 = iternext(iter); if (unlikely(!value2)) { index = 1; goto unpacking_failed; } + if (!has_known_size && unlikely(__Pyx_IternextUnpackEndCheck(iternext(iter), 2))) goto bad; + Py_DECREF(iter); + *pvalue1 = value1; + *pvalue2 = value2; + return 0; +unpacking_failed: + if (!has_known_size && __Pyx_IterFinish() == 0) + __Pyx_RaiseNeedMoreValuesError(index); +bad: + Py_XDECREF(iter); + Py_XDECREF(value1); + Py_XDECREF(value2); + if (decref_tuple) { Py_XDECREF(tuple); } + return -1; +} + +/* dict_iter */ +#if CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 +#include +#endif +static CYTHON_INLINE PyObject* __Pyx_dict_iterator(PyObject* iterable, int is_dict, PyObject* method_name, + Py_ssize_t* p_orig_length, int* p_source_is_dict) { + is_dict = is_dict || likely(PyDict_CheckExact(iterable)); + *p_source_is_dict = is_dict; + if (is_dict) { +#if !CYTHON_COMPILING_IN_PYPY + *p_orig_length = PyDict_Size(iterable); + Py_INCREF(iterable); + return iterable; +#elif PY_MAJOR_VERSION >= 3 + static PyObject *py_items = NULL, *py_keys = NULL, *py_values = NULL; + PyObject **pp = NULL; + if (method_name) { + const char *name = PyUnicode_AsUTF8(method_name); + if (strcmp(name, "iteritems") == 0) pp = &py_items; + else if (strcmp(name, "iterkeys") == 0) pp = &py_keys; + else if (strcmp(name, "itervalues") 
== 0) pp = &py_values; + if (pp) { + if (!*pp) { + *pp = PyUnicode_FromString(name + 4); + if (!*pp) + return NULL; + } + method_name = *pp; + } + } +#endif + } + *p_orig_length = 0; + if (method_name) { + PyObject* iter; + iterable = __Pyx_PyObject_CallMethod0(iterable, method_name); + if (!iterable) + return NULL; +#if !CYTHON_COMPILING_IN_PYPY + if (PyTuple_CheckExact(iterable) || PyList_CheckExact(iterable)) + return iterable; +#endif + iter = PyObject_GetIter(iterable); + Py_DECREF(iterable); + return iter; + } + return PyObject_GetIter(iterable); +} +static CYTHON_INLINE int __Pyx_dict_iter_next( + PyObject* iter_obj, CYTHON_NCP_UNUSED Py_ssize_t orig_length, CYTHON_NCP_UNUSED Py_ssize_t* ppos, + PyObject** pkey, PyObject** pvalue, PyObject** pitem, int source_is_dict) { + PyObject* next_item; +#if !CYTHON_COMPILING_IN_PYPY + if (source_is_dict) { + PyObject *key, *value; + if (unlikely(orig_length != PyDict_Size(iter_obj))) { + PyErr_SetString(PyExc_RuntimeError, "dictionary changed size during iteration"); + return -1; + } + if (unlikely(!PyDict_Next(iter_obj, ppos, &key, &value))) { + return 0; + } + if (pitem) { + PyObject* tuple = PyTuple_New(2); + if (unlikely(!tuple)) { + return -1; + } + Py_INCREF(key); + Py_INCREF(value); + PyTuple_SET_ITEM(tuple, 0, key); + PyTuple_SET_ITEM(tuple, 1, value); + *pitem = tuple; + } else { + if (pkey) { + Py_INCREF(key); + *pkey = key; + } + if (pvalue) { + Py_INCREF(value); + *pvalue = value; + } + } + return 1; + } else if (PyTuple_CheckExact(iter_obj)) { + Py_ssize_t pos = *ppos; + if (unlikely(pos >= PyTuple_GET_SIZE(iter_obj))) return 0; + *ppos = pos + 1; + next_item = PyTuple_GET_ITEM(iter_obj, pos); + Py_INCREF(next_item); + } else if (PyList_CheckExact(iter_obj)) { + Py_ssize_t pos = *ppos; + if (unlikely(pos >= PyList_GET_SIZE(iter_obj))) return 0; + *ppos = pos + 1; + next_item = PyList_GET_ITEM(iter_obj, pos); + Py_INCREF(next_item); + } else +#endif + { + next_item = PyIter_Next(iter_obj); + if 
(unlikely(!next_item)) { + return __Pyx_IterFinish(); + } + } + if (pitem) { + *pitem = next_item; + } else if (pkey && pvalue) { + if (__Pyx_unpack_tuple2(next_item, pkey, pvalue, source_is_dict, source_is_dict, 1)) + return -1; + } else if (pkey) { + *pkey = next_item; + } else { + *pvalue = next_item; + } + return 1; +} + +/* UnicodeAsUCS4 */ +static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject* x) { + Py_ssize_t length; + #if CYTHON_PEP393_ENABLED + length = PyUnicode_GET_LENGTH(x); + if (likely(length == 1)) { + return PyUnicode_READ_CHAR(x, 0); + } + #else + length = PyUnicode_GET_SIZE(x); + if (likely(length == 1)) { + return PyUnicode_AS_UNICODE(x)[0]; + } + #if Py_UNICODE_SIZE == 2 + else if (PyUnicode_GET_SIZE(x) == 2) { + Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0]; + if (high_val >= 0xD800 && high_val <= 0xDBFF) { + Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1]; + if (low_val >= 0xDC00 && low_val <= 0xDFFF) { + return 0x10000 + (((high_val & ((1<<10)-1)) << 10) | (low_val & ((1<<10)-1))); + } + } + } + #endif + #endif + PyErr_Format(PyExc_ValueError, + "only single character unicode strings can be converted to Py_UCS4, " + "got length %" CYTHON_FORMAT_SSIZE_T "d", length); + return (Py_UCS4)-1; +} + +/* object_ord */ +static long __Pyx__PyObject_Ord(PyObject* c) { + Py_ssize_t size; + if (PyBytes_Check(c)) { + size = PyBytes_GET_SIZE(c); + if (likely(size == 1)) { + return (unsigned char) PyBytes_AS_STRING(c)[0]; + } +#if PY_MAJOR_VERSION < 3 + } else if (PyUnicode_Check(c)) { + return (long)__Pyx_PyUnicode_AsPy_UCS4(c); +#endif +#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + } else if (PyByteArray_Check(c)) { + size = PyByteArray_GET_SIZE(c); + if (likely(size == 1)) { + return (unsigned char) PyByteArray_AS_STRING(c)[0]; + } +#endif + } else { + __Pyx_TypeName c_type_name = __Pyx_PyType_GetName(Py_TYPE(c)); + PyErr_Format(PyExc_TypeError, + "ord() expected string of length 1, but " 
__Pyx_FMT_TYPENAME " found", + c_type_name); + __Pyx_DECREF_TypeName(c_type_name); + return (long)(Py_UCS4)-1; + } + PyErr_Format(PyExc_TypeError, + "ord() expected a character, but string of length %zd found", size); + return (long)(Py_UCS4)-1; +} + +/* PyDictVersioning */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0; +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { + PyObject **dictptr = NULL; + Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; + if (offset) { +#if CYTHON_COMPILING_IN_CPYTHON + dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); +#else + dictptr = _PyObject_GetDictPtr(obj); +#endif + } + return (dictptr && *dictptr) ? __PYX_GET_DICT_VERSION(*dictptr) : 0; +} +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) + return 0; + return obj_dict_version == __Pyx_get_object_dict_version(obj); +} +#endif + +/* GetModuleGlobalName */ +#if CYTHON_USE_DICT_VERSIONS +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value) +#else +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name) +#endif +{ + PyObject *result; +#if !CYTHON_AVOID_BORROWED_REFS +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && PY_VERSION_HEX < 0x030d0000 + result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } else if (unlikely(PyErr_Occurred())) { + return NULL; + 
} +#elif CYTHON_COMPILING_IN_LIMITED_API + if (unlikely(!__pyx_m)) { + return NULL; + } + result = PyObject_GetAttr(__pyx_m, name); + if (likely(result)) { + return result; + } +#else + result = PyDict_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } +#endif +#else + result = PyObject_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } + PyErr_Clear(); +#endif + return __Pyx_GetBuiltinName(name); +} + +/* FixUpExtensionType */ +#if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) { +#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + CYTHON_UNUSED_VAR(spec); + CYTHON_UNUSED_VAR(type); +#else + const PyType_Slot *slot = spec->slots; + while (slot && slot->slot && slot->slot != Py_tp_members) + slot++; + if (slot && slot->slot == Py_tp_members) { + int changed = 0; +#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON) + const +#endif + PyMemberDef *memb = (PyMemberDef*) slot->pfunc; + while (memb && memb->name) { + if (memb->name[0] == '_' && memb->name[1] == '_') { +#if PY_VERSION_HEX < 0x030900b1 + if (strcmp(memb->name, "__weaklistoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_weaklistoffset = memb->offset; + changed = 1; + } + else if (strcmp(memb->name, "__dictoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_dictoffset = memb->offset; + changed = 1; + } +#if CYTHON_METH_FASTCALL + else if (strcmp(memb->name, "__vectorcalloffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); +#if PY_VERSION_HEX >= 0x030800b4 + type->tp_vectorcall_offset = memb->offset; +#else + type->tp_print = (printfunc) memb->offset; +#endif + 
changed = 1; + } +#endif +#else + if ((0)); +#endif +#if PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON + else if (strcmp(memb->name, "__module__") == 0) { + PyObject *descr; + assert(memb->type == T_OBJECT); + assert(memb->flags == 0 || memb->flags == READONLY); + descr = PyDescr_NewMember(type, memb); + if (unlikely(!descr)) + return -1; + if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) { + Py_DECREF(descr); + return -1; + } + Py_DECREF(descr); + changed = 1; + } +#endif + } + memb++; + } + if (changed) + PyType_Modified(type); + } +#endif + return 0; +} +#endif + +/* FetchSharedCythonModule */ +static PyObject *__Pyx_FetchSharedCythonABIModule(void) { + return __Pyx_PyImport_AddModuleRef((char*) __PYX_ABI_MODULE_NAME); +} + +/* FetchCommonType */ +static int __Pyx_VerifyCachedType(PyObject *cached_type, + const char *name, + Py_ssize_t basicsize, + Py_ssize_t expected_basicsize) { + if (!PyType_Check(cached_type)) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s is not a type object", name); + return -1; + } + if (basicsize != expected_basicsize) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s has the wrong size, try recompiling", + name); + return -1; + } + return 0; +} +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) { + PyObject* abi_module; + const char* object_name; + PyTypeObject *cached_type = NULL; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + object_name = strrchr(type->tp_name, '.'); + object_name = object_name ? 
object_name+1 : type->tp_name; + cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + if (__Pyx_VerifyCachedType( + (PyObject *)cached_type, + object_name, + cached_type->tp_basicsize, + type->tp_basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + if (PyType_Ready(type) < 0) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0) + goto bad; + Py_INCREF(type); + cached_type = type; +done: + Py_DECREF(abi_module); + return cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#else +static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) { + PyObject *abi_module, *cached_type = NULL; + const char* object_name = strrchr(spec->name, '.'); + object_name = object_name ? object_name+1 : spec->name; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + cached_type = PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + Py_ssize_t basicsize; +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *py_basicsize; + py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__"); + if (unlikely(!py_basicsize)) goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; +#else + basicsize = likely(PyType_Check(cached_type)) ? 
((PyTypeObject*) cached_type)->tp_basicsize : -1; +#endif + if (__Pyx_VerifyCachedType( + cached_type, + object_name, + basicsize, + spec->basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + CYTHON_UNUSED_VAR(module); + cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases); + if (unlikely(!cached_type)) goto bad; + if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad; +done: + Py_DECREF(abi_module); + assert(cached_type == NULL || PyType_Check(cached_type)); + return (PyTypeObject *) cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#endif + +/* PyVectorcallFastCallDict */ +#if CYTHON_METH_FASTCALL +static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + PyObject *res = NULL; + PyObject *kwnames; + PyObject **newargs; + PyObject **kwvalues; + Py_ssize_t i, pos; + size_t j; + PyObject *key, *value; + unsigned long keys_are_strings; + Py_ssize_t nkw = PyDict_GET_SIZE(kw); + newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0])); + if (unlikely(newargs == NULL)) { + PyErr_NoMemory(); + return NULL; + } + for (j = 0; j < nargs; j++) newargs[j] = args[j]; + kwnames = PyTuple_New(nkw); + if (unlikely(kwnames == NULL)) { + PyMem_Free(newargs); + return NULL; + } + kwvalues = newargs + nargs; + pos = i = 0; + keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; + while (PyDict_Next(kw, &pos, &key, &value)) { + keys_are_strings &= Py_TYPE(key)->tp_flags; + Py_INCREF(key); + Py_INCREF(value); + PyTuple_SET_ITEM(kwnames, i, key); + kwvalues[i] = value; + i++; + } + if (unlikely(!keys_are_strings)) { + PyErr_SetString(PyExc_TypeError, "keywords must be strings"); + goto cleanup; + } + res = vc(func, newargs, 
nargs, kwnames); +cleanup: + Py_DECREF(kwnames); + for (i = 0; i < nkw; i++) + Py_DECREF(kwvalues[i]); + PyMem_Free(newargs); + return res; +} +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) { + return vc(func, args, nargs, NULL); + } + return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw); +} +#endif + +/* CythonFunctionShared */ +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + if (__Pyx_CyFunction_Check(func)) { + return PyCFunction_GetFunction(((__pyx_CyFunctionObject*)func)->func) == (PyCFunction) cfunc; + } else if (PyCFunction_Check(func)) { + return PyCFunction_GetFunction(func) == (PyCFunction) cfunc; + } + return 0; +} +#else +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + return __Pyx_CyOrPyCFunction_Check(func) && __Pyx_CyOrPyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +} +#endif +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) { +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + __Pyx_Py_XDECREF_SET( + __Pyx_CyFunction_GetClassObj(f), + ((classobj) ? __Pyx_NewRef(classobj) : NULL)); +#else + __Pyx_Py_XDECREF_SET( + ((PyCMethodObject *) (f))->mm_class, + (PyTypeObject*)((classobj) ? 
__Pyx_NewRef(classobj) : NULL)); +#endif +} +static PyObject * +__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure) +{ + CYTHON_UNUSED_VAR(closure); + if (unlikely(op->func_doc == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_doc = PyObject_GetAttrString(op->func, "__doc__"); + if (unlikely(!op->func_doc)) return NULL; +#else + if (((PyCFunctionObject*)op)->m_ml->ml_doc) { +#if PY_MAJOR_VERSION >= 3 + op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#else + op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#endif + if (unlikely(op->func_doc == NULL)) + return NULL; + } else { + Py_INCREF(Py_None); + return Py_None; + } +#endif + } + Py_INCREF(op->func_doc); + return op->func_doc; +} +static int +__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (value == NULL) { + value = Py_None; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_doc, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_name == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_name = PyObject_GetAttrString(op->func, "__name__"); +#elif PY_MAJOR_VERSION >= 3 + op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#else + op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#endif + if (unlikely(op->func_name == NULL)) + return NULL; + } + Py_INCREF(op->func_name); + return op->func_name; +} +static int +__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__name__ must be set to a string object"); 
+ return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_name, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_qualname); + return op->func_qualname; +} +static int +__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__qualname__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_qualname, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_dict == NULL)) { + op->func_dict = PyDict_New(); + if (unlikely(op->func_dict == NULL)) + return NULL; + } + Py_INCREF(op->func_dict); + return op->func_dict; +} +static int +__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(value == NULL)) { + PyErr_SetString(PyExc_TypeError, + "function's dictionary may not be deleted"); + return -1; + } + if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "setting function's dictionary to a non-dict"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_dict, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_globals); + return op->func_globals; +} +static PyObject * +__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(op); + CYTHON_UNUSED_VAR(context); + Py_INCREF(Py_None); + return Py_None; +} +static PyObject * 
+__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context) +{ + PyObject* result = (op->func_code) ? op->func_code : Py_None; + CYTHON_UNUSED_VAR(context); + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) { + int result = 0; + PyObject *res = op->defaults_getter((PyObject *) op); + if (unlikely(!res)) + return -1; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + op->defaults_tuple = PyTuple_GET_ITEM(res, 0); + Py_INCREF(op->defaults_tuple); + op->defaults_kwdict = PyTuple_GET_ITEM(res, 1); + Py_INCREF(op->defaults_kwdict); + #else + op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0); + if (unlikely(!op->defaults_tuple)) result = -1; + else { + op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1); + if (unlikely(!op->defaults_kwdict)) result = -1; + } + #endif + Py_DECREF(res); + return result; +} +static int +__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyTuple_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__defaults__ must be set to a tuple object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_tuple, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_tuple; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_tuple; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + 
CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__kwdefaults__ must be set to a dict object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_kwdict; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_kwdict; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value || value == Py_None) { + value = NULL; + } else if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__annotations__ must be set to a dict object"); + return -1; + } + Py_XINCREF(value); + __Pyx_Py_XDECREF_SET(op->func_annotations, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->func_annotations; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + result = PyDict_New(); + if (unlikely(!result)) return NULL; + op->func_annotations = result; + } + Py_INCREF(result); + return result; +} +static PyObject * +__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) { + int is_coroutine; + CYTHON_UNUSED_VAR(context); + if (op->func_is_coroutine) { + return __Pyx_NewRef(op->func_is_coroutine); + } + is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE; +#if PY_VERSION_HEX >= 0x03050000 + if (is_coroutine) { + PyObject 
*module, *fromlist, *marker = __pyx_n_s_is_coroutine; + fromlist = PyList_New(1); + if (unlikely(!fromlist)) return NULL; + Py_INCREF(marker); +#if CYTHON_ASSUME_SAFE_MACROS + PyList_SET_ITEM(fromlist, 0, marker); +#else + if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) { + Py_DECREF(marker); + Py_DECREF(fromlist); + return NULL; + } +#endif + module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0); + Py_DECREF(fromlist); + if (unlikely(!module)) goto ignore; + op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker); + Py_DECREF(module); + if (likely(op->func_is_coroutine)) { + return __Pyx_NewRef(op->func_is_coroutine); + } +ignore: + PyErr_Clear(); + } +#endif + op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine); + return __Pyx_NewRef(op->func_is_coroutine); +} +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject * +__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_GetAttrString(op->func, "__module__"); +} +static int +__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_SetAttrString(op->func, "__module__", value); +} +#endif +static PyGetSetDef __pyx_CyFunction_getsets[] = { + {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0}, + {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, 
(setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0}, + {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, + {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0}, +#if CYTHON_COMPILING_IN_LIMITED_API + {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0}, +#endif + {0, 0, 0, 0, 0} +}; +static PyMemberDef __pyx_CyFunction_members[] = { +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0}, +#endif +#if CYTHON_USE_TYPE_SPECS + {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0}, +#if CYTHON_METH_FASTCALL +#if CYTHON_BACKPORT_VECTORCALL + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0}, +#else +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0}, +#endif +#endif +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, 
func_weakreflist), READONLY, 0}, +#else + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0}, +#endif +#endif + {0, 0, 0, 0, 0} +}; +static PyObject * +__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args) +{ + CYTHON_UNUSED_VAR(args); +#if PY_MAJOR_VERSION >= 3 + Py_INCREF(m->func_qualname); + return m->func_qualname; +#else + return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name); +#endif +} +static PyMethodDef __pyx_CyFunction_methods[] = { + {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0}, + {0, 0, 0, 0} +}; +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist) +#else +#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist) +#endif +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { +#if !CYTHON_COMPILING_IN_LIMITED_API + PyCFunctionObject *cf = (PyCFunctionObject*) op; +#endif + if (unlikely(op == NULL)) + return NULL; +#if CYTHON_COMPILING_IN_LIMITED_API + op->func = PyCFunction_NewEx(ml, (PyObject*)op, module); + if (unlikely(!op->func)) return NULL; +#endif + op->flags = flags; + __Pyx_CyFunction_weakreflist(op) = NULL; +#if !CYTHON_COMPILING_IN_LIMITED_API + cf->m_ml = ml; + cf->m_self = (PyObject *) op; +#endif + Py_XINCREF(closure); + op->func_closure = closure; +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_XINCREF(module); + cf->m_module = module; +#endif + op->func_dict = NULL; + op->func_name = NULL; + Py_INCREF(qualname); + op->func_qualname = qualname; + op->func_doc = NULL; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + op->func_classobj = NULL; +#else + ((PyCMethodObject*)op)->mm_class = NULL; +#endif + op->func_globals = globals; + Py_INCREF(op->func_globals); + 
Py_XINCREF(code); + op->func_code = code; + op->defaults_pyobjects = 0; + op->defaults_size = 0; + op->defaults = NULL; + op->defaults_tuple = NULL; + op->defaults_kwdict = NULL; + op->defaults_getter = NULL; + op->func_annotations = NULL; + op->func_is_coroutine = NULL; +#if CYTHON_METH_FASTCALL + switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { + case METH_NOARGS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS; + break; + case METH_O: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O; + break; + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD; + break; + case METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS; + break; + case METH_VARARGS | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = NULL; + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + Py_DECREF(op); + return NULL; + } +#endif + return (PyObject *) op; +} +static int +__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m) +{ + Py_CLEAR(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_CLEAR(m->func); +#else + Py_CLEAR(((PyCFunctionObject*)m)->m_module); +#endif + Py_CLEAR(m->func_dict); + Py_CLEAR(m->func_name); + Py_CLEAR(m->func_qualname); + Py_CLEAR(m->func_doc); + Py_CLEAR(m->func_globals); + Py_CLEAR(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API +#if PY_VERSION_HEX < 0x030900B1 + Py_CLEAR(__Pyx_CyFunction_GetClassObj(m)); +#else + { + PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class; + ((PyCMethodObject *) (m))->mm_class = NULL; + Py_XDECREF(cls); + } +#endif +#endif + Py_CLEAR(m->defaults_tuple); + Py_CLEAR(m->defaults_kwdict); + Py_CLEAR(m->func_annotations); + Py_CLEAR(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = 
__Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_XDECREF(pydefaults[i]); + PyObject_Free(m->defaults); + m->defaults = NULL; + } + return 0; +} +static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + if (__Pyx_CyFunction_weakreflist(m) != NULL) + PyObject_ClearWeakRefs((PyObject *) m); + __Pyx_CyFunction_clear(m); + __Pyx_PyHeapTypeObject_GC_Del(m); +} +static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + PyObject_GC_UnTrack(m); + __Pyx__CyFunction_dealloc(m); +} +static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg) +{ + Py_VISIT(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(m->func); +#else + Py_VISIT(((PyCFunctionObject*)m)->m_module); +#endif + Py_VISIT(m->func_dict); + Py_VISIT(m->func_name); + Py_VISIT(m->func_qualname); + Py_VISIT(m->func_doc); + Py_VISIT(m->func_globals); + Py_VISIT(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(__Pyx_CyFunction_GetClassObj(m)); +#endif + Py_VISIT(m->defaults_tuple); + Py_VISIT(m->defaults_kwdict); + Py_VISIT(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_VISIT(pydefaults[i]); + } + return 0; +} +static PyObject* +__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) +{ +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_FromFormat("", + op->func_qualname, (void *)op); +#else + return PyString_FromFormat("", + PyString_AsString(op->func_qualname), (void *)op); +#endif +} +static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *f = ((__pyx_CyFunctionObject*)func)->func; + PyObject *py_name = NULL; + PyCFunction meth; + int flags; + meth = PyCFunction_GetFunction(f); + if (unlikely(!meth)) return NULL; + flags = PyCFunction_GetFlags(f); + if (unlikely(flags 
< 0)) return NULL; +#else + PyCFunctionObject* f = (PyCFunctionObject*)func; + PyCFunction meth = f->m_ml->ml_meth; + int flags = f->m_ml->ml_flags; +#endif + Py_ssize_t size; + switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) { + case METH_VARARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) + return (*meth)(self, arg); + break; + case METH_VARARGS | METH_KEYWORDS: + return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw); + case METH_NOARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 0)) + return (*meth)(self, NULL); +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + case METH_O: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 1)) { + PyObject *result, *arg0; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + arg0 = PyTuple_GET_ITEM(arg, 0); + #else + arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL; + #endif + result = (*meth)(self, arg0); + #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(arg0); + #endif + return result; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes exactly one 
argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + return NULL; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments", + py_name); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", + f->m_ml->ml_name); +#endif + return NULL; +} +static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *self, *result; +#if CYTHON_COMPILING_IN_LIMITED_API + self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func); + if (unlikely(!self) && PyErr_Occurred()) return NULL; +#else + self = ((PyCFunctionObject*)func)->m_self; +#endif + result = __Pyx_CyFunction_CallMethod(func, self, arg, kw); + return result; +} +static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { + PyObject *result; + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; +#if CYTHON_METH_FASTCALL + __pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc); + if (vc) { +#if CYTHON_ASSUME_SAFE_MACROS + return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw); +#else + (void) &__Pyx_PyVectorcall_FastCallDict; + return PyVectorcall_Call(func, args, kw); +#endif + } +#endif + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + Py_ssize_t argc; + PyObject *new_args; + PyObject *self; +#if CYTHON_ASSUME_SAFE_MACROS + argc = PyTuple_GET_SIZE(args); +#else + argc = PyTuple_Size(args); + if 
(unlikely(!argc) < 0) return NULL; +#endif + new_args = PyTuple_GetSlice(args, 1, argc); + if (unlikely(!new_args)) + return NULL; + self = PyTuple_GetItem(args, 0); + if (unlikely(!self)) { + Py_DECREF(new_args); +#if PY_MAJOR_VERSION > 2 + PyErr_Format(PyExc_TypeError, + "unbound method %.200S() needs an argument", + cyfunc->func_qualname); +#else + PyErr_SetString(PyExc_TypeError, + "unbound method needs an argument"); +#endif + return NULL; + } + result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw); + Py_DECREF(new_args); + } else { + result = __Pyx_CyFunction_Call(func, args, kw); + } + return result; +} +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames) +{ + int ret = 0; + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + if (unlikely(nargs < 1)) { + PyErr_Format(PyExc_TypeError, "%.200s() needs an argument", + ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + ret = 1; + } + if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + return ret; +} +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 0)) { + 
PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, nargs); + return NULL; + } + return def->ml_meth(self, NULL); +} +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 1)) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, nargs); + return NULL; + } + return def->ml_meth(self, args[0]); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = 
(__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; + PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc); +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames); +} +#endif +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_CyFunctionType_slots[] = { + {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc}, + {Py_tp_repr, (void *)__Pyx_CyFunction_repr}, + {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod}, + {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse}, + {Py_tp_clear, (void *)__Pyx_CyFunction_clear}, + {Py_tp_methods, (void *)__pyx_CyFunction_methods}, + {Py_tp_members, (void *)__pyx_CyFunction_members}, + {Py_tp_getset, (void *)__pyx_CyFunction_getsets}, + {Py_tp_descr_get, (void *)__Pyx_PyMethod_New}, + {0, 0}, +}; +static PyType_Spec __pyx_CyFunctionType_spec = { + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL) + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + __pyx_CyFunctionType_slots +}; +#else +static PyTypeObject __pyx_CyFunctionType_type = { + PyVarObject_HEAD_INIT(0, 0) + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, + (destructor) __Pyx_CyFunction_dealloc, +#if !CYTHON_METH_FASTCALL + 0, +#elif CYTHON_BACKPORT_VECTORCALL + (printfunc)offsetof(__pyx_CyFunctionObject, 
func_vectorcall), +#else + offsetof(PyCFunctionObject, vectorcall), +#endif + 0, + 0, +#if PY_MAJOR_VERSION < 3 + 0, +#else + 0, +#endif + (reprfunc) __Pyx_CyFunction_repr, + 0, + 0, + 0, + 0, + __Pyx_CyFunction_CallAsMethod, + 0, + 0, + 0, + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + 0, + (traverseproc) __Pyx_CyFunction_traverse, + (inquiry) __Pyx_CyFunction_clear, + 0, +#if PY_VERSION_HEX < 0x030500A0 + offsetof(__pyx_CyFunctionObject, func_weakreflist), +#else + offsetof(PyCFunctionObject, m_weakreflist), +#endif + 0, + 0, + __pyx_CyFunction_methods, + __pyx_CyFunction_members, + __pyx_CyFunction_getsets, + 0, + 0, + __Pyx_PyMethod_New, + 0, + offsetof(__pyx_CyFunctionObject, func_dict), + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +#if PY_VERSION_HEX >= 0x030400a1 + 0, +#endif +#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, +#endif +#if __PYX_NEED_TP_PRINT_SLOT + 0, +#endif +#if PY_VERSION_HEX >= 0x030C0000 + 0, +#endif +#if PY_VERSION_HEX >= 0x030d00A4 + 0, +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, +#endif +}; +#endif +static int __pyx_CyFunction_init(PyObject *module) { +#if CYTHON_USE_TYPE_SPECS + __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL); +#else + CYTHON_UNUSED_VAR(module); + __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type); +#endif + if (unlikely(__pyx_CyFunctionType == NULL)) { + return -1; + } + return 0; +} +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults = PyObject_Malloc(size); + if (unlikely(!m->defaults)) 
+ return PyErr_NoMemory(); + memset(m->defaults, 0, size); + m->defaults_pyobjects = pyobjects; + m->defaults_size = size; + return m->defaults; +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_tuple = tuple; + Py_INCREF(tuple); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_kwdict = dict; + Py_INCREF(dict); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->func_annotations = dict; + Py_INCREF(dict); +} + +/* CythonFunction */ +static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { + PyObject *op = __Pyx_CyFunction_Init( + PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType), + ml, flags, qualname, closure, module, globals, code + ); + if (likely(op)) { + PyObject_GC_Track(op); + } + return op; +} + +/* RaiseUnexpectedTypeError */ +static int +__Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj) +{ + __Pyx_TypeName obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, "Expected %s, got " __Pyx_FMT_TYPENAME, + expected, obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return 0; +} + +/* UnpackUnboundCMethod */ +static PyObject *__Pyx_SelflessCall(PyObject *method, PyObject *args, PyObject *kwargs) { + PyObject *result; + PyObject *selfless_args = PyTuple_GetSlice(args, 1, PyTuple_Size(args)); + if (unlikely(!selfless_args)) return NULL; + result = PyObject_Call(method, selfless_args, kwargs); + Py_DECREF(selfless_args); + return result; +} +static PyMethodDef __Pyx_UnboundCMethod_Def = { + "CythonUnboundCMethod", + 
__PYX_REINTERPRET_FUNCION(PyCFunction, __Pyx_SelflessCall), + METH_VARARGS | METH_KEYWORDS, + NULL +}; +static int __Pyx_TryUnpackUnboundCMethod(__Pyx_CachedCFunction* target) { + PyObject *method; + method = __Pyx_PyObject_GetAttrStr(target->type, *target->method_name); + if (unlikely(!method)) + return -1; + target->method = method; +#if CYTHON_COMPILING_IN_CPYTHON + #if PY_MAJOR_VERSION >= 3 + if (likely(__Pyx_TypeCheck(method, &PyMethodDescr_Type))) + #else + if (likely(!__Pyx_CyOrPyCFunction_Check(method))) + #endif + { + PyMethodDescrObject *descr = (PyMethodDescrObject*) method; + target->func = descr->d_method->ml_meth; + target->flag = descr->d_method->ml_flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_STACKLESS); + } else +#endif +#if CYTHON_COMPILING_IN_PYPY +#else + if (PyCFunction_Check(method)) +#endif + { + PyObject *self; + int self_found; +#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY + self = PyObject_GetAttrString(method, "__self__"); + if (!self) { + PyErr_Clear(); + } +#else + self = PyCFunction_GET_SELF(method); +#endif + self_found = (self && self != Py_None); +#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY + Py_XDECREF(self); +#endif + if (self_found) { + PyObject *unbound_method = PyCFunction_New(&__Pyx_UnboundCMethod_Def, method); + if (unlikely(!unbound_method)) return -1; + Py_DECREF(method); + target->method = unbound_method; + } + } + return 0; +} + +/* CallUnboundCMethod0 */ +static PyObject* __Pyx__CallUnboundCMethod0(__Pyx_CachedCFunction* cfunc, PyObject* self) { + PyObject *args, *result = NULL; + if (unlikely(!cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL; +#if CYTHON_ASSUME_SAFE_MACROS + args = PyTuple_New(1); + if (unlikely(!args)) goto bad; + Py_INCREF(self); + PyTuple_SET_ITEM(args, 0, self); +#else + args = PyTuple_Pack(1, self); + if (unlikely(!args)) goto bad; +#endif + result = __Pyx_PyObject_Call(cfunc->method, args, NULL); + Py_DECREF(args); 
+bad: + return result; +} + +/* set_iter */ +static CYTHON_INLINE PyObject* __Pyx_set_iterator(PyObject* iterable, int is_set, + Py_ssize_t* p_orig_length, int* p_source_is_set) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 + is_set = is_set || likely(PySet_CheckExact(iterable) || PyFrozenSet_CheckExact(iterable)); + *p_source_is_set = is_set; + if (likely(is_set)) { + *p_orig_length = PySet_Size(iterable); + Py_INCREF(iterable); + return iterable; + } +#else + CYTHON_UNUSED_VAR(is_set); + *p_source_is_set = 0; +#endif + *p_orig_length = 0; + return PyObject_GetIter(iterable); +} +static CYTHON_INLINE int __Pyx_set_iter_next( + PyObject* iter_obj, Py_ssize_t orig_length, + Py_ssize_t* ppos, PyObject **value, + int source_is_set) { + if (!CYTHON_COMPILING_IN_CPYTHON || PY_VERSION_HEX >= 0x030d0000 || unlikely(!source_is_set)) { + *value = PyIter_Next(iter_obj); + if (unlikely(!*value)) { + return __Pyx_IterFinish(); + } + CYTHON_UNUSED_VAR(orig_length); + CYTHON_UNUSED_VAR(ppos); + return 1; + } +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 + if (unlikely(PySet_GET_SIZE(iter_obj) != orig_length)) { + PyErr_SetString( + PyExc_RuntimeError, + "set changed size during iteration"); + return -1; + } + { + Py_hash_t hash; + int ret = _PySet_NextEntry(iter_obj, ppos, value, &hash); + assert (ret != -1); + if (likely(ret)) { + Py_INCREF(*value); + return 1; + } + } +#endif + return 0; +} + +/* CIntToDigits */ +static const char DIGIT_PAIRS_10[2*10*10+1] = { + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899" +}; +static const char DIGIT_PAIRS_8[2*8*8+1] = { + "0001020304050607" + "1011121314151617" + "2021222324252627" + "3031323334353637" + "4041424344454647" + "5051525354555657" + "6061626364656667" + "7071727374757677" +}; +static const 
char DIGITS_HEX[2*16+1] = { + "0123456789abcdef" + "0123456789ABCDEF" +}; + +/* BuildPyUnicode */ +static PyObject* __Pyx_PyUnicode_BuildFromAscii(Py_ssize_t ulength, char* chars, int clength, + int prepend_sign, char padding_char) { + PyObject *uval; + Py_ssize_t uoffset = ulength - clength; +#if CYTHON_USE_UNICODE_INTERNALS + Py_ssize_t i; +#if CYTHON_PEP393_ENABLED + void *udata; + uval = PyUnicode_New(ulength, 127); + if (unlikely(!uval)) return NULL; + udata = PyUnicode_DATA(uval); +#else + Py_UNICODE *udata; + uval = PyUnicode_FromUnicode(NULL, ulength); + if (unlikely(!uval)) return NULL; + udata = PyUnicode_AS_UNICODE(uval); +#endif + if (uoffset > 0) { + i = 0; + if (prepend_sign) { + __Pyx_PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, 0, '-'); + i++; + } + for (; i < uoffset; i++) { + __Pyx_PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, i, padding_char); + } + } + for (i=0; i < clength; i++) { + __Pyx_PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, uoffset+i, chars[i]); + } +#else + { + PyObject *sign = NULL, *padding = NULL; + uval = NULL; + if (uoffset > 0) { + prepend_sign = !!prepend_sign; + if (uoffset > prepend_sign) { + padding = PyUnicode_FromOrdinal(padding_char); + if (likely(padding) && uoffset > prepend_sign + 1) { + PyObject *tmp; + PyObject *repeat = PyInt_FromSsize_t(uoffset - prepend_sign); + if (unlikely(!repeat)) goto done_or_error; + tmp = PyNumber_Multiply(padding, repeat); + Py_DECREF(repeat); + Py_DECREF(padding); + padding = tmp; + } + if (unlikely(!padding)) goto done_or_error; + } + if (prepend_sign) { + sign = PyUnicode_FromOrdinal('-'); + if (unlikely(!sign)) goto done_or_error; + } + } + uval = PyUnicode_DecodeASCII(chars, clength, NULL); + if (likely(uval) && padding) { + PyObject *tmp = PyNumber_Add(padding, uval); + Py_DECREF(uval); + uval = tmp; + } + if (likely(uval) && sign) { + PyObject *tmp = PyNumber_Add(sign, uval); + Py_DECREF(uval); + uval = tmp; + } +done_or_error: + Py_XDECREF(padding); + Py_XDECREF(sign); + } +#endif + 
return uval; +} + +/* CIntToPyUnicode */ +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_From_int(int value, Py_ssize_t width, char padding_char, char format_char) { + char digits[sizeof(int)*3+2]; + char *dpos, *end = digits + sizeof(int)*3+2; + const char *hex_digits = DIGITS_HEX; + Py_ssize_t length, ulength; + int prepend_sign, last_one_off; + int remaining; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (format_char == 'X') { + hex_digits += 16; + format_char = 'x'; + } + remaining = value; + last_one_off = 0; + dpos = end; + do { + int digit_pos; + switch (format_char) { + case 'o': + digit_pos = abs((int)(remaining % (8*8))); + remaining = (int) (remaining / (8*8)); + dpos -= 2; + memcpy(dpos, DIGIT_PAIRS_8 + digit_pos * 2, 2); + last_one_off = (digit_pos < 8); + break; + case 'd': + digit_pos = abs((int)(remaining % (10*10))); + remaining = (int) (remaining / (10*10)); + dpos -= 2; + memcpy(dpos, DIGIT_PAIRS_10 + digit_pos * 2, 2); + last_one_off = (digit_pos < 10); + break; + case 'x': + *(--dpos) = hex_digits[abs((int)(remaining % 16))]; + remaining = (int) (remaining / 16); + break; + default: + assert(0); + break; + } + } while (unlikely(remaining != 0)); + assert(!last_one_off || *dpos == '0'); + dpos += last_one_off; + length = end - dpos; + ulength = length; + prepend_sign = 0; + if (!is_unsigned && value <= neg_one) { + if (padding_char == ' ' || width <= length + 1) { + *(--dpos) = '-'; + ++length; + } else { + prepend_sign = 1; + } + ++ulength; + } + if (width > ulength) { + ulength = width; + } + if (ulength == 1) { + return PyUnicode_FromOrdinal(*dpos); + } + return __Pyx_PyUnicode_BuildFromAscii(ulength, dpos, (int) length, prepend_sign, padding_char); +} + +/* JoinPyUnicode */ +static PyObject* 
__Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength, + Py_UCS4 max_char) { +#if CYTHON_USE_UNICODE_INTERNALS && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + PyObject *result_uval; + int result_ukind, kind_shift; + Py_ssize_t i, char_pos; + void *result_udata; + CYTHON_MAYBE_UNUSED_VAR(max_char); +#if CYTHON_PEP393_ENABLED + result_uval = PyUnicode_New(result_ulength, max_char); + if (unlikely(!result_uval)) return NULL; + result_ukind = (max_char <= 255) ? PyUnicode_1BYTE_KIND : (max_char <= 65535) ? PyUnicode_2BYTE_KIND : PyUnicode_4BYTE_KIND; + kind_shift = (result_ukind == PyUnicode_4BYTE_KIND) ? 2 : result_ukind - 1; + result_udata = PyUnicode_DATA(result_uval); +#else + result_uval = PyUnicode_FromUnicode(NULL, result_ulength); + if (unlikely(!result_uval)) return NULL; + result_ukind = sizeof(Py_UNICODE); + kind_shift = (result_ukind == 4) ? 2 : result_ukind - 1; + result_udata = PyUnicode_AS_UNICODE(result_uval); +#endif + assert(kind_shift == 2 || kind_shift == 1 || kind_shift == 0); + char_pos = 0; + for (i=0; i < value_count; i++) { + int ukind; + Py_ssize_t ulength; + void *udata; + PyObject *uval = PyTuple_GET_ITEM(value_tuple, i); + if (unlikely(__Pyx_PyUnicode_READY(uval))) + goto bad; + ulength = __Pyx_PyUnicode_GET_LENGTH(uval); + if (unlikely(!ulength)) + continue; + if (unlikely((PY_SSIZE_T_MAX >> kind_shift) - ulength < char_pos)) + goto overflow; + ukind = __Pyx_PyUnicode_KIND(uval); + udata = __Pyx_PyUnicode_DATA(uval); + if (!CYTHON_PEP393_ENABLED || ukind == result_ukind) { + memcpy((char *)result_udata + (char_pos << kind_shift), udata, (size_t) (ulength << kind_shift)); + } else { + #if PY_VERSION_HEX >= 0x030d0000 + if (unlikely(PyUnicode_CopyCharacters(result_uval, char_pos, uval, 0, ulength) < 0)) goto bad; + #elif CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030300F0 || defined(_PyUnicode_FastCopyCharacters) + _PyUnicode_FastCopyCharacters(result_uval, char_pos, uval, 0, 
ulength); + #else + Py_ssize_t j; + for (j=0; j < ulength; j++) { + Py_UCS4 uchar = __Pyx_PyUnicode_READ(ukind, udata, j); + __Pyx_PyUnicode_WRITE(result_ukind, result_udata, char_pos+j, uchar); + } + #endif + } + char_pos += ulength; + } + return result_uval; +overflow: + PyErr_SetString(PyExc_OverflowError, "join() result is too long for a Python string"); +bad: + Py_DECREF(result_uval); + return NULL; +#else + CYTHON_UNUSED_VAR(max_char); + CYTHON_UNUSED_VAR(result_ulength); + CYTHON_UNUSED_VAR(value_count); + return PyUnicode_Join(__pyx_empty_unicode, value_tuple); +#endif +} + +/* UnicodeConcatInPlace */ +# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 +static int +__Pyx_unicode_modifiable(PyObject *unicode) +{ + if (Py_REFCNT(unicode) != 1) + return 0; + if (!PyUnicode_CheckExact(unicode)) + return 0; + if (PyUnicode_CHECK_INTERNED(unicode)) + return 0; + return 1; +} +static CYTHON_INLINE PyObject *__Pyx_PyUnicode_ConcatInPlaceImpl(PyObject **p_left, PyObject *right + #if CYTHON_REFNANNY + , void* __pyx_refnanny + #endif + ) { + PyObject *left = *p_left; + Py_ssize_t left_len, right_len, new_len; + if (unlikely(__Pyx_PyUnicode_READY(left) == -1)) + return NULL; + if (unlikely(__Pyx_PyUnicode_READY(right) == -1)) + return NULL; + left_len = PyUnicode_GET_LENGTH(left); + if (left_len == 0) { + Py_INCREF(right); + return right; + } + right_len = PyUnicode_GET_LENGTH(right); + if (right_len == 0) { + Py_INCREF(left); + return left; + } + if (unlikely(left_len > PY_SSIZE_T_MAX - right_len)) { + PyErr_SetString(PyExc_OverflowError, + "strings are too large to concat"); + return NULL; + } + new_len = left_len + right_len; + if (__Pyx_unicode_modifiable(left) + && PyUnicode_CheckExact(right) + && PyUnicode_KIND(right) <= PyUnicode_KIND(left) + && !(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right))) { + int ret; + __Pyx_GIVEREF(*p_left); + ret = PyUnicode_Resize(p_left, new_len); + __Pyx_GOTREF(*p_left); + if (unlikely(ret != 0)) + return NULL; + #if 
PY_VERSION_HEX >= 0x030d0000 + if (unlikely(PyUnicode_CopyCharacters(*p_left, left_len, right, 0, right_len) < 0)) return NULL; + #else + _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len); + #endif + __Pyx_INCREF(*p_left); + __Pyx_GIVEREF(*p_left); + return *p_left; + } else { + return __Pyx_PyUnicode_Concat(left, right); + } + } +#endif + +/* CallUnboundCMethod1 */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg) { + if (likely(cfunc->func)) { + int flag = cfunc->flag; + if (flag == METH_O) { + return (*(cfunc->func))(self, arg); + } else if ((PY_VERSION_HEX >= 0x030600B1) && flag == METH_FASTCALL) { + #if PY_VERSION_HEX >= 0x030700A0 + return (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)cfunc->func)(self, &arg, 1); + #else + return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL); + #endif + } else if ((PY_VERSION_HEX >= 0x030700A0) && flag == (METH_FASTCALL | METH_KEYWORDS)) { + return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL); + } + } + return __Pyx__CallUnboundCMethod1(cfunc, self, arg); +} +#endif +static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg){ + PyObject *args, *result = NULL; + if (unlikely(!cfunc->func && !cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL; +#if CYTHON_COMPILING_IN_CPYTHON + if (cfunc->func && (cfunc->flag & METH_VARARGS)) { + args = PyTuple_New(1); + if (unlikely(!args)) goto bad; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + if (cfunc->flag & METH_KEYWORDS) + result = (*(PyCFunctionWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, NULL); + else + result = (*cfunc->func)(self, args); + } else { + args = PyTuple_New(2); + if (unlikely(!args)) goto bad; + Py_INCREF(self); + PyTuple_SET_ITEM(args, 0, self); + Py_INCREF(arg); + 
PyTuple_SET_ITEM(args, 1, arg); + result = __Pyx_PyObject_Call(cfunc->method, args, NULL); + } +#else + args = PyTuple_Pack(2, self, arg); + if (unlikely(!args)) goto bad; + result = __Pyx_PyObject_Call(cfunc->method, args, NULL); +#endif +bad: + Py_XDECREF(args); + return result; +} + +/* CallUnboundCMethod2 */ +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030600B1 +static CYTHON_INLINE PyObject *__Pyx_CallUnboundCMethod2(__Pyx_CachedCFunction *cfunc, PyObject *self, PyObject *arg1, PyObject *arg2) { + if (likely(cfunc->func)) { + PyObject *args[2] = {arg1, arg2}; + if (cfunc->flag == METH_FASTCALL) { + #if PY_VERSION_HEX >= 0x030700A0 + return (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)cfunc->func)(self, args, 2); + #else + return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, 2, NULL); + #endif + } + #if PY_VERSION_HEX >= 0x030700A0 + if (cfunc->flag == (METH_FASTCALL | METH_KEYWORDS)) + return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, 2, NULL); + #endif + } + return __Pyx__CallUnboundCMethod2(cfunc, self, arg1, arg2); +} +#endif +static PyObject* __Pyx__CallUnboundCMethod2(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg1, PyObject* arg2){ + PyObject *args, *result = NULL; + if (unlikely(!cfunc->func && !cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL; +#if CYTHON_COMPILING_IN_CPYTHON + if (cfunc->func && (cfunc->flag & METH_VARARGS)) { + args = PyTuple_New(2); + if (unlikely(!args)) goto bad; + Py_INCREF(arg1); + PyTuple_SET_ITEM(args, 0, arg1); + Py_INCREF(arg2); + PyTuple_SET_ITEM(args, 1, arg2); + if (cfunc->flag & METH_KEYWORDS) + result = (*(PyCFunctionWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, NULL); + else + result = (*cfunc->func)(self, args); + } else { + args = PyTuple_New(3); + if (unlikely(!args)) goto bad; + Py_INCREF(self); + PyTuple_SET_ITEM(args, 0, self); + Py_INCREF(arg1); + PyTuple_SET_ITEM(args, 
1, arg1); + Py_INCREF(arg2); + PyTuple_SET_ITEM(args, 2, arg2); + result = __Pyx_PyObject_Call(cfunc->method, args, NULL); + } +#else + args = PyTuple_Pack(3, self, arg1, arg2); + if (unlikely(!args)) goto bad; + result = __Pyx_PyObject_Call(cfunc->method, args, NULL); +#endif +bad: + Py_XDECREF(args); + return result; +} + +/* dict_getitem_default */ +static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value) { + PyObject* value; +#if PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) + value = PyDict_GetItemWithError(d, key); + if (unlikely(!value)) { + if (unlikely(PyErr_Occurred())) + return NULL; + value = default_value; + } + Py_INCREF(value); + if ((1)); +#else + if (PyString_CheckExact(key) || PyUnicode_CheckExact(key) || PyInt_CheckExact(key)) { + value = PyDict_GetItem(d, key); + if (unlikely(!value)) { + value = default_value; + } + Py_INCREF(value); + } +#endif + else { + if (default_value == Py_None) + value = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PyDict_Type_get, d, key); + else + value = __Pyx_CallUnboundCMethod2(&__pyx_umethod_PyDict_Type_get, d, key, default_value); + } + return value; +} + +/* GetItemInt */ +static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { + PyObject *r; + if (unlikely(!j)) return NULL; + r = PyObject_GetItem(o, j); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyList_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyList_GET_SIZE(o)))) { + PyObject *r = PyList_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); 
+#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyTuple_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); + if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) { + PyObject *r = PyList_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } + else if (PyTuple_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? 
i : i + PyTuple_GET_SIZE(o); + if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } else { + PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; + PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; + if (mm && mm->mp_subscript) { + PyObject *r, *key = PyInt_FromSsize_t(i); + if (unlikely(!key)) return NULL; + r = mm->mp_subscript(o, key); + Py_DECREF(key); + return r; + } + if (likely(sm && sm->sq_item)) { + if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { + Py_ssize_t l = sm->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return NULL; + PyErr_Clear(); + } + } + return sm->sq_item(o, i); + } + } +#else + if (is_list || !PyMapping_Check(o)) { + return PySequence_GetItem(o, i); + } +#endif + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +} + +/* PyUnicode_Unicode */ +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Unicode(PyObject *obj) { + if (unlikely(obj == Py_None)) + obj = __pyx_kp_u_None; + return __Pyx_NewRef(obj); +} + +/* ArgTypeTest */ +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) +{ + __Pyx_TypeName type_name; + __Pyx_TypeName obj_type_name; + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + else if (exact) { + #if PY_MAJOR_VERSION == 2 + if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(__Pyx_TypeCheck(obj, type))) return 1; + } + type_name = __Pyx_PyType_GetName(type); + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME + ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name); + __Pyx_DECREF_TypeName(type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return 0; +} + +/* 
KeywordStringCheck */ +static int __Pyx_CheckKeywordStrings( + PyObject *kw, + const char* function_name, + int kw_allowed) +{ + PyObject* key = 0; + Py_ssize_t pos = 0; +#if CYTHON_COMPILING_IN_PYPY + if (!kw_allowed && PyDict_Next(kw, &pos, &key, 0)) + goto invalid_keyword; + return 1; +#else + if (CYTHON_METH_FASTCALL && likely(PyTuple_Check(kw))) { + Py_ssize_t kwsize; +#if CYTHON_ASSUME_SAFE_MACROS + kwsize = PyTuple_GET_SIZE(kw); +#else + kwsize = PyTuple_Size(kw); + if (kwsize < 0) return 0; +#endif + if (unlikely(kwsize == 0)) + return 1; + if (!kw_allowed) { +#if CYTHON_ASSUME_SAFE_MACROS + key = PyTuple_GET_ITEM(kw, 0); +#else + key = PyTuple_GetItem(kw, pos); + if (!key) return 0; +#endif + goto invalid_keyword; + } +#if PY_VERSION_HEX < 0x03090000 + for (pos = 0; pos < kwsize; pos++) { +#if CYTHON_ASSUME_SAFE_MACROS + key = PyTuple_GET_ITEM(kw, pos); +#else + key = PyTuple_GetItem(kw, pos); + if (!key) return 0; +#endif + if (unlikely(!PyUnicode_Check(key))) + goto invalid_keyword_type; + } +#endif + return 1; + } + while (PyDict_Next(kw, &pos, &key, 0)) { + #if PY_MAJOR_VERSION < 3 + if (unlikely(!PyString_Check(key))) + #endif + if (unlikely(!PyUnicode_Check(key))) + goto invalid_keyword_type; + } + if (!kw_allowed && unlikely(key)) + goto invalid_keyword; + return 1; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + return 0; +#endif +invalid_keyword: + #if PY_MAJOR_VERSION < 3 + PyErr_Format(PyExc_TypeError, + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + PyErr_Format(PyExc_TypeError, + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif + return 0; +} + +/* RaiseException */ +#if PY_MAJOR_VERSION < 3 +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + __Pyx_PyThreadState_declare + CYTHON_UNUSED_VAR(cause); + Py_XINCREF(type); + if (!value || value == Py_None) 
+ value = NULL; + else + Py_INCREF(value); + if (!tb || tb == Py_None) + tb = NULL; + else { + Py_INCREF(tb); + if (!PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto raise_error; + } + } + if (PyType_Check(type)) { +#if CYTHON_COMPILING_IN_PYPY + if (!value) { + Py_INCREF(Py_None); + value = Py_None; + } +#endif + PyErr_NormalizeException(&type, &value, &tb); + } else { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + value = type; + type = (PyObject*) Py_TYPE(type); + Py_INCREF(type); + if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto raise_error; + } + } + __Pyx_PyThreadState_assign + __Pyx_ErrRestore(type, value, tb); + return; +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(tb); + return; +} +#else +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + PyObject* owned_instance = NULL; + if (tb == Py_None) { + tb = 0; + } else if (tb && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto bad; + } + if (value == Py_None) + value = 0; + if (PyExceptionInstance_Check(type)) { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto bad; + } + value = type; + type = (PyObject*) Py_TYPE(value); + } else if (PyExceptionClass_Check(type)) { + PyObject *instance_class = NULL; + if (value && PyExceptionInstance_Check(value)) { + instance_class = (PyObject*) Py_TYPE(value); + if (instance_class != type) { + int is_subclass = PyObject_IsSubclass(instance_class, type); + if (!is_subclass) { + instance_class = NULL; + } else if (unlikely(is_subclass == -1)) { + goto bad; + } else { + type = instance_class; + } + } + } 
+ if (!instance_class) { + PyObject *args; + if (!value) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } else + args = PyTuple_Pack(1, value); + if (!args) + goto bad; + owned_instance = PyObject_Call(type, args, NULL); + Py_DECREF(args); + if (!owned_instance) + goto bad; + value = owned_instance; + if (!PyExceptionInstance_Check(value)) { + PyErr_Format(PyExc_TypeError, + "calling %R should have returned an instance of " + "BaseException, not %R", + type, Py_TYPE(value)); + goto bad; + } + } + } else { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto bad; + } + if (cause) { + PyObject *fixed_cause; + if (cause == Py_None) { + fixed_cause = NULL; + } else if (PyExceptionClass_Check(cause)) { + fixed_cause = PyObject_CallObject(cause, NULL); + if (fixed_cause == NULL) + goto bad; + } else if (PyExceptionInstance_Check(cause)) { + fixed_cause = cause; + Py_INCREF(fixed_cause); + } else { + PyErr_SetString(PyExc_TypeError, + "exception causes must derive from " + "BaseException"); + goto bad; + } + PyException_SetCause(value, fixed_cause); + } + PyErr_SetObject(type, value); + if (tb) { + #if PY_VERSION_HEX >= 0x030C00A6 + PyException_SetTraceback(value, tb); + #elif CYTHON_FAST_THREAD_STATE + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +/* ValidateBasesTuple */ +#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS +static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases) { + 
Py_ssize_t i, n; +#if CYTHON_ASSUME_SAFE_MACROS + n = PyTuple_GET_SIZE(bases); +#else + n = PyTuple_Size(bases); + if (n < 0) return -1; +#endif + for (i = 1; i < n; i++) + { +#if CYTHON_AVOID_BORROWED_REFS + PyObject *b0 = PySequence_GetItem(bases, i); + if (!b0) return -1; +#elif CYTHON_ASSUME_SAFE_MACROS + PyObject *b0 = PyTuple_GET_ITEM(bases, i); +#else + PyObject *b0 = PyTuple_GetItem(bases, i); + if (!b0) return -1; +#endif + PyTypeObject *b; +#if PY_MAJOR_VERSION < 3 + if (PyClass_Check(b0)) + { + PyErr_Format(PyExc_TypeError, "base class '%.200s' is an old-style class", + PyString_AS_STRING(((PyClassObject*)b0)->cl_name)); +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + return -1; + } +#endif + b = (PyTypeObject*) b0; + if (!__Pyx_PyType_HasFeature(b, Py_TPFLAGS_HEAPTYPE)) + { + __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); + PyErr_Format(PyExc_TypeError, + "base class '" __Pyx_FMT_TYPENAME "' is not a heap type", b_name); + __Pyx_DECREF_TypeName(b_name); +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + return -1; + } + if (dictoffset == 0) + { + Py_ssize_t b_dictoffset = 0; +#if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + b_dictoffset = b->tp_dictoffset; +#else + PyObject *py_b_dictoffset = PyObject_GetAttrString((PyObject*)b, "__dictoffset__"); + if (!py_b_dictoffset) goto dictoffset_return; + b_dictoffset = PyLong_AsSsize_t(py_b_dictoffset); + Py_DECREF(py_b_dictoffset); + if (b_dictoffset == -1 && PyErr_Occurred()) goto dictoffset_return; +#endif + if (b_dictoffset) { + { + __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); + PyErr_Format(PyExc_TypeError, + "extension type '%.200s' has no __dict__ slot, " + "but base type '" __Pyx_FMT_TYPENAME "' has: " + "either add 'cdef dict __dict__' to the extension type " + "or add '__slots__ = [...]' to the base type", + type_name, b_name); + __Pyx_DECREF_TypeName(b_name); + } +#if !(CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY) + dictoffset_return: +#endif +#if 
CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + return -1; + } + } +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + } + return 0; +} +#endif + +/* PyType_Ready */ +static int __Pyx_PyType_Ready(PyTypeObject *t) { +#if CYTHON_USE_TYPE_SPECS || !(CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API) || defined(PYSTON_MAJOR_VERSION) + (void)__Pyx_PyObject_CallMethod0; +#if CYTHON_USE_TYPE_SPECS + (void)__Pyx_validate_bases_tuple; +#endif + return PyType_Ready(t); +#else + int r; + PyObject *bases = __Pyx_PyType_GetSlot(t, tp_bases, PyObject*); + if (bases && unlikely(__Pyx_validate_bases_tuple(t->tp_name, t->tp_dictoffset, bases) == -1)) + return -1; +#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) + { + int gc_was_enabled; + #if PY_VERSION_HEX >= 0x030A00b1 + gc_was_enabled = PyGC_Disable(); + (void)__Pyx_PyObject_CallMethod0; + #else + PyObject *ret, *py_status; + PyObject *gc = NULL; + #if PY_VERSION_HEX >= 0x030700a1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM+0 >= 0x07030400) + gc = PyImport_GetModule(__pyx_kp_u_gc); + #endif + if (unlikely(!gc)) gc = PyImport_Import(__pyx_kp_u_gc); + if (unlikely(!gc)) return -1; + py_status = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_isenabled); + if (unlikely(!py_status)) { + Py_DECREF(gc); + return -1; + } + gc_was_enabled = __Pyx_PyObject_IsTrue(py_status); + Py_DECREF(py_status); + if (gc_was_enabled > 0) { + ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_disable); + if (unlikely(!ret)) { + Py_DECREF(gc); + return -1; + } + Py_DECREF(ret); + } else if (unlikely(gc_was_enabled == -1)) { + Py_DECREF(gc); + return -1; + } + #endif + t->tp_flags |= Py_TPFLAGS_HEAPTYPE; +#if PY_VERSION_HEX >= 0x030A0000 + t->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; +#endif +#else + (void)__Pyx_PyObject_CallMethod0; +#endif + r = PyType_Ready(t); +#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) + t->tp_flags &= ~Py_TPFLAGS_HEAPTYPE; + #if PY_VERSION_HEX >= 0x030A00b1 + if 
(gc_was_enabled) + PyGC_Enable(); + #else + if (gc_was_enabled) { + PyObject *tp, *v, *tb; + PyErr_Fetch(&tp, &v, &tb); + ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_enable); + if (likely(ret || r == -1)) { + Py_XDECREF(ret); + PyErr_Restore(tp, v, tb); + } else { + Py_XDECREF(tp); + Py_XDECREF(v); + Py_XDECREF(tb); + r = -1; + } + } + Py_DECREF(gc); + #endif + } +#endif + return r; +#endif +} + +/* PyObject_GenericGetAttrNoDict */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static PyObject *__Pyx_RaiseGenericGetAttributeError(PyTypeObject *tp, PyObject *attr_name) { + __Pyx_TypeName type_name = __Pyx_PyType_GetName(tp); + PyErr_Format(PyExc_AttributeError, +#if PY_MAJOR_VERSION >= 3 + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", + type_name, attr_name); +#else + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", + type_name, PyString_AS_STRING(attr_name)); +#endif + __Pyx_DECREF_TypeName(type_name); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name) { + PyObject *descr; + PyTypeObject *tp = Py_TYPE(obj); + if (unlikely(!PyString_Check(attr_name))) { + return PyObject_GenericGetAttr(obj, attr_name); + } + assert(!tp->tp_dictoffset); + descr = _PyType_Lookup(tp, attr_name); + if (unlikely(!descr)) { + return __Pyx_RaiseGenericGetAttributeError(tp, attr_name); + } + Py_INCREF(descr); + #if PY_MAJOR_VERSION < 3 + if (likely(PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_HAVE_CLASS))) + #endif + { + descrgetfunc f = Py_TYPE(descr)->tp_descr_get; + if (unlikely(f)) { + PyObject *res = f(descr, obj, (PyObject *)tp); + Py_DECREF(descr); + return res; + } + } + return descr; +} +#endif + +/* PyObject_GenericGetAttr */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name) { + if (unlikely(Py_TYPE(obj)->tp_dictoffset)) { + 
return PyObject_GenericGetAttr(obj, attr_name); + } + return __Pyx_PyObject_GenericGetAttrNoDict(obj, attr_name); +} +#endif + +/* SetVTable */ +static int __Pyx_SetVtable(PyTypeObject *type, void *vtable) { + PyObject *ob = PyCapsule_New(vtable, 0, 0); + if (unlikely(!ob)) + goto bad; +#if CYTHON_COMPILING_IN_LIMITED_API + if (unlikely(PyObject_SetAttr((PyObject *) type, __pyx_n_s_pyx_vtable, ob) < 0)) +#else + if (unlikely(PyDict_SetItem(type->tp_dict, __pyx_n_s_pyx_vtable, ob) < 0)) +#endif + goto bad; + Py_DECREF(ob); + return 0; +bad: + Py_XDECREF(ob); + return -1; +} + +/* GetVTable */ +static void* __Pyx_GetVtable(PyTypeObject *type) { + void* ptr; +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *ob = PyObject_GetAttr((PyObject *)type, __pyx_n_s_pyx_vtable); +#else + PyObject *ob = PyObject_GetItem(type->tp_dict, __pyx_n_s_pyx_vtable); +#endif + if (!ob) + goto bad; + ptr = PyCapsule_GetPointer(ob, 0); + if (!ptr && !PyErr_Occurred()) + PyErr_SetString(PyExc_RuntimeError, "invalid vtable found for imported type"); + Py_DECREF(ob); + return ptr; +bad: + Py_XDECREF(ob); + return NULL; +} + +/* MergeVTables */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __Pyx_MergeVtables(PyTypeObject *type) { + int i; + void** base_vtables; + __Pyx_TypeName tp_base_name; + __Pyx_TypeName base_name; + void* unknown = (void*)-1; + PyObject* bases = type->tp_bases; + int base_depth = 0; + { + PyTypeObject* base = type->tp_base; + while (base) { + base_depth += 1; + base = base->tp_base; + } + } + base_vtables = (void**) malloc(sizeof(void*) * (size_t)(base_depth + 1)); + base_vtables[0] = unknown; + for (i = 1; i < PyTuple_GET_SIZE(bases); i++) { + void* base_vtable = __Pyx_GetVtable(((PyTypeObject*)PyTuple_GET_ITEM(bases, i))); + if (base_vtable != NULL) { + int j; + PyTypeObject* base = type->tp_base; + for (j = 0; j < base_depth; j++) { + if (base_vtables[j] == unknown) { + base_vtables[j] = __Pyx_GetVtable(base); + base_vtables[j + 1] = unknown; + } + if 
(base_vtables[j] == base_vtable) { + break; + } else if (base_vtables[j] == NULL) { + goto bad; + } + base = base->tp_base; + } + } + } + PyErr_Clear(); + free(base_vtables); + return 0; +bad: + tp_base_name = __Pyx_PyType_GetName(type->tp_base); + base_name = __Pyx_PyType_GetName((PyTypeObject*)PyTuple_GET_ITEM(bases, i)); + PyErr_Format(PyExc_TypeError, + "multiple bases have vtable conflict: '" __Pyx_FMT_TYPENAME "' and '" __Pyx_FMT_TYPENAME "'", tp_base_name, base_name); + __Pyx_DECREF_TypeName(tp_base_name); + __Pyx_DECREF_TypeName(base_name); + free(base_vtables); + return -1; +} +#endif + +/* SetupReduce */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __Pyx_setup_reduce_is_named(PyObject* meth, PyObject* name) { + int ret; + PyObject *name_attr; + name_attr = __Pyx_PyObject_GetAttrStrNoError(meth, __pyx_n_s_name); + if (likely(name_attr)) { + ret = PyObject_RichCompareBool(name_attr, name, Py_EQ); + } else { + ret = -1; + } + if (unlikely(ret < 0)) { + PyErr_Clear(); + ret = 0; + } + Py_XDECREF(name_attr); + return ret; +} +static int __Pyx_setup_reduce(PyObject* type_obj) { + int ret = 0; + PyObject *object_reduce = NULL; + PyObject *object_getstate = NULL; + PyObject *object_reduce_ex = NULL; + PyObject *reduce = NULL; + PyObject *reduce_ex = NULL; + PyObject *reduce_cython = NULL; + PyObject *setstate = NULL; + PyObject *setstate_cython = NULL; + PyObject *getstate = NULL; +#if CYTHON_USE_PYTYPE_LOOKUP + getstate = _PyType_Lookup((PyTypeObject*)type_obj, __pyx_n_s_getstate); +#else + getstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_getstate); + if (!getstate && PyErr_Occurred()) { + goto __PYX_BAD; + } +#endif + if (getstate) { +#if CYTHON_USE_PYTYPE_LOOKUP + object_getstate = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_getstate); +#else + object_getstate = __Pyx_PyObject_GetAttrStrNoError((PyObject*)&PyBaseObject_Type, __pyx_n_s_getstate); + if (!object_getstate && PyErr_Occurred()) { + goto __PYX_BAD; + } +#endif + if 
(object_getstate != getstate) { + goto __PYX_GOOD; + } + } +#if CYTHON_USE_PYTYPE_LOOKUP + object_reduce_ex = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; +#else + object_reduce_ex = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; +#endif + reduce_ex = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce_ex); if (unlikely(!reduce_ex)) goto __PYX_BAD; + if (reduce_ex == object_reduce_ex) { +#if CYTHON_USE_PYTYPE_LOOKUP + object_reduce = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; +#else + object_reduce = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; +#endif + reduce = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce); if (unlikely(!reduce)) goto __PYX_BAD; + if (reduce == object_reduce || __Pyx_setup_reduce_is_named(reduce, __pyx_n_s_reduce_cython)) { + reduce_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_reduce_cython); + if (likely(reduce_cython)) { + ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce, reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + } else if (reduce == object_reduce || PyErr_Occurred()) { + goto __PYX_BAD; + } + setstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate); + if (!setstate) PyErr_Clear(); + if (!setstate || __Pyx_setup_reduce_is_named(setstate, __pyx_n_s_setstate_cython)) { + setstate_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate_cython); + if (likely(setstate_cython)) { + ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate, setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate_cython); if 
(unlikely(ret < 0)) goto __PYX_BAD; + } else if (!setstate || PyErr_Occurred()) { + goto __PYX_BAD; + } + } + PyType_Modified((PyTypeObject*)type_obj); + } + } + goto __PYX_GOOD; +__PYX_BAD: + if (!PyErr_Occurred()) { + __Pyx_TypeName type_obj_name = + __Pyx_PyType_GetName((PyTypeObject*)type_obj); + PyErr_Format(PyExc_RuntimeError, + "Unable to initialize pickling for " __Pyx_FMT_TYPENAME, type_obj_name); + __Pyx_DECREF_TypeName(type_obj_name); + } + ret = -1; +__PYX_GOOD: +#if !CYTHON_USE_PYTYPE_LOOKUP + Py_XDECREF(object_reduce); + Py_XDECREF(object_reduce_ex); + Py_XDECREF(object_getstate); + Py_XDECREF(getstate); +#endif + Py_XDECREF(reduce); + Py_XDECREF(reduce_ex); + Py_XDECREF(reduce_cython); + Py_XDECREF(setstate); + Py_XDECREF(setstate_cython); + return ret; +} +#endif + +/* Import */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *module = 0; + PyObject *empty_dict = 0; + PyObject *empty_list = 0; + #if PY_MAJOR_VERSION < 3 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (unlikely(!py_import)) + goto bad; + if (!from_list) { + empty_list = PyList_New(0); + if (unlikely(!empty_list)) + goto bad; + from_list = empty_list; + } + #endif + empty_dict = PyDict_New(); + if (unlikely(!empty_dict)) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if (strchr(__Pyx_MODULE_NAME, '.') != NULL) { + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, 1); + if (unlikely(!module)) { + if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_MAJOR_VERSION < 3 + PyObject *py_level = PyInt_FromLong(level); + if (unlikely(!py_level)) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, 
__pyx_d, empty_dict, from_list, level); + #endif + } + } +bad: + Py_XDECREF(empty_dict); + Py_XDECREF(empty_list); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_import); + #endif + return module; +} + +/* ImportDottedModule */ +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) { + PyObject *partial_name = NULL, *slice = NULL, *sep = NULL; + if (unlikely(PyErr_Occurred())) { + PyErr_Clear(); + } + if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) { + partial_name = name; + } else { + slice = PySequence_GetSlice(parts_tuple, 0, count); + if (unlikely(!slice)) + goto bad; + sep = PyUnicode_FromStringAndSize(".", 1); + if (unlikely(!sep)) + goto bad; + partial_name = PyUnicode_Join(sep, slice); + } + PyErr_Format( +#if PY_MAJOR_VERSION < 3 + PyExc_ImportError, + "No module named '%s'", PyString_AS_STRING(partial_name)); +#else +#if PY_VERSION_HEX >= 0x030600B1 + PyExc_ModuleNotFoundError, +#else + PyExc_ImportError, +#endif + "No module named '%U'", partial_name); +#endif +bad: + Py_XDECREF(sep); + Py_XDECREF(slice); + Py_XDECREF(partial_name); + return NULL; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) { + PyObject *imported_module; +#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) + PyObject *modules = PyImport_GetModuleDict(); + if (unlikely(!modules)) + return NULL; + imported_module = __Pyx_PyDict_GetItemStr(modules, name); + Py_XINCREF(imported_module); +#else + imported_module = PyImport_GetModule(name); +#endif + return imported_module; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) { + Py_ssize_t i, nparts; + nparts = PyTuple_GET_SIZE(parts_tuple); + for (i=1; i < nparts && module; i++) { + PyObject *part, *submodule; +#if CYTHON_ASSUME_SAFE_MACROS && 
!CYTHON_AVOID_BORROWED_REFS + part = PyTuple_GET_ITEM(parts_tuple, i); +#else + part = PySequence_ITEM(parts_tuple, i); +#endif + submodule = __Pyx_PyObject_GetAttrStrNoError(module, part); +#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(part); +#endif + Py_DECREF(module); + module = submodule; + } + if (unlikely(!module)) { + return __Pyx__ImportDottedModule_Error(name, parts_tuple, i); + } + return module; +} +#endif +static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if PY_MAJOR_VERSION < 3 + PyObject *module, *from_list, *star = __pyx_n_s__171; + CYTHON_UNUSED_VAR(parts_tuple); + from_list = PyList_New(1); + if (unlikely(!from_list)) + return NULL; + Py_INCREF(star); + PyList_SET_ITEM(from_list, 0, star); + module = __Pyx_Import(name, from_list, 0); + Py_DECREF(from_list); + return module; +#else + PyObject *imported_module; + PyObject *module = __Pyx_Import(name, NULL, 0); + if (!parts_tuple || unlikely(!module)) + return module; + imported_module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(imported_module)) { + Py_DECREF(module); + return imported_module; + } + PyErr_Clear(); + return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); +#endif +} +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1 + PyObject *module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(module)) { + PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec); + if (likely(spec)) { + PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing); + if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) { + Py_DECREF(spec); + spec = NULL; + } + Py_XDECREF(unsafe); + } + if (likely(!spec)) { + PyErr_Clear(); + return module; + } + Py_DECREF(spec); + Py_DECREF(module); + } else if (PyErr_Occurred()) { + PyErr_Clear(); + } +#endif + return 
__Pyx__ImportDottedModule(name, parts_tuple); +} + +/* ImportFrom */ +static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { + PyObject* value = __Pyx_PyObject_GetAttrStr(module, name); + if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) { + const char* module_name_str = 0; + PyObject* module_name = 0; + PyObject* module_dot = 0; + PyObject* full_name = 0; + PyErr_Clear(); + module_name_str = PyModule_GetName(module); + if (unlikely(!module_name_str)) { goto modbad; } + module_name = PyUnicode_FromString(module_name_str); + if (unlikely(!module_name)) { goto modbad; } + module_dot = PyUnicode_Concat(module_name, __pyx_kp_u__140); + if (unlikely(!module_dot)) { goto modbad; } + full_name = PyUnicode_Concat(module_dot, name); + if (unlikely(!full_name)) { goto modbad; } + #if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) + { + PyObject *modules = PyImport_GetModuleDict(); + if (unlikely(!modules)) + goto modbad; + value = PyObject_GetItem(modules, full_name); + } + #else + value = PyImport_GetModule(full_name); + #endif + modbad: + Py_XDECREF(full_name); + Py_XDECREF(module_dot); + Py_XDECREF(module_name); + } + if (unlikely(!value)) { + PyErr_Format(PyExc_ImportError, + #if PY_MAJOR_VERSION < 3 + "cannot import name %.230s", PyString_AS_STRING(name)); + #else + "cannot import name %S", name); + #endif + } + return value; +} + +/* CLineInTraceback */ +#ifndef CYTHON_CLINE_IN_TRACEBACK +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) { + PyObject *use_cline; + PyObject *ptype, *pvalue, *ptraceback; +#if CYTHON_COMPILING_IN_CPYTHON + PyObject **cython_runtime_dict; +#endif + CYTHON_MAYBE_UNUSED_VAR(tstate); + if (unlikely(!__pyx_cython_runtime)) { + return c_line; + } + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); +#if CYTHON_COMPILING_IN_CPYTHON + cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); + if (likely(cython_runtime_dict)) { + 
__PYX_PY_DICT_LOOKUP_IF_MODIFIED( + use_cline, *cython_runtime_dict, + __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) + } else +#endif + { + PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); + if (use_cline_obj) { + use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True; + Py_DECREF(use_cline_obj); + } else { + PyErr_Clear(); + use_cline = NULL; + } + } + if (!use_cline) { + c_line = 0; + (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); + } + else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { + c_line = 0; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + return c_line; +} +#endif + +/* CodeObjectCache */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int 
pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} +#endif + +/* AddTraceback */ +#include "compile.h" +#include "frameobject.h" +#include "traceback.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API && !defined(PYPY_VERSION) + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict, + PyObject *firstlineno, PyObject *name) { + PyObject *replace = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) 
return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL; + replace = PyObject_GetAttrString(code, "replace"); + if (likely(replace)) { + PyObject *result; + result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict); + Py_DECREF(replace); + return result; + } + PyErr_Clear(); + #if __PYX_LIMITED_VERSION_HEX < 0x030780000 + { + PyObject *compiled = NULL, *result = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL; + compiled = Py_CompileString( + "out = type(code)(\n" + " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n" + " code.co_flags, code.co_code, code.co_consts, code.co_names,\n" + " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n" + " code.co_lnotab)\n", "", Py_file_input); + if (!compiled) return NULL; + result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict); + Py_DECREF(compiled); + if (!result) PyErr_Print(); + Py_DECREF(result); + result = PyDict_GetItemString(scratch_dict, "out"); + if (result) Py_INCREF(result); + return result; + } + #else + return NULL; + #endif +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL; + PyObject *replace = NULL, *getframe = NULL, *frame = NULL; + PyObject *exc_type, *exc_value, *exc_traceback; + int success = 0; + if (c_line) { + (void) __pyx_cfilenm; + (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line); + } + PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); + code_object = Py_CompileString("_getframe()", filename, Py_eval_input); + if (unlikely(!code_object)) goto bad; + py_py_line = PyLong_FromLong(py_line); + if (unlikely(!py_py_line)) goto bad; + py_funcname = PyUnicode_FromString(funcname); + if (unlikely(!py_funcname)) goto bad; 
+ dict = PyDict_New(); + if (unlikely(!dict)) goto bad; + { + PyObject *old_code_object = code_object; + code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, py_py_line, py_funcname); + Py_DECREF(old_code_object); + } + if (unlikely(!code_object)) goto bad; + getframe = PySys_GetObject("_getframe"); + if (unlikely(!getframe)) goto bad; + if (unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad; + frame = PyEval_EvalCode(code_object, dict, dict); + if (unlikely(!frame) || frame == Py_None) goto bad; + success = 1; + bad: + PyErr_Restore(exc_type, exc_value, exc_traceback); + Py_XDECREF(code_object); + Py_XDECREF(py_py_line); + Py_XDECREF(py_funcname); + Py_XDECREF(dict); + Py_XDECREF(replace); + if (success) { + PyTraceBack_Here( + (struct _frame*)frame); + } + Py_XDECREF(frame); +} +#else +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = NULL; + PyObject *py_funcname = NULL; + #if PY_MAJOR_VERSION < 3 + PyObject *py_srcfile = NULL; + py_srcfile = PyString_FromString(filename); + if (!py_srcfile) goto bad; + #endif + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + funcname = PyUnicode_AsUTF8(py_funcname); + if (!funcname) goto bad; + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + if (!py_funcname) goto bad; + #endif + } + #if PY_MAJOR_VERSION < 3 + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, 
/*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + #else + py_code = PyCode_NewEmpty(filename, funcname, py_line); + #endif + Py_XDECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_funcname); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_srcfile); + #endif + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject *ptype, *pvalue, *ptraceback; + if (c_line) { + c_line = __Pyx_CLineForTraceback(tstate, c_line); + } + py_code = __pyx_find_code_object(c_line ? -c_line : py_line); + if (!py_code) { + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) { + /* If the code object creation fails, then we should clear the + fetched exception references and propagate the new exception */ + Py_XDECREF(ptype); + Py_XDECREF(pvalue); + Py_XDECREF(ptraceback); + goto bad; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + __pyx_insert_code_object(c_line ? 
-c_line : py_line, py_code); + } + py_frame = PyFrame_New( + tstate, /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + __Pyx_PyFrame_SetLineNumber(py_frame, py_line); + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} +#endif + +/* CIntFromPyVerify */ +#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* CIntToPy */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) 
value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? 
"little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* CIntFromPy */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(long) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << 
PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(long) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, 
__Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + if 
((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(long) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + long val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (long) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (long) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (long) -1; + } else { + stepval = v; + } + v = NULL; + val = (long) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((long) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = 
NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((long) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (long) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* CIntFromPy */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(int) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + 
assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(int) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(unsigned 
PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) 
- 1 > 3 * PyLong_SHIFT)) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(int) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + int val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (int) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (int) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (int) -1; + } else { + stepval = v; + } + v = NULL; + val = (int) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((int) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; 
+ { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((int) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (int) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* CIntToPy */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(int) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + 
return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(int), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(int)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? "little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* FormatTypeName */ +#if CYTHON_COMPILING_IN_LIMITED_API +static __Pyx_TypeName +__Pyx_PyType_GetName(PyTypeObject* tp) +{ + PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp, + __pyx_n_s_name); + if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) { + PyErr_Clear(); + Py_XDECREF(name); + name = __Pyx_NewRef(__pyx_n_s__182); + } + return name; +} +#endif + +/* FastTypeChecks */ +#if CYTHON_COMPILING_IN_CPYTHON +static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { + while (a) { + a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*); + if (a == b) + return 1; + } + return b == &PyBaseObject_Type; +} +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (a == b) return 1; + mro = 
a->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(a, b); +} +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (cls == a || cls == b) return 1; + mro = cls->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + PyObject *base = PyTuple_GET_ITEM(mro, i); + if (base == (PyObject *)a || base == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b); +} +#if PY_MAJOR_VERSION == 2 +static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { + PyObject *exception, *value, *tb; + int res; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&exception, &value, &tb); + res = exc_type1 ? PyObject_IsSubclass(err, exc_type1) : 0; + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + if (!res) { + res = PyObject_IsSubclass(err, exc_type2); + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + } + __Pyx_ErrRestore(exception, value, tb); + return res; +} +#else +static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { + if (exc_type1) { + return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2); + } else { + return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); + } +} +#endif +static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + assert(PyExceptionClass_Check(exc_type)); + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030B00A4 + return Py_Version & ~0xFFUL; +#else + const char* rt_version = Py_GetVersion(); + unsigned long 
version = 0; + unsigned long factor = 0x01000000UL; + unsigned int digit = 0; + int i = 0; + while (factor) { + while ('0' <= rt_version[i] && rt_version[i] <= '9') { + digit = digit * 10 + (unsigned int) (rt_version[i] - '0'); + ++i; + } + version += factor * digit; + if (rt_version[i] != '.') + break; + digit = 0; + factor >>= 8; + ++i; + } + return version; +#endif +} +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer) { + const unsigned long MAJOR_MINOR = 0xFFFF0000UL; + if ((rt_version & MAJOR_MINOR) == (ct_version & MAJOR_MINOR)) + return 0; + if (likely(allow_newer && (rt_version & MAJOR_MINOR) > (ct_version & MAJOR_MINOR))) + return 1; + { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compile time Python version %d.%d " + "of module '%.100s' " + "%s " + "runtime version %d.%d", + (int) (ct_version >> 24), (int) ((ct_version >> 16) & 0xFF), + __Pyx_MODULE_NAME, + (allow_newer) ? "was newer than" : "does not match", + (int) (rt_version >> 24), (int) ((rt_version >> 16) & 0xFF) + ); + return PyErr_WarnEx(NULL, message, 1); + } +} + +/* InitStrings */ +#if PY_MAJOR_VERSION >= 3 +static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { + if (t.is_unicode | t.is_str) { + if (t.intern) { + *str = PyUnicode_InternFromString(t.s); + } else if (t.encoding) { + *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); + } else { + *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); + } + } else { + *str = PyBytes_FromStringAndSize(t.s, t.n - 1); + } + if (!*str) + return -1; + if (PyObject_Hash(*str) == -1) + return -1; + return 0; +} +#endif +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION >= 3 + __Pyx_InitString(*t, t->p); + #else + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 
1); + } + if (!*t->p) + return -1; + if (PyObject_Hash(*t->p) == -1) + return -1; + #endif + ++t; + } + return 0; +} + +#include +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { + size_t len = strlen(s); + if (unlikely(len > (size_t) PY_SSIZE_T_MAX)) { + PyErr_SetString(PyExc_OverflowError, "byte string is too long"); + return -1; + } + return (Py_ssize_t) len; +} +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return __Pyx_PyUnicode_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return PyByteArray_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#if !CYTHON_PEP393_ENABLED +static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +} +#else +static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (likely(PyUnicode_IS_ASCII(o))) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + 
PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +} +#endif +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { + return __Pyx_PyUnicode_AsStringAndSize(o, length); + } else +#endif +#if (!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { + int retval; + if (unlikely(!x)) return -1; + retval = __Pyx_PyObject_IsTrue(x); + Py_DECREF(x); + return retval; +} +static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { + __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result)); +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(result)) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). 
" + "The ability to return an instance of a strict subclass of int is deprecated, " + "and may be removed in a future version of Python.", + result_type_name)) { + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; + } + __Pyx_DECREF_TypeName(result_type_name); + return result; + } +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")", + type_name, type_name, result_type_name); + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { +#if CYTHON_USE_TYPE_SLOTS + PyNumberMethods *m; +#endif + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x) || PyLong_Check(x))) +#else + if (likely(PyLong_Check(x))) +#endif + return __Pyx_NewRef(x); +#if CYTHON_USE_TYPE_SLOTS + m = Py_TYPE(x)->tp_as_number; + #if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = m->nb_int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = m->nb_long(x); + } + #else + if (likely(m && m->nb_int)) { + name = "int"; + res = m->nb_int(x); + } + #endif +#else + if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { + res = PyNumber_Int(x); + } +#endif + if (likely(res)) { +#if PY_MAJOR_VERSION < 3 + if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { +#else + if (unlikely(!PyLong_CheckExact(res))) { +#endif + return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(b); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + if 
(likely(__Pyx_PyLong_IsCompact(b))) { + return __Pyx_PyLong_CompactValue(b); + } else { + const digit* digits = __Pyx_PyLong_Digits(b); + const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b); + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { + if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { + return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); +#if PY_MAJOR_VERSION < 3 + } else if (likely(PyInt_CheckExact(o))) { + return PyInt_AS_LONG(o); +#endif + } else { + Py_ssize_t ival; + PyObject *x; + x = PyNumber_Index(o); + if (!x) return -1; + ival = PyInt_AsLong(x); + Py_DECREF(x); + return ival; + } 
+} +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { + return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +/* #### Code section: utility_code_pragmas_end ### */ +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + + + +/* #### Code section: end ### */ +#endif /* Py_PYTHON_H */ diff --git a/libs/IndicTransToolkit/IndicTransToolkit/processor.cp313-win_amd64.pyd b/libs/IndicTransToolkit/IndicTransToolkit/processor.cp313-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..c4bfe2b588e8832955419f2edc6fff8fa6faa921 --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit/processor.cp313-win_amd64.pyd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6171c90f22a4602e67c36a791a618ccbf0d3703f17ea0c214186cb5fe3030487 +size 139776 diff --git a/libs/IndicTransToolkit/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so b/libs/IndicTransToolkit/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..b387863ea7c20b83a05339b2f3182116c779646f --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a101ecb27adaf367f00c90b3f8e96e7fbda3bf0560d48c368fec3750a040a4 +size 229200 diff --git a/libs/IndicTransToolkit/IndicTransToolkit/processor.pyx b/libs/IndicTransToolkit/IndicTransToolkit/processor.pyx new file mode 100644 index 0000000000000000000000000000000000000000..ee67546a79c1b8a21a338fc7f98f71ba63da7807 --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit/processor.pyx @@ -0,0 +1,503 @@ +# cython: language_level=3, boundscheck=False, cdivision=True, wraparound=False +""" +Cython version of the IndicProcessor class with optimizations for performance. 
+Only preprocess_batch and postprocess_batch are exposed as cpdef methods. +All other methods are internal (cdef) for optimized Cython usage. +""" + +import regex as re +from tqdm import tqdm +from queue import Queue +from typing import List, Dict, Union + +# Importing Python objects since these libraries don't offer C-extensions +from indicnlp.tokenize import indic_tokenize, indic_detokenize +from indicnlp.normalize.indic_normalize import IndicNormalizerFactory +from sacremoses import MosesPunctNormalizer, MosesTokenizer, MosesDetokenizer +from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator + + +cdef class IndicProcessor: + cdef public bint inference + + # Precompiled regex patterns and placeholders + cdef object _MULTISPACE_REGEX + cdef object _DIGIT_SPACE_PERCENT + cdef object _DOUBLE_QUOT_PUNC + cdef object _DIGIT_NBSP_DIGIT + cdef object _END_BRACKET_SPACE_PUNC_REGEX + + cdef object _URL_PATTERN + cdef object _NUMERAL_PATTERN + cdef object _EMAIL_PATTERN + cdef object _OTHER_PATTERN + + cdef list _PUNC_REPLACEMENTS + cdef list _INDIC_FAILURE_CASES + + cdef dict _flores_codes + cdef dict _digits_translation_table + + # Placeholder maps stored in a Python Queue (treated as `object` for Cython) + cdef object _placeholder_entity_maps + + # Tools (also Python objects) + cdef object _en_tok + cdef object _en_normalizer + cdef object _en_detok + cdef object _xliterator + + def __cinit__(self, bint inference=True): + """ + Constructor for IndicProcessor. Initializes all necessary components. 
+ """ + self.inference = inference + + ############################## + # FLORES -> ISO CODES + ############################## + self._flores_codes = { + "asm_Beng": "as", + "awa_Deva": "hi", + "ben_Beng": "bn", + "bho_Deva": "hi", + "brx_Deva": "hi", + "doi_Deva": "hi", + "eng_Latn": "en", + "gom_Deva": "kK", + "gon_Deva": "hi", + "guj_Gujr": "gu", + "hin_Deva": "hi", + "hne_Deva": "hi", + "kan_Knda": "kn", + "kas_Arab": "ur", + "kas_Deva": "hi", + "kha_Latn": "en", + "lus_Latn": "en", + "mag_Deva": "hi", + "mai_Deva": "hi", + "mal_Mlym": "ml", + "mar_Deva": "mr", + "mni_Beng": "bn", + "mni_Mtei": "hi", + "npi_Deva": "ne", + "ory_Orya": "or", + "pan_Guru": "pa", + "san_Deva": "hi", + "sat_Olck": "or", + "snd_Arab": "ur", + "snd_Deva": "hi", + "tam_Taml": "ta", + "tel_Telu": "te", + "urd_Arab": "ur", + "unr_Deva": "hi", + } + + ############################## + # INDIC DIGIT TRANSLATION (str.translate) + ############################## + self._digits_translation_table = {} + cdef dict digits_dict = { + "\u09e6": "0", "\u0ae6": "0", "\u0ce6": "0", "\u0966": "0", + "\u0660": "0", "\uabf0": "0", "\u0b66": "0", "\u0a66": "0", + "\u1c50": "0", "\u06f0": "0", + + "\u09e7": "1", "\u0ae7": "1", "\u0967": "1", "\u0ce7": "1", + "\u06f1": "1", "\uabf1": "1", "\u0b67": "1", "\u0a67": "1", + "\u1c51": "1", "\u0c67": "1", + + "\u09e8": "2", "\u0ae8": "2", "\u0968": "2", "\u0ce8": "2", + "\u06f2": "2", "\uabf2": "2", "\u0b68": "2", "\u0a68": "2", + "\u1c52": "2", "\u0c68": "2", + + "\u09e9": "3", "\u0ae9": "3", "\u0969": "3", "\u0ce9": "3", + "\u06f3": "3", "\uabf3": "3", "\u0b69": "3", "\u0a69": "3", + "\u1c53": "3", "\u0c69": "3", + + "\u09ea": "4", "\u0aea": "4", "\u096a": "4", "\u0cea": "4", + "\u06f4": "4", "\uabf4": "4", "\u0b6a": "4", "\u0a6a": "4", + "\u1c54": "4", "\u0c6a": "4", + + "\u09eb": "5", "\u0aeb": "5", "\u096b": "5", "\u0ceb": "5", + "\u06f5": "5", "\uabf5": "5", "\u0b6b": "5", "\u0a6b": "5", + "\u1c55": "5", "\u0c6b": "5", + + "\u09ec": "6", "\u0aec": "6", 
"\u096c": "6", "\u0cec": "6", + "\u06f6": "6", "\uabf6": "6", "\u0b6c": "6", "\u0a6c": "6", + "\u1c56": "6", "\u0c6c": "6", + + "\u09ed": "7", "\u0aed": "7", "\u096d": "7", "\u0ced": "7", + "\u06f7": "7", "\uabf7": "7", "\u0b6d": "7", "\u0a6d": "7", + "\u1c57": "7", "\u0c6d": "7", + + "\u09ee": "8", "\u0aee": "8", "\u096e": "8", "\u0cee": "8", + "\u06f8": "8", "\uabf8": "8", "\u0b6e": "8", "\u0a6e": "8", + "\u1c58": "8", "\u0c6e": "8", + + "\u09ef": "9", "\u0aef": "9", "\u096f": "9", "\u0cef": "9", + "\u06f9": "9", "\uabf9": "9", "\u0b6f": "9", "\u0a6f": "9", + "\u1c59": "9", "\u0c6f": "9", + } + for k, v in digits_dict.items(): + self._digits_translation_table[ord(k)] = v + + # Also map ASCII '0'-'9' + for c in range(ord('0'), ord('9') + 1): + self._digits_translation_table[c] = chr(c) + + ############################## + # PLACEHOLDER MAP QUEUE + ############################## + self._placeholder_entity_maps = Queue() + + ############################## + # MOSES (as Python objects) + ############################## + self._en_tok = MosesTokenizer(lang="en") + self._en_normalizer = MosesPunctNormalizer() + self._en_detok = MosesDetokenizer(lang="en") + + ############################## + # TRANSLITERATOR (Python object) + ############################## + self._xliterator = UnicodeIndicTransliterator() + + ############################## + # Precompiled Patterns + ############################## + self._MULTISPACE_REGEX = re.compile(r"[ ]{2,}") + self._DIGIT_SPACE_PERCENT = re.compile(r"(\d) %") + self._DOUBLE_QUOT_PUNC = re.compile(r"\"([,\.]+)") + self._DIGIT_NBSP_DIGIT = re.compile(r"(\d) (\d)") + self._END_BRACKET_SPACE_PUNC_REGEX = re.compile(r"\) ([\.!:?;,])") + + self._URL_PATTERN = re.compile( + r"\b(?" 
+ # Map various placeholder formats to the matched text + placeholder_entity_map[f""] = match + placeholder_entity_map[f"< ID{serial_no} >"] = match + placeholder_entity_map[f"[ID{serial_no}]"] = match + placeholder_entity_map[f"[ ID{serial_no} ]"] = match + placeholder_entity_map[f"[ID {serial_no}]"] = match + placeholder_entity_map[f""] = match + placeholder_entity_map[f"< {indic_case}{serial_no} >"] = match + placeholder_entity_map[f"< {indic_case} {serial_no} >"] = match + placeholder_entity_map[f"<{indic_case} {serial_no}]"] = match + placeholder_entity_map[f"< {indic_case} {serial_no} ]"] = match + placeholder_entity_map[f"[{indic_case}{serial_no}]"] = match + placeholder_entity_map[f"[{indic_case} {serial_no}]"] = match + placeholder_entity_map[f"[ {indic_case}{serial_no} ]"] = match + placeholder_entity_map[f"[ {indic_case} {serial_no} ]"] = match + placeholder_entity_map[f"{indic_case} {serial_no}"] = match + placeholder_entity_map[f"{indic_case}{serial_no}"] = match + + # Replace the match with the base placeholder + text = text.replace(match, base_placeholder) + serial_no += 1 + + # Clean up any remaining placeholder artifacts + text = re.sub(r"\s+", " ", text).replace(">/", ">").replace("]/", "]") + self._placeholder_entity_maps.put(placeholder_entity_map) + return text + + # Internal Method: Normalize Text + cdef str _normalize(self, str text) except *: + """ + Normalizes numerals and optionally wraps placeholders. + """ + # Single-pass digit translation + text = text.translate(self._digits_translation_table) + + if self.inference: + text = self._wrap_with_placeholders(text) + return text + + # Internal Method: Indic Tokenize and Transliterate + cdef str _do_indic_tokenize_and_transliterate( + self, + str sentence, + object normalizer, + str iso_lang, + bint transliterate + ) except *: + """ + Helper method: normalizes, tokenizes, optionally transliterates from iso_lang -> 'hi'. 
+ """ + cdef str normed + cdef list tokens + cdef str joined + cdef str xlated + + normed = normalizer.normalize(sentence.strip()) + tokens = indic_tokenize.trivial_tokenize(normed, iso_lang) + joined = " ".join(tokens) + xlated = joined + if transliterate: + xlated = self._xliterator.transliterate(joined, iso_lang, "hi") + xlated = xlated.replace(" ् ", "्") + return xlated + + # Internal Method: Preprocess a Single Sentence + cdef str _preprocess( + self, + str sent, + str src_lang, + str tgt_lang, + object normalizer, + bint is_target + ) except *: + """ + Preprocess a single sentence: punctuation normalization, numeral normalization, + tokenization, transliteration, and adding language tags if necessary. + """ + cdef str iso_lang = self._flores_codes.get(src_lang, "hi") + cdef str script_part = src_lang.split("_")[1] + cdef bint do_transliterate = True + cdef str e_strip + cdef str e_norm + cdef list e_tokens + cdef str processed_sent + + # 1) Punctuation normalization + sent = self._punc_norm(sent) + + # 2) Numerals & placeholders + sent = self._normalize(sent) + + if script_part in ["Arab", "Aran", "Olck", "Mtei", "Latn"]: + do_transliterate = False + + if iso_lang == "en": + # English path + e_strip = sent.strip() + e_norm = self._en_normalizer.normalize(e_strip) + e_tokens = self._en_tok.tokenize(e_norm, escape=False) + processed_sent = " ".join(e_tokens) + else: + # Indic path + processed_sent = self._do_indic_tokenize_and_transliterate(sent, normalizer, iso_lang, do_transliterate) + + processed_sent = processed_sent.strip() + if not is_target: + return f"{src_lang} {tgt_lang} {processed_sent}" + else: + return processed_sent + + # Internal Method: Postprocess a Single Sentence + cdef str _postprocess(self, object sent, str lang) except *: + """ + Postprocess a single sentence: + 1) Pull placeholder map from queue + 2) Fix scripts for Perso-Arabic + 3) Restore placeholders + 4) Detokenize + """ + cdef dict placeholder_entity_map + cdef str lang_code + cdef 
str script_code + cdef str iso_lang + cdef str k + cdef str v + cdef str xlated + + # Unwrap if sent is a tuple or list + if isinstance(sent, (tuple, list)): + sent = sent[0] + + placeholder_entity_map = self._placeholder_entity_maps.get() + lang_code, script_code = lang.split("_", 1) + iso_lang = self._flores_codes.get(lang, "hi") + + # Fix for Perso-Arabic scripts + if script_code in ["Arab", "Aran"]: + sent = ( + sent.replace(" ؟", "؟") + .replace(" ۔", "۔") + .replace(" ،", "،") + .replace("ٮ۪", "ؠ") + ) + + # Oriya fix + if lang_code == "ory": + sent = sent.replace("ଯ଼", "ୟ") + + # Restore placeholders + for k, v in placeholder_entity_map.items(): + sent = sent.replace(k, v) + + # Detokenize + if lang == "eng_Latn": + return self._en_detok.detokenize(sent.split(" ")) + else: + xlated = self._xliterator.transliterate(sent, "hi", iso_lang) + return indic_detokenize.trivial_detokenize(xlated, iso_lang) + + # Exposed Method: Preprocess a Batch of Sentences + cpdef list preprocess_batch( + self, + List[str] batch, + str src_lang, + str tgt_lang=None, + bint is_target=False, + bint visualize=False + ): + """ + Preprocess an array of sentences (normalize, tokenize, transliterate). + This is exposed for external use. 
+ """ + cdef object normalizer = None + cdef str iso_code = self._flores_codes.get(src_lang, "hi") + cdef object iterator + cdef list results + cdef int i + cdef int n = len(batch) + + if src_lang != "eng_Latn": + normalizer = IndicNormalizerFactory().get_normalizer(iso_code) + + if visualize: + iterator = tqdm(batch, total=n, desc=f" | > Pre-processing {src_lang}", unit="line") + else: + iterator = batch + + return [self._preprocess(s, src_lang, tgt_lang, normalizer, is_target) for s in iterator] + + # Exposed Method: Postprocess a Batch of Sentences + cpdef list postprocess_batch( + self, + List[str] sents, + str lang="hin_Deva", + bint visualize=False + ): + """ + Postprocess a batch of sentences: + Restore placeholders, fix script issues, and detokenize. + This is exposed for external use. + """ + cdef object iterator + cdef list results + cdef int i + cdef int n = len(sents) + + if visualize: + iterator = tqdm(sents, total=n, desc=f" | > Post-processing {lang}", unit="line") + else: + iterator = sents + + results = [self._postprocess(s, lang) for s in iterator] + self._placeholder_entity_maps.queue.clear() + + return results diff --git a/libs/IndicTransToolkit/IndicTransToolkit/version.py b/libs/IndicTransToolkit/IndicTransToolkit/version.py new file mode 100644 index 0000000000000000000000000000000000000000..ae174d2c7300c1eecc46072f4358e731667aed33 --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit/version.py @@ -0,0 +1 @@ +__version__ = "1.0.3" diff --git a/libs/IndicTransToolkit/IndicTransToolkit/version.txt b/libs/IndicTransToolkit/IndicTransToolkit/version.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4c0d46e55ffb2237c9e900aa77172886f6c8aa5 --- /dev/null +++ b/libs/IndicTransToolkit/IndicTransToolkit/version.txt @@ -0,0 +1 @@ +1.0.3 \ No newline at end of file diff --git a/libs/IndicTransToolkit/LICENSE b/libs/IndicTransToolkit/LICENSE new file mode 100644 index 
0000000000000000000000000000000000000000..498691d06e5d9f57356c6f7f6930df80ec49b15f --- /dev/null +++ b/libs/IndicTransToolkit/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Varun Gumma. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE \ No newline at end of file diff --git a/libs/IndicTransToolkit/README.md b/libs/IndicTransToolkit/README.md new file mode 100644 index 0000000000000000000000000000000000000000..692ece738b75029dfbf364fdeef428805db1e8b0 --- /dev/null +++ b/libs/IndicTransToolkit/README.md @@ -0,0 +1,97 @@ +# IndicTransToolkit + +## About +The goal of this repository is to provide a simple, modular, and extendable toolkit for [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2) and be compatible with the HuggingFace models released. Please refer to the `CHANGELOG.md` for latest developments. 
+ +## Pre-requisites + - `Python 3.8+` + - [Indic NLP Library](https://github.com/VarunGumma/indic_nlp_library) + - Other requirements as listed in `requirements.txt` + +## Configuration + - Editable installation (Note, this may take a while): +```bash +git clone https://github.com/VarunGumma/IndicTransToolkit +cd IndicTransToolkit + +pip install --editable . --use-pep517 # required for pip >= 25.0 + +# in case it fails, try: +# pip install --editable . --use-pep517 --config-settings editable_mode=compat +``` + +## Examples +For the training use case, please refer to [this guide](https://github.com/AI4Bharat/IndicTrans2/tree/main/huggingface_interface). + +### PreTrainedTokenizer +```python +import torch +from IndicTransToolkit.processor import IndicProcessor # NOW IMPLEMENTED IN CYTHON !! +from transformers import AutoModelForSeq2SeqLM, AutoTokenizer + +ip = IndicProcessor(inference=True) +tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True) +model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True) + +sentences = [ + "This is a test sentence.", + "This is another longer different test sentence.", + "Please send an SMS to 9876543210 and an email on newemail123@xyz.com by 15th October, 2023.", +] + +batch = ip.preprocess_batch(sentences, src_lang="eng_Latn", tgt_lang="hin_Deva", visualize=False) # set it to visualize=True to print a progress bar +batch = tokenizer(batch, padding="longest", truncation=True, max_length=256, return_tensors="pt") + +with torch.inference_mode(): + outputs = model.generate(**batch, num_beams=5, num_return_sequences=1, max_length=256) + +with tokenizer.as_target_tokenizer(): + # This scoping is absolutely necessary, as it will instruct the tokenizer to tokenize using the target vocabulary. + # Failure to use this scoping will result in gibberish/unexpected predictions as the output will be de-tokenized with the source vocabulary instead. 
+ outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True) + +outputs = ip.postprocess_batch(outputs, lang="hin_Deva") +print(outputs) + +>>> ['यह एक परीक्षण वाक्य है।', 'यह एक और लंबा अलग परीक्षण वाक्य है।', 'कृपया 9876543210 पर एक एस. एम. एस. भेजें और 15 अक्टूबर, 2023 तक newemail123@xyz.com पर एक ईमेल भेजें।'] +``` + +### Evaluation +- `IndicEvaluator` is a Python implementation of [compute_metrics.sh](https://github.com/AI4Bharat/IndicTrans2/blob/main/compute_metrics.sh). +- We have found that this Python implementation gives slightly lower scores than the original `compute_metrics.sh`. So, please use this function cautiously, and feel free to raise a PR if you find the bug or a fix. +```python +from IndicTransToolkit import IndicEvaluator + +# this method returns a dictionary with BLEU and ChrF2++ scores with appropriate signatures +evaluator = IndicEvaluator() +scores = evaluator.evaluate(tgt_lang=tgt_lang, preds=pred_file, refs=ref_file) + +# alternatively, you can pass the list of predictions and references instead of files +# scores = evaluator.evaluate(tgt_lang=tgt_lang, preds=preds, refs=refs) +``` + +## Authors + - Varun Gumma (varun230999@gmail.com) + - Jay Gala (jaygala24@gmail.com) + - Pranjal Agadh Chitale (pranjalchitale@gmail.com) + - Raj Dabre (prajdabre@gmail.com) + + +## Bugs and Contribution +Since this is a bleeding-edge module, you may encounter broken stuff and import issues once in a while. In case you encounter any bugs or want additional functionalities, please feel free to raise `Issues`/`Pull Requests` or contact the authors. 
+ + +## Citation +If you use our codebase, or models, please do cite the following paper: +```bibtex +@article{ + gala2023indictrans, + title={IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages}, + author={Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan}, + journal={Transactions on Machine Learning Research}, + issn={2835-8856}, + year={2023}, + url={https://openreview.net/forum?id=vfT4YuzAYA}, + note={} +} +``` diff --git a/libs/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.cpython-310-x86_64-linux-gnu.so b/libs/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.cpython-310-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..01765caee820c9b115af62dd43c74a61694f2856 --- /dev/null +++ b/libs/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.cpython-310-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d57d4239b3638a272e4b70292f10494ee4a0fee201a9d74c62fc35a3d263a45 +size 260304 diff --git a/libs/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so b/libs/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..b387863ea7c20b83a05339b2f3182116c779646f --- /dev/null +++ b/libs/IndicTransToolkit/build/lib.linux-x86_64-cpython-310/IndicTransToolkit/processor.cpython-310-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a101ecb27adaf367f00c90b3f8e96e7fbda3bf0560d48c368fec3750a040a4 
+size 229200 diff --git a/libs/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.o b/libs/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.o new file mode 100644 index 0000000000000000000000000000000000000000..27265c8301ef533f6d71fae9155f794ab6a143c3 --- /dev/null +++ b/libs/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/fast_processor.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9e82df38b208dc0a9b468ff669c9da159c7deaabcb389fcfacd43e038504fec +size 347184 diff --git a/libs/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/processor.o b/libs/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/processor.o new file mode 100644 index 0000000000000000000000000000000000000000..1458e809a01af013191e526c0f7f45142fcb5bf6 --- /dev/null +++ b/libs/IndicTransToolkit/build/temp.linux-x86_64-cpython-310/IndicTransToolkit/processor.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d27c2cc00c97a89f97f7c28bc9175c5c403a0e2a372a0b39f1c5fe8609adda09 +size 303696 diff --git a/libs/IndicTransToolkit/pyproject.toml b/libs/IndicTransToolkit/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..3c285191a0288acb4b47e0744ac4bb01c28f9bcc --- /dev/null +++ b/libs/IndicTransToolkit/pyproject.toml @@ -0,0 +1,25 @@ +[build-system] +requires = [ + "setuptools>=68.2.2", + "wheel", + "Cython", +] +build-backend = "setuptools.build_meta" + +[tool.black] +# Black configuration for code formatting +line-length = 88 +target-version = ['py38'] +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist +)/ +''' \ No newline at end of file diff --git a/libs/IndicTransToolkit/requirements.txt b/libs/IndicTransToolkit/requirements.txt new file mode 100644 index 
0000000000000000000000000000000000000000..e22472b64d63e9f46b5d800696f64556f13692d2 --- /dev/null +++ b/libs/IndicTransToolkit/requirements.txt @@ -0,0 +1,8 @@ +setuptools>=68.2.2 +torch +cython +sacremoses +sentencepiece +transformers +sacrebleu +indic-nlp-library-IT2 @ git+https://github.com/VarunGumma/indic_nlp_library.git diff --git a/libs/IndicTransToolkit/setup.py b/libs/IndicTransToolkit/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..8b9bef168181fd18e7b34b0bda2a43d8c3e14982 --- /dev/null +++ b/libs/IndicTransToolkit/setup.py @@ -0,0 +1,61 @@ +import os +import pathlib +from sys import version_info, exit +from setuptools import setup, find_packages +from Cython.Build import cythonize +from pkg_resources import parse_requirements + +def write_version_py(): + version_txt_path = os.path.join("IndicTransToolkit", "version.txt") + with open(version_txt_path, "r", encoding="utf-8") as f: + version = f.read().strip() + + version_py_path = os.path.join("IndicTransToolkit", "version.py") + with open(version_py_path, "w", encoding="utf-8") as f: + f.write(f'__version__ = "{version}"\n') + return version + +# Enforce Python >= 3.8 +if version_info < (3, 8): + exit("Sorry, Python >= 3.8 is required for IndicTransToolkit.") + +# Read long description from README +with open("README.md", "r", errors="ignore", encoding="utf-8") as fh: + long_description = fh.read().strip() + +# Write version.py from version.txt +version = write_version_py() + +# Parse requirements.txt +req_file = pathlib.Path("requirements.txt") +requirements = [str(req) for req in parse_requirements(req_file.open())] + +# Cython files to compile (adjust if your .pyx name differs) +cython_extensions = cythonize( + [ + "IndicTransToolkit/processor.pyx", + ], + compiler_directives={"language_level": "3", "boundscheck": False}, +) + +setup( + name="IndicTransToolkit", + version=version, + author="Varun Gumma", + author_email="varun230999@gmail.com", + description="A simple, 
consistent, and extendable module for IndicTrans2 tokenizer compatible with HuggingFace models", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/VarunGumma/IndicTransToolkit", + packages=find_packages(), # Auto-detect packages + license="MIT", + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires=">=3.8", + install_requires=requirements, + ext_modules=cython_extensions, + zip_safe=False, +) diff --git a/libs/indic_nlp_library/.gitignore b/libs/indic_nlp_library/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..cd773eaed290f7ef5fb7d1ac8400eb2a53967ce7 --- /dev/null +++ b/libs/indic_nlp_library/.gitignore @@ -0,0 +1,6 @@ +**/__pycache__/ +*.egg-info/ +dist/ +build/ +contrib/ +docs/ \ No newline at end of file diff --git a/libs/indic_nlp_library/LICENSE b/libs/indic_nlp_library/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..cd7ae896c5346bd3339307df3c1d1d350ed67316 --- /dev/null +++ b/libs/indic_nlp_library/LICENSE @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2013-present Anoop Kunchukuttan + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/libs/indic_nlp_library/README.md b/libs/indic_nlp_library/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ebf1791dfee931dff006f831b02b969cff9c5167 --- /dev/null +++ b/libs/indic_nlp_library/README.md @@ -0,0 +1,22 @@ +# Indic NLP Library +This repository is a _de-bloated_ fork of the original [Indic NLP Library](https://github.com/anoopkunchukuttan/indic_nlp_library) and integrates the [UrduHack](https://github.com/urduhack/urduhack) submodule and [Indic NLP Resources](https://github.com/anoopkunchukuttan/indic_nlp_resources) directly. This makes it possible to work with Urdu normalization and tokenization without installing [urduhack](https://pypi.org/project/urduhack/) and `indic_nlp_resources` separately, which can sometimes be an issue as it is `TensorFlow`-based. 
This repository is mainly created and maintained for [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2) and [IndicTransTokenizer](https://github.com/VarunGumma/IndicTransTokenizer). + +For any queries, please get in touch with the original authors/maintainers of the respective libraries: + +- `Indic NLP Library`: [anoopkunchukuttan](https://github.com/anoopkunchukuttan) +- `Indic NLP Resources`: [anoopkunchukuttan](https://github.com/anoopkunchukuttan) +- `UrduHack`: [UrduHack](https://github.com/urduhack) + +## Usage: +``` +git clone https://github.com/VarunGumma/indic_nlp_library.git + +cd indic_nlp_library +pip install --editable ./ +``` + +## Updates: +- Integrated `urduhack` directly into the repository. +- Renamed the `master` branch to `main`. +- Integrated `indic_nlp_resources` directly into the repository. +- _De-bloated_ the repository. diff --git a/libs/indic_nlp_library/RESOURCES/script/all_script_phonetic_data.csv b/libs/indic_nlp_library/RESOURCES/script/all_script_phonetic_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..7a6b4f44d0490ad15971601951d6806669283aba --- /dev/null +++ b/libs/indic_nlp_library/RESOURCES/script/all_script_phonetic_data.csv @@ -0,0 +1,113 @@ +Unicode,Relative Offset,Devanagari,ITRANS,Notes,Valid Vector Representation,is_vowel,is_consonant,nukta,halanta,anusvara,misc,short_vowel,long_vowel,weak,medium,strong,independent_vowel,dependent_vowel,plosive,fricative,Central-approximant,Lateral-approximant,flap,velar,palatal,retroflex,dental,labial,aspirated,not_aspirated,voiced,unvoiced,nasal,not_nasal,front,central,back,close,close-mid,open-mid,open,rounded,not_rounded +900,0,ऀ,ऀ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +901,1,ँ,.n,,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +902,2,ं,.n,,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +903,3,ः,H,Should represent as pure aspiration and not as a 
vowel,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +904,4,ऄ,ऄ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +905,5,अ,a,,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1 +906,6,आ,A,,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1 +907,7,इ,i,,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1 +908,8,ई,I,,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1 +909,9,उ,u,,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0 +90a,10,ऊ,uu,,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0 +90b,11,ऋ,R^i,,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1 +90c,12,ऌ,LLi,,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0 +90d,13,ऍ,ऍ,Nasalized e,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1 +90e,14,ऎ,.e,,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1 +90f,15,ए,e,,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1 +910,16,ऐ,ai,,1,1,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,1 +911,17,ऑ,ऑ,Nasalized o,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0 +912,18,ऒ,.o,,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0 +913,19,ओ,o,,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0 +914,20,औ,au,,1,1,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1 +915,21,क,ka,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +916,22,ख,kha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +917,23,ग,ga,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 
+918,24,घ,gha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0 +919,25,ङ,~Na,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +91a,26,च,ca,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +91b,27,छ,Cha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +91c,28,ज,ja,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +91d,29,झ,jha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0 +91e,30,ञ,JNa,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +91f,31,ट,Ta,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +920,32,ठ,Tha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +921,33,ड,Da,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +922,34,ढ,Dha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0 +923,35,ण,Na,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +924,36,त,ta,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +925,37,थ,tha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +926,38,द,da,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +927,39,ध,dha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0 +928,40,न,na,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +929,41,ऩ,ऩ,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +92a,42,प,pa,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +92b,43,फ,pha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +92c,44,ब,ba,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 
+92d,45,भ,bha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0 +92e,46,म,ma,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +92f,47,य,ya,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +930,48,र,ra,alveolar or dental- approximated by dental/ can also be considered a rhotic consonant (flap ie tap),1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +931,49,ऱ,Ra,retroflex (trill),1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +932,50,ल,la,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +933,51,ळ,La,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +934,52,ऴ,zha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +935,53,व,va,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +936,54,श,sha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +937,55,ष,Sha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +938,56,स,sa,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +939,57,ह,ha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0 +93a,58,ऺ,ऺ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +93b,59,ऻ,ऻ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +93c,60,़,़,,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +93d,61,ऽ,.a,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +93e,62,ा,A,,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1 +93f,63,ि,i,,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1 +940,64,ी,I,,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1 
+941,65,ु,u,,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0 +942,66,ू,uu,,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0 +943,67,ृ,R^i,,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1 +944,68,ॄ,R^I,,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1 +945,69,ॅ,ॅ,Nasalized e,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1 +946,70,ॆ,.e,,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1 +947,71,े,e,,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1 +948,72,ै,ai,,1,1,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,1 +949,73,ॉ,ॉ,Nasalized o,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0 +94a,74,ॊ,.o,,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0 +94b,75,ो,o,,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0 +94c,76,ौ,au,,1,1,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1 +94d,77,्,,,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +94e,78,ॎ,ॎ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +94f,79,ॏ,ॏ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +950,80,ॐ,AUM,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +951,81,॑,॑,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +952,82,॒,॒,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +953,83,॓,॓,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +954,84,॔,॔,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +955,85,ॕ,ॕ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 
+956,86,ॖ,ॖ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +957,87,ॗ,ॗ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +958,88,क़,क़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +959,89,ख़,ख़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95a,90,ग़,ग़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95b,91,ज़,ज़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95c,92,ड़,ड़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95d,93,ढ़,ढ़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95e,94,फ़,फ़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95f,95,य़,य़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +960,96,ॠ,R^I,,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1 +961,97,ॡ,L^I,,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0 +962,98,ॢ,LLi,,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0 +963,99,ॣ,L^I,,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0,1,0 +964,100,।,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +965,101,॥,..,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 
+966,102,०,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +967,103,१,1,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +968,104,२,2,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +969,105,३,3,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96a,106,४,4,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96b,107,५,5,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96c,108,६,6,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96d,109,७,7,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96e,110,८,8,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96f,111,९,9,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/libs/indic_nlp_library/RESOURCES/script/english_arpabet_list.csv b/libs/indic_nlp_library/RESOURCES/script/english_arpabet_list.csv new file mode 100644 index 0000000000000000000000000000000000000000..6dd0eda41b8f2f4f756087de047b1743f0da7823 --- /dev/null +++ b/libs/indic_nlp_library/RESOURCES/script/english_arpabet_list.csv @@ -0,0 +1,46 @@ +AO +AA +IY +UW +EH +IH +UH +AH +AX +AE +EY +AY +OW +AW +OY +P +B +T +D +K +G +CH +JH +F +V +TH +DH +S +Z +SH +ZH +HH +M +EM +N +EN +NG +ENG +L +EL +R +DX +NX +Y +W +Q diff --git a/libs/indic_nlp_library/RESOURCES/script/english_script_phonetic_data.csv b/libs/indic_nlp_library/RESOURCES/script/english_script_phonetic_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..d636d87c0b2f732c7cff46925baacb7d4e72ef83 --- /dev/null +++ b/libs/indic_nlp_library/RESOURCES/script/english_script_phonetic_data.csv @@ -0,0 +1,47 @@ +Unicode,Relative Offset,Devanagari,ITRANS,Notes,Valid Vector 
Representation,is_vowel,is_consonant,nukta,halanta,anusvara,misc,short_vowel,long_vowel,weak,medium,strong,independent_vowel,dependent_vowel,plosive,fricative,Central-approximant,Lateral-approximant,flap,velar,palatal,retroflex,dental,labial,aspirated,not_aspirated,voiced,unvoiced,nasal,not_nasal,front,central,back,close,close-mid,open-mid,open,rounded,not_rounded +900,0,,AO,,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0 +901,1,,AA,,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1 +902,2,,IY,,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1 +903,3,,UW,,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0 +904,4,ए,EH,,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1 +905,5,इ,IH,,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1 +906,6,उ,UH,,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0 +907,7,अ,AH,,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1 +908,8,अ,AX,,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1 +909,9,ऍ,AE,,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1 +90a,10,,EY,,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1 +90b,11,ऐ,AY,,1,1,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,1 +90c,12,ओ,OW,,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0 +90d,13,औ,AW,,1,1,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1 +90e,14,,OY,,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0 +90f,15,,P,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +910,16,,B,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 
+911,17,,T,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +912,18,,D,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +913,19,,K,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +914,20,,G,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +915,21,,CH,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +916,22,,JH,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +917,23,,F,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +918,24,,V,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +919,25,,TH,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0 +91a,26,,DH,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +91b,27,,S,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +91c,28,,Z,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +91d,29,,SH,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +91e,30,,ZH,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0 +91f,31,,HH,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +920,32,,M,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +921,33,,EM,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +922,34,,N,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +923,35,,EN,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +924,36,,NG,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +925,37,,ENG,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +926,38,,L,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 
+927,39,,EL,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +928,40,,R,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +929,41,,DX,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +92a,42,,NX,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +92b,43,,Y,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +92c,44,,W,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +92d,45,,Q,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/libs/indic_nlp_library/RESOURCES/script/tamil_script_phonetic_data.csv b/libs/indic_nlp_library/RESOURCES/script/tamil_script_phonetic_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..0f31cc848e384858682283b0b82a3cf2f21f4d71 --- /dev/null +++ b/libs/indic_nlp_library/RESOURCES/script/tamil_script_phonetic_data.csv @@ -0,0 +1,113 @@ +Unicode,Relative Offset,Devanagari,ITRANS,Notes,Valid Vector Representation,is_vowel,is_consonant,nukta,halanta,anusvara,misc,short_vowel,long_vowel,weak,medium,strong,independent_vowel,dependent_vowel,plosive,fricative,Central-approximant,Lateral-approximant,flap,velar,palatal,retroflex,dental,labial,aspirated,not_aspirated,voiced,unvoiced,nasal,not_nasal,front,central,back,close,close-mid,open-mid,open,rounded,not_rounded +900,0,ऀ,ऀ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +901,1,ँ,.n,,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +902,2,ं,.n,,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +903,3,ः,H,Should represent as pure aspiration and not as a vowel,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +904,4,ऄ,ऄ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 
+905,5,अ,a,,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1 +906,6,आ,A,,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1 +907,7,इ,i,,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1 +908,8,ई,I,,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1 +909,9,उ,u,,1,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0 +90a,10,ऊ,uu,,1,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0 +90b,11,ऋ,R^i,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +90c,12,ऌ,LLi,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +90d,13,ऍ,ऍ,Nasalized e,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1 +90e,14,ऎ,.e,,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1 +90f,15,ए,e,,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1 +910,16,ऐ,ai,,1,1,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,1 +911,17,ऑ,ऑ,Nasalized o,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0 +912,18,ऒ,.o,,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0 +913,19,ओ,o,,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0 +914,20,औ,au,,1,1,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1 +915,21,क,ka,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0 +916,22,ख,kha,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +917,23,ग,ga,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +918,24,घ,gha,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +919,25,ङ,~Na,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 
+91a,26,च,ca,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0 +91b,27,छ,Cha,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +91c,28,ज,ja,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +91d,29,झ,jha,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +91e,30,ञ,JNa,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +91f,31,ट,Ta,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0 +920,32,ठ,Tha,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +921,33,ड,Da,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +922,34,ढ,Dha,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +923,35,ण,Na,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +924,36,त,ta,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0 +925,37,थ,tha,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +926,38,द,da,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +927,39,ध,dha,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +928,40,न,na,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +929,41,ऩ,ऩ,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 +92a,42,प,pa,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0 +92b,43,फ,pha,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +92c,44,ब,ba,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +92d,45,भ,bha,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +92e,46,म,ma,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0 
+92f,47,य,ya,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +930,48,र,ra,alveolar or dental- approximated by dental/ can also be considered a rhotic consonant (flap ie tap),1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +931,49,ऱ,Ra,retroflex (trill),1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +932,50,ल,la,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +933,51,ळ,La,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +934,52,ऴ,zha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +935,53,व,va,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 +936,54,श,sha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +937,55,ष,Sha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +938,56,स,sa,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0 +939,57,ह,ha,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0 +93a,58,ऺ,ऺ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +93b,59,ऻ,ऻ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +93c,60,़,़,,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +93d,61,ऽ,.a,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +93e,62,ा,A,,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1 +93f,63,ि,i,,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1 +940,64,ी,I,,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1 +941,65,ु,u,,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0 +942,66,ू,uu,,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,1,0 
+943,67,ृ,R^i,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +944,68,ॄ,R^I,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +945,69,ॅ,ॅ,Nasalized e,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,0,0,1 +946,70,ॆ,.e,,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1 +947,71,े,e,,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1 +948,72,ै,ai,,1,1,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,1 +949,73,ॉ,ॉ,Nasalized o,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0 +94a,74,ॊ,.o,,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0 +94b,75,ो,o,,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,0,1,0 +94c,76,ौ,au,,1,1,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1 +94d,77,्,,,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +94e,78,ॎ,ॎ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +94f,79,ॏ,ॏ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +950,80,ॐ,AUM,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +951,81,॑,॑,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +952,82,॒,॒,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +953,83,॓,॓,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +954,84,॔,॔,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +955,85,ॕ,ॕ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +956,86,ॖ,ॖ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +957,87,ॗ,ॗ,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +958,88,क़,क़,will be decomposed to canonical 
representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +959,89,ख़,ख़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95a,90,ग़,ग़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95b,91,ज़,ज़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95c,92,ड़,ड़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95d,93,ढ़,ढ़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95e,94,फ़,फ़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +95f,95,य़,य़,will be decomposed to canonical representation: consonant+nukta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +960,96,ॠ,R^I,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1 +961,97,ॡ,L^I,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 +962,98,ॢ,LLi,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +963,99,ॣ,L^I,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +964,100,।,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +965,101,॥,..,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +966,102,०,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +967,103,१,1,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 
+968,104,२,2,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +969,105,३,3,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96a,106,४,4,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96b,107,५,5,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96c,108,६,6,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96d,109,७,7,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96e,110,८,8,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +96f,111,९,9,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/libs/indic_nlp_library/RESOURCES/transliterate/offset_itrans_map.csv b/libs/indic_nlp_library/RESOURCES/transliterate/offset_itrans_map.csv new file mode 100644 index 0000000000000000000000000000000000000000..f916d9fb30bd3a40096c2304e11f52b62b4c8825 --- /dev/null +++ b/libs/indic_nlp_library/RESOURCES/transliterate/offset_itrans_map.csv @@ -0,0 +1,129 @@ +offset_hex,devnag_char,itrans +0x0,ऀ,ऀ +0x1,ँ,ँ +0x2,ं,.m +0x3,ः,H +0x4,ऄ,ऄ +0x5,अ,a +0x6,आ,aa +0x7,इ,i +0x8,ई,ii +0x9,उ,u +0xa,ऊ,uu +0xb,ऋ,R^i +0xc,ऌ,L^i +0xd,ऍ,ऍ +0xe,ऎ,.e +0xf,ए,e +0x10,ऐ,ai +0x11,ऑ,ऑ +0x12,ऒ,.o +0x13,ओ,o +0x14,औ,au +0x15,क,ka +0x16,ख,kha +0x17,ग,ga +0x18,घ,gha +0x19,ङ,~Na +0x1a,च,cha +0x1b,छ,Cha +0x1c,ज,ja +0x1d,झ,jha +0x1e,ञ,~na +0x1f,ट,Ta +0x20,ठ,Tha +0x21,ड,Da +0x22,ढ,Dha +0x23,ण,Na +0x24,त,ta +0x25,थ,tha +0x26,द,da +0x27,ध,dha +0x28,न,na +0x29,ऩ,*na +0x2a,प,pa +0x2b,फ,pha +0x2c,ब,ba +0x2d,भ,bha +0x2e,म,ma +0x2f,य,ya +0x30,र,ra +0x31,ऱ,Ra +0x32,ल,la +0x33,ळ,lda +0x34,ऴ,zha +0x35,व,va +0x36,श,sha +0x37,ष,Sha +0x38,स,sa +0x39,ह,ha +0x3a,ऺ,ऺ +0x3b,ऻ,ऻ +0x3c,़,़ +0x3d,ऽ,.a +0x3e,ा,aa +0x3f,ि,i +0x40,ी,ii +0x41,ु,u +0x42,ू,uu +0x43,ृ,R^i +0x44,ॄ,R^I +0x45,ॅ,ॅ +0x46,ॆ,.e +0x47,े,e +0x48,ै,ai +0x49,ॉ,ॉ +0x4a,ॊ,.o +0x4b,ो,o +0x4c,ौ,au +0x4d,्, +0x4e,ॎ,ॎ +0x4f,ॏ,ॏ 
+0x50,ॐ,AUM +0x51,॑,॑ +0x52,॒,॒ +0x53,॓,॓ +0x54,॔,॔ +0x55,ॕ,ॕ +0x56,ॖ,ॖ +0x57,ॗ,ॗ +0x58,क़,क़ +0x59,ख़,ख़ +0x5a,ग़,ग़ +0x5b,ज़,ज़ +0x5c,ड़,ड़ +0x5d,ढ़,ढ़ +0x5e,फ़,फ़ +0x5f,य़,य़ +0x60,ॠ,R^I +0x61,ॡ,L^I +0x62,ॢ,L^i +0x63,ॣ,L^I +0x64,।,. +0x65,॥,.. +0x66,०,0 +0x67,१,1 +0x68,२,2 +0x69,३,3 +0x6a,४,4 +0x6b,५,5 +0x6c,६,6 +0x6d,७,7 +0x6e,८,8 +0x6f,९,9 +0x70,॰,॰ +0x71,ॱ,ॱ +0x72,ॲ,ॲ +0x73,ॳ,ॳ +0x74,ॴ,ॴ +0x75,ॵ,ॵ +0x76,ॶ,ॶ +0x77,ॷ,ॷ +0x78,ॸ,ॸ +0x79,ॹ,ॹ +0x7a,ॺ,ॺ +0x7b,ॻ,ॻ +0x7c,ॼ,ॼ +0x7d,ॽ,ॽ +0x7e,ॾ,ॾ +0x7f,ॿ,a diff --git a/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/PKG-INFO b/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..0220cea5cbd1a9a81fd2eaa984b2e082859fac99 --- /dev/null +++ b/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/PKG-INFO @@ -0,0 +1,52 @@ +Metadata-Version: 2.2 +Name: indic_nlp_library_IT2 +Version: 0.0.2 +Summary: The goal of the Indic NLP Library is to build Python based libraries for common text processing and Natural Language Processing in Indian languages. This fork is specialized for IndicTrans2. 
+Home-page: https://github.com/VarunGumma/indic_nlp_library +Author: Varun Gumma +Author-email: varun230999@gmail.com +License: MIT +Classifier: Programming Language :: Python :: 3 +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Requires-Python: >=3.8 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: sphinx-argparse +Requires-Dist: sphinx_rtd_theme +Requires-Dist: morfessor +Requires-Dist: pandas +Requires-Dist: numpy +Dynamic: author +Dynamic: author-email +Dynamic: classifier +Dynamic: description +Dynamic: description-content-type +Dynamic: home-page +Dynamic: license +Dynamic: requires-dist +Dynamic: requires-python +Dynamic: summary + +# Indic NLP Library +This repository is a _de-bloated_ fork of the original [Indic NLP Library](https://github.com/anoopkunchukuttan/indic_nlp_library) and integrates [UrduHack](https://github.com/urduhack/urduhack) submodule and [Indic NLP Resources](https://github.com/anoopkunchukuttan/indic_nlp_resources) directly. This allows to work with Urdu normalization and tokenization without needing to install [urduhack](https://pypi.org/project/urduhack/) and `indic_nlp_resources` separately, which can be an issue sometimes as it is `TensorFlow` based. 
This repository is mainly created and mainted for [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2) and [IndicTransTokenizer](https://github.com/VarunGumma/IndicTransTokenizer) + +For any queries, please get in touch with the original authors/maintainers of the respective libraries: + +- `Indic NLP Library`: [anoopkunchukuttan](https://github.com/anoopkunchukuttan) +- `Indic NLP Resources`: [anoopkunchukuttan](https://github.com/anoopkunchukuttan) +- `UrduHack`: [UrduHack](https://github.com/urduhack) + +## Usage: +``` +git clone https://github.com/VarunGumma/indic_nlp_library.git + +cd indic_nlp_library +pip install --editable ./ +``` + +## Updates: +- Integrated `urduhack` directly into the repository. +- Renamed `master` branch as `main`. +- Integrated `indic_nlp_resources` directly into the repository. +- _De-bloated_ the repository. diff --git a/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/SOURCES.txt b/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..fec3085ef359eb736a4bbeafc3ff71a90e13fa03 --- /dev/null +++ b/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/SOURCES.txt @@ -0,0 +1,40 @@ +LICENSE +README.md +setup.py +indic_nlp_library_IT2.egg-info/PKG-INFO +indic_nlp_library_IT2.egg-info/SOURCES.txt +indic_nlp_library_IT2.egg-info/dependency_links.txt +indic_nlp_library_IT2.egg-info/requires.txt +indic_nlp_library_IT2.egg-info/top_level.txt +indicnlp/__init__.py +indicnlp/common.py +indicnlp/langinfo.py +indicnlp/loader.py +indicnlp/version.py +indicnlp/normalize/__init__.py +indicnlp/normalize/indic_normalize.py +indicnlp/script/__init__.py +indicnlp/script/english_script.py +indicnlp/script/indic_scripts.py +indicnlp/script/phonetic_sim.py +indicnlp/tokenize/__init__.py +indicnlp/tokenize/indic_detokenize.py +indicnlp/tokenize/indic_tokenize.py +indicnlp/tokenize/sentence_tokenize.py +indicnlp/transliterate/__init__.py 
+indicnlp/transliterate/acronym_transliterator.py +indicnlp/transliterate/script_unifier.py +indicnlp/transliterate/unicode_transliterate.py +indicnlp/urduhack/__init__.py +indicnlp/urduhack/stop_words.py +indicnlp/urduhack/urdu_characters.py +indicnlp/urduhack/normalization/__init__.py +indicnlp/urduhack/normalization/character.py +indicnlp/urduhack/normalization/regexes.py +indicnlp/urduhack/preprocessing/__init__.py +indicnlp/urduhack/preprocessing/character.py +indicnlp/urduhack/preprocessing/regexes.py +indicnlp/urduhack/preprocessing/util.py +indicnlp/urduhack/tokenization/__init__.py +indicnlp/urduhack/tokenization/eos.py +indicnlp/urduhack/tokenization/tokenizer.py \ No newline at end of file diff --git a/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/dependency_links.txt b/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/requires.txt b/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/requires.txt new file mode 100644 index 0000000000000000000000000000000000000000..d92c692126bd50f3ed49db616c7f108513ac30b2 --- /dev/null +++ b/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/requires.txt @@ -0,0 +1,5 @@ +sphinx-argparse +sphinx_rtd_theme +morfessor +pandas +numpy diff --git a/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/top_level.txt b/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..51f364b577834ba0058580fb33703fe7e50b3b76 --- /dev/null +++ b/libs/indic_nlp_library/indic_nlp_library_IT2.egg-info/top_level.txt @@ -0,0 +1 @@ +indicnlp diff --git a/libs/indic_nlp_library/indicnlp/__init__.py b/libs/indic_nlp_library/indicnlp/__init__.py new 
# Default location of the Indic NLP Resources directory: a "RESOURCES"
# folder located two levels above this file (next to the package directory).
INDIC_RESOURCES_PATH = os.path.join(Path(__file__).resolve().parent.parent, "RESOURCES")


class IndicNlpException(Exception):
    """
    Exception raised by Indic NLP Library components.

    The ``msg`` attribute holds the exception details. ``str()`` of the
    exception is the ``repr`` of that message.
    """

    def __init__(self, msg):
        # Forward the message to ``Exception`` so that ``args`` is populated
        # (needed for sensible pickling/logging of the exception).
        super().__init__(msg)
        self.msg = msg

    def __str__(self):
        return repr(self.msg)


def init():
    """
    Initialize the module.

    Checks whether ``INDIC_RESOURCES_PATH`` is set. If it is the empty
    string, it is initialized from the ``INDIC_RESOURCES_PATH`` environment
    variable.

    Raises:
        IndicNlpException: if the path is empty and the environment variable
            is missing or is itself empty.
    """
    global INDIC_RESOURCES_PATH

    if INDIC_RESOURCES_PATH == "":
        try:
            INDIC_RESOURCES_PATH = os.environ["INDIC_RESOURCES_PATH"]
        except KeyError as err:
            # Only a missing variable is expected here; keep the cause chained.
            raise IndicNlpException("INDIC_RESOURCES_PATH not set") from err

    # The environment variable may exist but be empty.
    if INDIC_RESOURCES_PATH == "":
        raise IndicNlpException("INDIC_RESOURCES_PATH not set")


def get_resources_path():
    """
    Return the currently configured path to the Indic NLP Resources directory.
    """
    return INDIC_RESOURCES_PATH


def set_resources_path(resources_path):
    """
    Set the path to the Indic NLP Resources directory.

    Args:
        resources_path: the new value stored in ``INDIC_RESOURCES_PATH``.
    """
    global INDIC_RESOURCES_PATH
    INDIC_RESOURCES_PATH = resources_path
## language codes
LC_TA = "ta"

# First and last code point of the Unicode block used by each language.
# NOTE(review): the key "kK" is spelled exactly like this in the original
# table; it is kept verbatim because callers look languages up by key.
SCRIPT_RANGES = {
    "pa": [0x0A00, 0x0A7F],
    "gu": [0x0A80, 0x0AFF],
    "or": [0x0B00, 0x0B7F],
    "ta": [0x0B80, 0x0BFF],
    "te": [0x0C00, 0x0C7F],
    "kn": [0x0C80, 0x0CFF],
    "ml": [0x0D00, 0x0D7F],
    "si": [0x0D80, 0x0DFF],
    "hi": [0x0900, 0x097F],
    "mr": [0x0900, 0x097F],
    "kK": [0x0900, 0x097F],
    "sa": [0x0900, 0x097F],
    "ne": [0x0900, 0x097F],
    "sd": [0x0900, 0x097F],
    "bn": [0x0980, 0x09FF],
    "as": [0x0980, 0x09FF],
}

DRAVIDIAN_LANGUAGES = [
    "ta",
    "te",
    "kn",
    "ml",
]
IE_LANGUAGES = [
    "hi",
    "mr",
    "kK",
    "sa",
    "ne",
    "sd",
    "bn",
    "as",
    "pa",
    "gu",
    "or",
    "si",
]
DANDA_DELIM_LANGUAGES = ["as", "bn", "hi", "ne", "or", "pa", "sa", "sd"]

URDU_RANGES = [
    [0x0600, 0x06FF],
    [0x0750, 0x077F],
    [0xFB50, 0xFDFF],
    [0xFE70, 0xFEFF],
]

# All offsets below are relative to the start of a script's Unicode block
# (see get_offset / offset_to_char).
COORDINATED_RANGE_START_INCLUSIVE = 0
COORDINATED_RANGE_END_INCLUSIVE = 0x6F

NUMERIC_OFFSET_START = 0x66
NUMERIC_OFFSET_END = 0x6F

HALANTA_OFFSET = 0x4D
AUM_OFFSET = 0x50
NUKTA_OFFSET = 0x3C

RUPEE_SIGN = 0x20B9

DANDA = 0x0964
DOUBLE_DANDA = 0x0965

# TODO: add missing fricatives and approximants
VELAR_RANGE = [0x15, 0x19]
PALATAL_RANGE = [0x1A, 0x1E]
RETROFLEX_RANGE = [0x1F, 0x23]
DENTAL_RANGE = [0x24, 0x29]
LABIAL_RANGE = [0x2A, 0x2E]

# verify
VOICED_LIST = [0x17, 0x18, 0x1C, 0x1D, 0x21, 0x22, 0x26, 0x27, 0x2C, 0x2D]
UNVOICED_LIST = [
    0x15,
    0x16,
    0x1A,
    0x1B,
    0x1F,
    0x20,
    0x24,
    0x25,
    0x2A,
    0x2B,
]  # TODO: add sibilants/sonorants
ASPIRATED_LIST = [0x16, 0x18, 0x1B, 0x1D, 0x20, 0x22, 0x25, 0x27, 0x2B, 0x2D]
UNASPIRATED_LIST = [0x15, 0x17, 0x1A, 0x1C, 0x1F, 0x21, 0x24, 0x26, 0x2A, 0x2C]
# The last entry is 0x2E (the final consonant of the labial row). It used to
# be 0x2D, which is already listed above as a voiced aspirated stop, so the
# labial nasal was never recognised.
NASAL_LIST = [0x19, 0x1E, 0x23, 0x28, 0x29, 0x2E]
FRICATIVE_LIST = [0x36, 0x37, 0x38]
APPROXIMANT_LIST = [0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35]

# TODO: ha has to be properly categorized


def is_danda_delim(lang):
    """
    Returns True if danda/double danda is a possible delimiter for the language
    """
    return lang in DANDA_DELIM_LANGUAGES


def get_offset(c, lang):
    """
    Return the offset of character ``c`` from the start of ``lang``'s block.

    Applicable to Brahmi derived Indic scripts. Raises ``KeyError`` if
    ``lang`` is not a key of ``SCRIPT_RANGES``.
    """
    return ord(c) - SCRIPT_RANGES[lang][0]


def offset_to_char(c, lang):
    """
    Return the character located ``c`` code points after the start of
    ``lang``'s block (inverse of ``get_offset``).

    Applicable to Brahmi derived Indic scripts
    """
    return chr(c + SCRIPT_RANGES[lang][0])


def in_coordinated_range(c_offset):
    """
    Is the offset inside the inclusive coordinated range

    Applicable to Brahmi derived Indic scripts
    """
    return (
        COORDINATED_RANGE_START_INCLUSIVE
        <= c_offset
        <= COORDINATED_RANGE_END_INCLUSIVE
    )


def is_indiclang_char(c, lang):
    """
    Is the character inside ``lang``'s 128 code point block, or a
    danda/double danda

    Applicable to Brahmi derived Indic scripts
    """
    o = get_offset(c, lang)
    return (0 <= o <= 0x7F) or ord(c) == DANDA or ord(c) == DOUBLE_DANDA


# ------------------------------------------------------------------
# Character based predicates. Each one converts the character to its
# block offset and defers to the matching ``*_offset`` predicate below.
# ------------------------------------------------------------------


def is_vowel(c, lang):
    """
    Is the character a vowel
    """
    return is_vowel_offset(get_offset(c, lang))


def is_vowel_sign(c, lang):
    """
    Is the character a vowel sign (maatraa)
    """
    return is_vowel_sign_offset(get_offset(c, lang))


def is_halanta(c, lang):
    """
    Is the character the halanta character
    """
    return is_halanta_offset(get_offset(c, lang))


def is_nukta(c, lang):
    """
    Is the character the nukta character
    """
    return is_nukta_offset(get_offset(c, lang))


def is_aum(c, lang):
    """
    Is the character the aum character
    """
    return is_aum_offset(get_offset(c, lang))


def is_consonant(c, lang):
    """
    Is the character a consonant
    """
    return is_consonant_offset(get_offset(c, lang))


def is_velar(c, lang):
    """
    Is the character a velar
    """
    return is_velar_offset(get_offset(c, lang))


def is_palatal(c, lang):
    """
    Is the character a palatal
    """
    return is_palatal_offset(get_offset(c, lang))


def is_retroflex(c, lang):
    """
    Is the character a retroflex
    """
    return is_retroflex_offset(get_offset(c, lang))


def is_dental(c, lang):
    """
    Is the character a dental
    """
    return is_dental_offset(get_offset(c, lang))


def is_labial(c, lang):
    """
    Is the character a labial
    """
    return is_labial_offset(get_offset(c, lang))


def is_voiced(c, lang):
    """
    Is the character a voiced consonant
    """
    return is_voiced_offset(get_offset(c, lang))


def is_unvoiced(c, lang):
    """
    Is the character an unvoiced consonant
    """
    return is_unvoiced_offset(get_offset(c, lang))


def is_aspirated(c, lang):
    """
    Is the character an aspirated consonant
    """
    return is_aspirated_offset(get_offset(c, lang))


def is_unaspirated(c, lang):
    """
    Is the character an unaspirated consonant
    """
    return is_unaspirated_offset(get_offset(c, lang))


def is_nasal(c, lang):
    """
    Is the character a nasal consonant
    """
    return is_nasal_offset(get_offset(c, lang))


def is_fricative(c, lang):
    """
    Is the character a fricative consonant
    """
    return is_fricative_offset(get_offset(c, lang))


def is_approximant(c, lang):
    """
    Is the character an approximant consonant
    """
    return is_approximant_offset(get_offset(c, lang))


def is_number(c, lang):
    """
    Is the character a number
    """
    return is_number_offset(get_offset(c, lang))


##################################################


def is_vowel_offset(c_offset):
    """
    Is the offset a vowel
    """
    return 0x04 <= c_offset <= 0x14


def is_vowel_sign_offset(c_offset):
    """
    Is the offset a vowel sign (maatraa)
    """
    return 0x3E <= c_offset <= 0x4C


def is_halanta_offset(c_offset):
    """
    Is the offset the halanta offset
    """
    return c_offset == HALANTA_OFFSET


def is_nukta_offset(c_offset):
    """
    Is the offset the nukta offset
    """
    return c_offset == NUKTA_OFFSET


def is_aum_offset(c_offset):
    """
    Is the offset the aum offset
    """
    return c_offset == AUM_OFFSET


def is_consonant_offset(c_offset):
    """
    Is the offset a consonant
    """
    return 0x15 <= c_offset <= 0x39


def is_velar_offset(c_offset):
    """
    Is the offset a velar
    """
    return VELAR_RANGE[0] <= c_offset <= VELAR_RANGE[1]


def is_palatal_offset(c_offset):
    """
    Is the offset a palatal
    """
    return PALATAL_RANGE[0] <= c_offset <= PALATAL_RANGE[1]


def is_retroflex_offset(c_offset):
    """
    Is the offset a retroflex
    """
    return RETROFLEX_RANGE[0] <= c_offset <= RETROFLEX_RANGE[1]


def is_dental_offset(c_offset):
    """
    Is the offset a dental
    """
    return DENTAL_RANGE[0] <= c_offset <= DENTAL_RANGE[1]


def is_labial_offset(c_offset):
    """
    Is the offset a labial
    """
    return LABIAL_RANGE[0] <= c_offset <= LABIAL_RANGE[1]


def is_voiced_offset(c_offset):
    """
    Is the offset a voiced consonant
    """
    return c_offset in VOICED_LIST


def is_unvoiced_offset(c_offset):
    """
    Is the offset an unvoiced consonant
    """
    return c_offset in UNVOICED_LIST


def is_aspirated_offset(c_offset):
    """
    Is the offset an aspirated consonant
    """
    return c_offset in ASPIRATED_LIST


def is_unaspirated_offset(c_offset):
    """
    Is the offset an unaspirated consonant
    """
    return c_offset in UNASPIRATED_LIST


def is_nasal_offset(c_offset):
    """
    Is the offset a nasal consonant
    """
    return c_offset in NASAL_LIST


def is_fricative_offset(c_offset):
    """
    Is the offset a fricative consonant
    """
    return c_offset in FRICATIVE_LIST


def is_approximant_offset(c_offset):
    """
    Is the offset an approximant consonant
    """
    return c_offset in APPROXIMANT_LIST


def is_number_offset(c_offset):
    """
    Is the offset a number
    """
    return NUMERIC_OFFSET_START <= c_offset <= NUMERIC_OFFSET_END
def load():
    """
    Initialise the Indic NLP library; clients call this once before use.

    Every module that needs start-up work exposes an ``init()`` function and
    must be listed in the sequence below so that it is invoked from here.
    """
    # Order of initialisation may matter: ``common`` is first because the
    # other modules need access to the resources it sets up. It is followed
    # by the Indic scripts module, the English script module and finally the
    # unicode_transliterate module.
    init_sequence = (
        common,
        indic_scripts,
        english_script,
        unicode_transliterate,
    )
    for component in init_sequence:
        component.init()
class NormalizerI(object):
    """
    Base class for normalizers.

    The normalizer classes do the following:
    * Some characters have multiple Unicode codepoints. The normalizer chooses
      a single standard representation
    * Some control characters are deleted
    * While typing using the Latin keyboard, certain typical mistakes occur
      which are corrected by the module

    Script specific normalizers should derive from this class and override
    the normalize() method. They can call the super class' normalize() method
    to avail of the common normalization.
    """

    BYTE_ORDER_MARK = "\uFEFF"
    BYTE_ORDER_MARK_2 = "\uFFFE"
    WORD_JOINER = "\u2060"
    SOFT_HYPHEN = "\u00AD"

    ZERO_WIDTH_SPACE = "\u200B"
    NO_BREAK_SPACE = "\u00A0"

    ZERO_WIDTH_NON_JOINER = "\u200C"
    ZERO_WIDTH_JOINER = "\u200D"

    # (needle, replacement) pairs applied strictly in this order: the
    # single-quote look-alikes must become "'" before "''" is folded to '"'.
    _PUNCTUATION_REPLACEMENTS = (
        ("„", '"'),
        ("“", '"'),
        ("”", '"'),
        ("–", "-"),
        ("—", " - "),
        ("´", "'"),
        ("‘", "'"),
        ("‚", "'"),
        ("’", "'"),
        ("''", '"'),
        ("´´", '"'),
        ("…", "..."),
    )

    def _normalize_punctuations(self, text):
        """
        Normalize punctuations.

        Applies many of the punctuation normalizations that are part of
        MosesNormalizer from sacremoses, after removing byte order marks.
        Returns the normalized string.
        """
        text = text.replace(NormalizerI.BYTE_ORDER_MARK, "")
        for needle, replacement in NormalizerI._PUNCTUATION_REPLACEMENTS:
            text = text.replace(needle, replacement)
        return text

    def normalize(self, text):
        """Placeholder to be overridden by subclasses; returns None here."""
        pass


class BaseNormalizer(NormalizerI):
    """
    Common normalization shared by the script specific normalizers:
    * Byte order mark, word joiner, soft hyphen removal
    * ZERO_WIDTH_NON_JOINER and ZERO_WIDTH_JOINER removal
    * ZERO_WIDTH_SPACE and NO_BREAK_SPACE replaced by spaces
    * punctuation normalization
    * optional chandra, nasal and word-final vowel normalization
    """

    # Offsets (relative to the script block) shared by the nasal rules.
    _HALANT_OFFSET = 0x4D
    _ANUSVAARA_OFFSET = 0x02

    # [nasal, first consonant, last consonant]: the nasal and the inclusive
    # consonant range that may follow it in the strict nasal rules.
    _NASAL_SIGNATURES = (
        (0x19, 0x15, 0x18),
        (0x1E, 0x1A, 0x1D),
        (0x23, 0x1F, 0x22),
        (0x28, 0x24, 0x27),
        (0x29, 0x24, 0x27),
        (0x2E, 0x2A, 0x2D),
    )

    def __init__(
        self,
        lang,
        remove_nuktas=False,
        nasals_mode="do_nothing",
        do_normalize_chandras=False,
        do_normalize_vowel_ending=False,
    ):
        """
        Args:
            lang: language code used for all ``langinfo`` lookups.
            remove_nuktas: stored for subclasses, which strip nuktas if set.
            nasals_mode: "to_anusvaara_strict", "to_anusvaara_relaxed" or
                "to_nasal_consonants"; any other value leaves nasals alone.
            do_normalize_chandras: apply the chandra substitutions.
            do_normalize_vowel_ending: apply the word-final vowel rule.
        """
        self.lang = lang
        self.remove_nuktas = remove_nuktas
        self.nasals_mode = nasals_mode
        self.do_normalize_chandras = do_normalize_chandras
        self.do_normalize_vowel_ending = do_normalize_vowel_ending

        self._init_normalize_chandras()
        self._init_normalize_nasals()
        self._init_normalize_vowel_ending()

    def _init_normalize_vowel_ending(self):
        """Select the per-word vowel-ending function for ``self.lang``."""
        if self.lang in langinfo.IE_LANGUAGES:
            self.fn_vowel_ending = self._normalize_word_vowel_ending_ie
        elif self.lang in langinfo.DRAVIDIAN_LANGUAGES:
            self.fn_vowel_ending = self._normalize_word_vowel_ending_dravidian
        else:
            self.fn_vowel_ending = lambda x: x

    def _init_normalize_chandras(self):
        """Build the (match, replacement) character pairs for chandras."""
        substitution_offsets = [
            [0x0D, 0x0F],  # chandra e, independent
            [0x11, 0x13],  # chandra o, independent
            [0x45, 0x47],  # chandra e, dependent
            [0x49, 0x4B],  # chandra o, dependent
            # [0x72 , 0x0f], # mr: chandra e, independent
            [0x00, 0x02],  # chandrabindu
            [0x01, 0x02],  # chandrabindu
        ]

        self.chandra_substitutions = [
            (
                langinfo.offset_to_char(src, self.lang),
                langinfo.offset_to_char(dst, self.lang),
            )
            for src, dst in substitution_offsets
        ]

    def _normalize_chandras(self, text):
        """Apply every chandra substitution to ``text`` and return it."""
        for match, repl in self.chandra_substitutions:
            text = text.replace(match, repl)
        return text

    def _init_to_anusvaara_strict(self):
        """
        Compile one pattern per nasal signature matching
        nasal + halant + (consonant in the signature's range), e.g. for
        Devanagari `\\u0919\\u094D([\\u0915-\\u0918])`. Stores
        ``(patterns, replacement)`` in ``self.pats_repls``.
        """
        lang = self.lang
        halant = langinfo.offset_to_char(self._HALANT_OFFSET, lang)

        pats = [
            re.compile(
                r"{nasal}{halant}([{start_r}-{end_r}])".format(
                    nasal=langinfo.offset_to_char(nasal, lang),
                    halant=halant,
                    start_r=langinfo.offset_to_char(first, lang),
                    end_r=langinfo.offset_to_char(last, lang),
                )
            )
            for nasal, first, last in self._NASAL_SIGNATURES
        ]

        repl_string = "{anusvaara}\\1".format(
            anusvaara=langinfo.offset_to_char(self._ANUSVAARA_OFFSET, lang)
        )

        self.pats_repls = (pats, repl_string)

    def _to_anusvaara_strict(self, text):
        """Replace nasal+halant before a consonant of its class by anusvaara."""
        pats, repl_string = self.pats_repls
        for pat in pats:
            text = pat.sub(repl_string, text)

        return text

    def _init_to_anusvaara_relaxed(self):
        """
        Compile a single pattern matching any nasal consonant followed by a
        halant. Stores ``(pattern, replacement)`` in ``self.pats_repls``.
        """
        nasals_list = [0x19, 0x1E, 0x23, 0x28, 0x29, 0x2E]
        # Joined WITHOUT a separator: inside a regex character class a ","
        # separator would itself become a member of the class and make
        # ", + halant" match as well.
        nasals_list_str = "".join(
            langinfo.offset_to_char(x, self.lang) for x in nasals_list
        )

        pat = re.compile(
            r"[{nasals_list_str}]{halant}".format(
                nasals_list_str=nasals_list_str,
                halant=langinfo.offset_to_char(self._HALANT_OFFSET, self.lang),
            )
        )

        repl_string = "{anusvaara}".format(
            anusvaara=langinfo.offset_to_char(self._ANUSVAARA_OFFSET, self.lang)
        )

        self.pats_repls = (pat, repl_string)

    def _to_anusvaara_relaxed(self, text):
        """Replace every nasal+halant pair by the anusvaara."""
        pat, repl_string = self.pats_repls
        return pat.sub(repl_string, text)

    def _init_to_nasal_consonants(self):
        """
        Compile one pattern per nasal signature matching
        anusvaara + (consonant in the signature's range), paired with the
        replacement nasal + halant + consonant. Stores the list of
        ``(pattern, replacement)`` pairs in ``self.pats_repls``.
        """
        lang = self.lang
        halant = langinfo.offset_to_char(self._HALANT_OFFSET, lang)
        anusvaara = langinfo.offset_to_char(self._ANUSVAARA_OFFSET, lang)

        pats_repls = []
        for nasal, first, last in self._NASAL_SIGNATURES:
            pat = re.compile(
                r"{anusvaara}([{start_r}-{end_r}])".format(
                    anusvaara=anusvaara,
                    start_r=langinfo.offset_to_char(first, lang),
                    end_r=langinfo.offset_to_char(last, lang),
                )
            )
            repl_string = "{nasal}{halant}\\1".format(
                nasal=langinfo.offset_to_char(nasal, lang),
                halant=halant,
            )
            pats_repls.append((pat, repl_string))

        self.pats_repls = pats_repls

    def _to_nasal_consonants(self, text):
        """Replace anusvaara before a consonant by that class's nasal+halant."""
        for pat, repl in self.pats_repls:
            text = pat.sub(repl, text)

        return text

    def _init_normalize_nasals(self):
        """Prepare ``self.pats_repls`` for the configured ``nasals_mode``."""
        if self.nasals_mode == "to_anusvaara_strict":
            self._init_to_anusvaara_strict()
        elif self.nasals_mode == "to_anusvaara_relaxed":
            self._init_to_anusvaara_relaxed()
        elif self.nasals_mode == "to_nasal_consonants":
            self._init_to_nasal_consonants()

    def _normalize_nasals(self, text):
        """Apply the configured nasal rule; unknown modes return ``text``."""
        if self.nasals_mode == "to_anusvaara_strict":
            return self._to_anusvaara_strict(text)
        elif self.nasals_mode == "to_anusvaara_relaxed":
            return self._to_anusvaara_relaxed(text)
        elif self.nasals_mode == "to_nasal_consonants":
            return self._to_nasal_consonants(text)
        else:
            return text

    def _normalize_word_vowel_ending_dravidian(self, word):
        """
        for Dravidian
        - consonant ending: append the vowel sign at offset 0x3E
        - anything else (including the empty word): no change
        """
        if len(word) > 0 and langinfo.is_consonant(word[-1], self.lang):
            return word + langinfo.offset_to_char(0x3E, self.lang)
        else:
            return word

    def _normalize_word_vowel_ending_ie(self, word):
        """
        for IE
        - consonant ending: add halant
        - anything else (including the empty word): no change
        """
        if len(word) > 0 and langinfo.is_consonant(word[-1], self.lang):
            return word + langinfo.offset_to_char(langinfo.HALANTA_OFFSET, self.lang)
        else:
            return word

    def _normalize_vowel_ending(self, text):
        """Apply ``self.fn_vowel_ending`` to each single-space separated word."""
        return " ".join([self.fn_vowel_ending(w) for w in text.split(" ")])

    def normalize(self, text):
        """
        Apply the script independent normalization and return the result.

        Script specific subclasses call this first and then add their own
        rules.
        """
        for ch in (
            NormalizerI.BYTE_ORDER_MARK,
            NormalizerI.BYTE_ORDER_MARK_2,
            NormalizerI.WORD_JOINER,
            NormalizerI.SOFT_HYPHEN,
        ):
            text = text.replace(ch, "")

        for ch in (NormalizerI.ZERO_WIDTH_SPACE, NormalizerI.NO_BREAK_SPACE):
            text = text.replace(ch, " ")

        for ch in (NormalizerI.ZERO_WIDTH_NON_JOINER, NormalizerI.ZERO_WIDTH_JOINER):
            text = text.replace(ch, "")

        text = self._normalize_punctuations(text)

        if self.do_normalize_chandras:
            text = self._normalize_chandras(text)
        text = self._normalize_nasals(text)
        if self.do_normalize_vowel_ending:
            text = self._normalize_vowel_ending(text)

        return text

    def get_char_stats(self, text):
        """Print, one per line, how often each special character occurs."""
        for ch in (
            NormalizerI.BYTE_ORDER_MARK,
            NormalizerI.BYTE_ORDER_MARK_2,
            NormalizerI.WORD_JOINER,
            NormalizerI.SOFT_HYPHEN,
            NormalizerI.ZERO_WIDTH_SPACE,
            NormalizerI.NO_BREAK_SPACE,
            NormalizerI.ZERO_WIDTH_NON_JOINER,
            NormalizerI.ZERO_WIDTH_JOINER,
        ):
            print(len(re.findall(ch, text)))

    def correct_visarga(self, text, visarga_char, char_range):
        """
        Replace a colon that follows a Devanagari character by the visarga
        and return the corrected text.

        ``visarga_char`` and ``char_range`` are accepted for interface
        compatibility but are not used: the rule is hard-coded for the
        Devanagari block. The result used to be computed and discarded;
        it is now returned.
        """
        return re.sub(r"([\u0900-\u097f]):", "\\1\u0903", text)


class DevanagariNormalizer(BaseNormalizer):
    """
    Normalizer for the Devanagari script. In addition to basic normalization by the super class,
    * Replaces the composite characters containing nuktas by their decomposed form
    * replace pipe character '|' by poorna virama character
    * replace colon ':' by visarga if the colon follows a character in this script
    """

    NUKTA = "\u093C"

    # precomposed nukta character -> base consonant (the nukta is appended)
    _NUKTA_DECOMPOSITIONS = {
        "\u0929": "\u0928",
        "\u0931": "\u0930",
        "\u0934": "\u0933",
        "\u0958": "\u0915",
        "\u0959": "\u0916",
        "\u095A": "\u0917",
        "\u095B": "\u091C",
        "\u095C": "\u0921",
        "\u095D": "\u0922",
        "\u095E": "\u092B",
        "\u095F": "\u092F",
    }

    def __init__(
        self,
        lang="hi",
        remove_nuktas=False,
        nasals_mode="do_nothing",
        do_normalize_chandras=False,
        do_normalize_vowel_ending=False,
    ):
        """Same options as ``BaseNormalizer``; ``lang`` defaults to "hi"."""
        super().__init__(
            lang,
            remove_nuktas,
            nasals_mode,
            do_normalize_chandras,
            do_normalize_vowel_ending,
        )

    def normalize(self, text):
        """Return ``text`` with common and Devanagari specific rules applied."""
        # common normalization for Indic scripts
        text = super().normalize(text)

        # chandra a replacement for Marathi
        text = text.replace("\u0972", "\u090f")

        # decomposing Nukta based composite characters
        for composite, base in DevanagariNormalizer._NUKTA_DECOMPOSITIONS.items():
            text = text.replace(composite, base + DevanagariNormalizer.NUKTA)

        if self.remove_nuktas:
            text = text.replace(DevanagariNormalizer.NUKTA, "")

        # replace pipe character for poorna virama
        text = text.replace("\u007c", "\u0964")

        # correct visarga
        text = re.sub(r"([\u0900-\u097f]):", "\\1\u0903", text)

        return text

    def get_char_stats(self, text):
        """Print the base class counts, then one count per nukta composite."""
        super().get_char_stats(text)

        for composite in DevanagariNormalizer._NUKTA_DECOMPOSITIONS:
            print(len(re.findall(composite, text)))
In addition to basic normalization by the super class, + * Replaces the composite characters containing nuktas by their decomposed form + * Replace the reserved character for poorna virama (if used) with the recommended generic Indic scripts poorna virama + * replace pipe character '|' by poorna virama character + * replace colon ':' by visarga if the colon follows a charcter in this script + """ + + NUKTA = "\u0A3C" + + VOWEL_NORM_MAPS = { + ## http://www.unicode.org/versions/Unicode12.1.0/ch12.pdf + ## Table 12-16 + "\u0a05\u0a3e": "\u0a06", + "\u0a72\u0a3f": "\u0a07", + "\u0a72\u0a40": "\u0a08", + "\u0a73\u0a41": "\u0a09", + "\u0a73\u0a42": "\u0a0a", + "\u0a72\u0a47": "\u0a0f", + "\u0a05\u0a48": "\u0a10", + "\u0a73\u0a4b": "\u0a13", + "\u0a05\u0a4c": "\u0a14", + } + + def __init__( + self, + lang="pa", + remove_nuktas=False, + nasals_mode="do_nothing", + do_normalize_chandras=False, + do_normalize_vowel_ending=False, + do_canonicalize_addak=False, + do_canonicalize_tippi=False, + do_replace_vowel_bases=False, + ): + super(GurmukhiNormalizer, self).__init__( + lang, + remove_nuktas, + nasals_mode, + do_normalize_chandras, + do_normalize_vowel_ending, + ) + self.do_canonicalize_addak = do_canonicalize_addak + self.do_canonicalize_tippi = do_canonicalize_tippi + self.do_replace_vowel_bases = do_replace_vowel_bases + + def _normalize_vowels(self, text): + """ """ + + ## standard vowel replacements as per suggestions in + ## http://www.unicode.org/versions/Unicode12.1.0/ch12.pdf + ## Table 12-16 + + for k, v in GurmukhiNormalizer.VOWEL_NORM_MAPS.items(): + text = text.replace(k, v) + + ## the above mappings should account for majority of the variantions, + ## Rest are handled via this generic rule which looks at the diacritic + ## following the 2 special characters + ## TBD: don't see evidence for this in Wikipedia corpus + + ## If these special characters occur without any diacritic, replace them with closet + ## equivalent vowels + if self.do_replace_vowel_bases: + 
text = text.replace("\u0a72", "\u0a07") + text = text.replace("\u0a73", "\u0a09") + + return text + + def normalize(self, text): + # Addak + if self.do_canonicalize_addak: + ## replace addak+consonant with consonat+halant+consonant + text = re.sub(r"\u0a71(.)", "\\1\u0a4d\\1", text) + + # Tippi + if self.do_canonicalize_tippi: + text = text.replace("\u0a70", "\u0a02") + + # Vowels: Gurumuki has multiple ways of representing independent vowels due + # to the characters 'iri' and 'ura'. + text = self._normalize_vowels(text) + + # common normalization for Indic scripts + text = super(GurmukhiNormalizer, self).normalize(text) + + # decomposing Nukta based composite characters + text = text.replace("\u0a33", "\u0a32" + GurmukhiNormalizer.NUKTA) + text = text.replace("\u0a36", "\u0a38" + GurmukhiNormalizer.NUKTA) + text = text.replace("\u0a59", "\u0a16" + GurmukhiNormalizer.NUKTA) + text = text.replace("\u0a5a", "\u0a17" + GurmukhiNormalizer.NUKTA) + text = text.replace("\u0a5b", "\u0a1c" + GurmukhiNormalizer.NUKTA) + text = text.replace("\u0a5e", "\u0a2b" + GurmukhiNormalizer.NUKTA) + + if self.remove_nuktas: + text = text.replace(GurmukhiNormalizer.NUKTA, "") + + # replace the poorna virama codes specific to script + # with generic Indic script codes + text = text.replace("\u0a64", "\u0964") + text = text.replace("\u0a65", "\u0965") + + ## replace pipe character for poorna virama + text = text.replace("\u007c", "\u0964") + + # correct visarge + text = re.sub(r"([\u0a00-\u0a7f]):", "\\1\u0a03", text) + + return text + + +class GujaratiNormalizer(BaseNormalizer): + """ + Normalizer for the Gujarati script. 
In addition to basic normalization by the super class, + * Replace the reserved character for poorna virama (if used) with the recommended generic Indic scripts poorna virama + * replace colon ':' by visarga if the colon follows a charcter in this script + """ + + NUKTA = "\u0ABC" + + def __init__( + self, + lang="gu", + remove_nuktas=False, + nasals_mode="do_nothing", + do_normalize_chandras=False, + do_normalize_vowel_ending=False, + ): + super(GujaratiNormalizer, self).__init__( + lang, + remove_nuktas, + nasals_mode, + do_normalize_chandras, + do_normalize_vowel_ending, + ) + + def normalize(self, text): + # common normalization for Indic scripts + text = super(GujaratiNormalizer, self).normalize(text) + + # decomposing Nukta based composite characters + if self.remove_nuktas: + text = text.replace(GujaratiNormalizer.NUKTA, "") + + # replace the poorna virama codes specific to script + # with generic Indic script codes + text = text.replace("\u0ae4", "\u0964") + text = text.replace("\u0ae5", "\u0965") + + # correct visarge + text = re.sub(r"([\u0a80-\u0aff]):", "\\1\u0a83", text) + + return text + + +class OriyaNormalizer(BaseNormalizer): + """ + Normalizer for the Oriya script. 
class OriyaNormalizer(BaseNormalizer):
    """
    Normalizer for the Oriya script. In addition to basic normalization by the super class,
    * Replaces the composite characters containing nuktas by their decomposed form
    * Replace the reserved character for poorna virama (if used) with the recommended generic Indic scripts poorna virama
    * Canonicalize two part dependent vowels
    * Replace 'va' with 'ba' (always) and 'wa' with 'ba' (only when ``do_remap_wa`` is set)
    * replace pipe character '|' by poorna virama character
    * replace colon ':' by visarga if the colon follows a character in this script
    """

    NUKTA = "\u0B3C"

    VOWEL_NORM_MAPS = {
        ## See Table 12-22 in http://www.unicode.org/versions/Unicode12.1.0/ch12.pdf
        "\u0b05\u0b3e": "\u0b06",
        "\u0b0f\u0b57": "\u0b10",
        "\u0b13\u0b57": "\u0b14",
    }

    def __init__(
        self,
        lang="or",
        remove_nuktas=False,
        nasals_mode="do_nothing",
        do_normalize_chandras=False,
        do_normalize_vowel_ending=False,
        do_remap_wa=False,
    ):
        super(OriyaNormalizer, self).__init__(
            lang,
            remove_nuktas,
            nasals_mode,
            do_normalize_chandras,
            do_normalize_vowel_ending,
        )
        self.do_remap_wa = do_remap_wa

    def normalize(self, text):
        """Return ``text`` after the generic and the Oriya-specific steps."""
        # common normalization for Indic scripts
        text = super(OriyaNormalizer, self).normalize(text)

        ## standard vowel replacements as per suggestions in Unicode documents
        for k, v in OriyaNormalizer.VOWEL_NORM_MAPS.items():
            text = text.replace(k, v)

        # decomposing Nukta based composite characters
        text = text.replace("\u0b5c", "\u0b21" + OriyaNormalizer.NUKTA)
        text = text.replace("\u0b5d", "\u0b22" + OriyaNormalizer.NUKTA)

        if self.remove_nuktas:
            text = text.replace(OriyaNormalizer.NUKTA, "")

        # replace the poorna virama codes specific to script
        # with generic Indic script codes
        text = text.replace("\u0b64", "\u0964")
        text = text.replace("\u0b65", "\u0965")

        # replace pipe character for poorna virama.
        # The original replaced only U+0B7C, which is not the pipe; U+007C is the
        # pipe that the class docstring promises and that the Gurmukhi and
        # Bengali normalizers replace.
        text = text.replace("\u007c", "\u0964")
        # kept so that input which relied on the old U+0B7C mapping is unaffected
        text = text.replace("\u0b7c", "\u0964")

        # replace wa with ba
        if self.do_remap_wa:
            text = text.replace("\u0b71", "\u0b2c")

        # replace va with ba
        # NOTE: documentation (chapter on Indic scripts) and codepoint chart seem contradictory
        # (this applied to wa to ba rule also above)
        text = text.replace("\u0b35", "\u0b2c")

        # AI dependent vowel sign
        text = text.replace("\u0b47\u0b56", "\u0b58")

        # two part dependent vowels
        text = text.replace("\u0b47\u0b3e", "\u0b4b")
        text = text.replace("\u0b47\u0b57", "\u0b4c")

        # additional consonant - not clear how to handle this
        # ignore

        # correct visarga: a colon directly after an Oriya-block character
        text = re.sub(r"([\u0b00-\u0b7f]):", "\\1\u0b03", text)

        return text
class BengaliNormalizer(BaseNormalizer):
    """
    Bengali script normalizer (also used for Assamese).

    Runs the generic normalization of the super class and then:
    * decomposes the nukta-bearing composite characters (optionally dropping the nukta)
    * remaps the Assamese 'ra' and 'va' characters when requested and ``lang`` is "as"
    * maps reserved danda code points, the pipe character and the currency
      numerator four to the generic Indic poorna virama
    * canonicalizes two part dependent vowels
    * turns a colon that directly follows a Bengali-block character into a visarga
    """

    NUKTA = "\u09BC"

    def __init__(
        self,
        lang="bn",
        remove_nuktas=False,
        nasals_mode="do_nothing",
        do_normalize_chandras=False,
        do_normalize_vowel_ending=False,
        do_remap_assamese_chars=False,
    ):
        base_args = (
            lang,
            remove_nuktas,
            nasals_mode,
            do_normalize_chandras,
            do_normalize_vowel_ending,
        )
        super(BengaliNormalizer, self).__init__(*base_args)
        self.do_remap_assamese_chars = do_remap_assamese_chars

    def normalize(self, text):
        """Return ``text`` after the generic and the Bengali-specific steps."""
        result = super(BengaliNormalizer, self).normalize(text)

        # composite character -> base consonant followed by the nukta
        nukta_forms = (
            ("\u09dc", "\u09a1" + BengaliNormalizer.NUKTA),
            ("\u09dd", "\u09a2" + BengaliNormalizer.NUKTA),
            ("\u09df", "\u09af" + BengaliNormalizer.NUKTA),
        )
        for composite, decomposed in nukta_forms:
            result = result.replace(composite, decomposed)

        if self.remove_nuktas:
            result = result.replace(BengaliNormalizer.NUKTA, "")

        if self.do_remap_assamese_chars and self.lang == "as":
            result = result.replace("\u09f0", "\u09b0")  # 'ra' character
            result = result.replace("\u09f1", "\u09ac")  # 'va' character

        later_steps = (
            # script-specific poorna virama codes -> generic Indic codes
            ("\u09e4", "\u0964"),
            ("\u09e5", "\u0965"),
            # pipe character used as poorna virama
            ("\u007c", "\u0964"),
            # currency numerator four looks similar and is used as a substitute
            ("\u09f7", "\u0964"),
            # two part dependent vowels
            ("\u09c7\u09be", "\u09cb"),
            ("\u09c7\u09d7", "\u09cc"),
        )
        for old, new in later_steps:
            result = result.replace(old, new)

        # colon after a Bengali character becomes a visarga
        return re.sub(r"([\u0980-\u09ff]):", "\\1\u0983", result)
class TamilNormalizer(BaseNormalizer):
    """
    Tamil script normalizer.

    Runs the generic normalization of the super class and then:
    * maps the script-specific danda code points to the generic Indic ones
    * canonicalizes two-part dependent vowel signs
    * turns a colon that directly follows a Tamil-block character into a visarga
    """

    def __init__(
        self,
        lang="ta",
        remove_nuktas=False,
        nasals_mode="do_nothing",
        do_normalize_chandras=False,
        do_normalize_vowel_ending=False,
    ):
        base_args = (
            lang,
            remove_nuktas,
            nasals_mode,
            do_normalize_chandras,
            do_normalize_vowel_ending,
        )
        super(TamilNormalizer, self).__init__(*base_args)

    def normalize(self, text):
        """Return ``text`` after the generic and the Tamil-specific steps."""
        result = super(TamilNormalizer, self).normalize(text)

        replacements = (
            # script-specific poorna virama codes -> generic Indic codes
            ("\u0be4", "\u0964"),
            ("\u0be5", "\u0965"),
            # two part dependent vowels
            ("\u0b92\u0bd7", "\u0b94"),
            ("\u0bc6\u0bbe", "\u0bca"),
            ("\u0bc7\u0bbe", "\u0bcb"),
            ("\u0bc6\u0bd7", "\u0bcc"),
        )
        for old, new in replacements:
            result = result.replace(old, new)

        # colon after a Tamil character becomes a visarga
        return re.sub(r"([\u0b80-\u0bff]):", "\\1\u0b83", result)
class TeluguNormalizer(BaseNormalizer):
    """
    Telugu script normalizer.

    Runs the generic normalization of the super class and then:
    * maps the script-specific danda code points to the generic Indic ones
    * canonicalizes the two-part dependent vowel sign
    * turns a colon that directly follows a Telugu-block character into a visarga
    """

    def __init__(
        self,
        lang="te",
        remove_nuktas=False,
        nasals_mode="do_nothing",
        do_normalize_chandras=False,
        do_normalize_vowel_ending=False,
    ):
        base_args = (
            lang,
            remove_nuktas,
            nasals_mode,
            do_normalize_chandras,
            do_normalize_vowel_ending,
        )
        super(TeluguNormalizer, self).__init__(*base_args)

    def normalize(self, text):
        """Return ``text`` after the generic and the Telugu-specific steps."""
        result = super(TeluguNormalizer, self).normalize(text)

        replacements = (
            # script-specific poorna virama codes -> generic Indic codes
            ("\u0c64", "\u0964"),
            ("\u0c65", "\u0965"),
            # dependent vowels
            ("\u0c46\u0c56", "\u0c48"),
        )
        for old, new in replacements:
            result = result.replace(old, new)

        # colon after a Telugu character becomes a visarga
        return re.sub(r"([\u0c00-\u0c7f]):", "\\1\u0c03", result)

    def get_char_stats(self, text):
        """Placeholder: does nothing and returns None."""
        return None
class KannadaNormalizer(BaseNormalizer):
    """
    Kannada script normalizer.

    Runs the generic normalization of the super class and then:
    * maps the script-specific danda code points to the generic Indic ones
    * canonicalizes two-part dependent vowel signs
    * turns a colon that directly follows a Kannada-block character into a visarga
    """

    def __init__(
        self,
        lang="kn",
        remove_nuktas=False,
        nasals_mode="do_nothing",
        do_normalize_chandras=False,
        do_normalize_vowel_ending=False,
    ):
        base_args = (
            lang,
            remove_nuktas,
            nasals_mode,
            do_normalize_chandras,
            do_normalize_vowel_ending,
        )
        super(KannadaNormalizer, self).__init__(*base_args)

    def normalize(self, text):
        """Return ``text`` after the generic and the Kannada-specific steps."""
        result = super(KannadaNormalizer, self).normalize(text)

        # Order matters: the rule producing U+0CCA must run before the rule
        # that consumes U+0CCA.
        replacements = (
            # script-specific poorna virama codes -> generic Indic codes
            ("\u0ce4", "\u0964"),
            ("\u0ce5", "\u0965"),
            # dependent vowels
            ("\u0cbf\u0cd5", "\u0cc0"),
            ("\u0cc6\u0cd5", "\u0cc7"),
            ("\u0cc6\u0cd6", "\u0cc8"),
            ("\u0cc6\u0cc2", "\u0cca"),
            ("\u0cca\u0cd5", "\u0ccb"),
        )
        for old, new in replacements:
            result = result.replace(old, new)

        # colon after a Kannada character becomes a visarga
        return re.sub(r"([\u0c80-\u0cff]):", "\\1\u0c83", result)
class MalayalamNormalizer(BaseNormalizer):
    """
    Malayalam script normalizer.

    In addition to the generic normalization of the super class:
    * rewrites the old chillu encoding (consonant + virama + ZWJ) to the atomic chillu characters
    * optionally expands chillus to consonant + virama (``do_canonicalize_chillus``)
    * maps the script-specific danda code points to the generic Indic ones
    * canonicalizes two-part dependent vowel signs and the 'au' forms
    * optionally corrects the geminated T (``do_correct_geminated_T``)
    * turns a colon that directly follows a Malayalam-block character into a visarga
    """

    CHILLU_CHAR_MAP = {
        "\u0d7a": "\u0d23",
        "\u0d7b": "\u0d28",
        "\u0d7c": "\u0d30",
        "\u0d7d": "\u0d32",
        "\u0d7e": "\u0d33",
        "\u0d7f": "\u0d15",
    }

    def __init__(
        self,
        lang="ml",
        remove_nuktas=False,
        nasals_mode="do_nothing",
        do_normalize_chandras=False,
        do_normalize_vowel_ending=False,
        do_canonicalize_chillus=False,
        do_correct_geminated_T=False,
    ):
        base_args = (
            lang,
            remove_nuktas,
            nasals_mode,
            do_normalize_chandras,
            do_normalize_vowel_ending,
        )
        super(MalayalamNormalizer, self).__init__(*base_args)
        self.do_canonicalize_chillus = do_canonicalize_chillus
        self.do_correct_geminated_T = do_correct_geminated_T

    def _canonicalize_chillus(self, text):
        """Replace every atomic chillu by its base consonant followed by the virama."""
        for chillu in MalayalamNormalizer.CHILLU_CHAR_MAP:
            base = MalayalamNormalizer.CHILLU_CHAR_MAP[chillu]
            text = text.replace(chillu, "{}\u0d4d".format(base))
        return text

    def _correct_geminated_T(self, text):
        """Rewrite the U+0D31 U+0D4D U+0D31 sequence as U+0D1F U+0D4D U+0D1F."""
        corrected = text.replace("\u0d31\u0d4d\u0d31", "\u0d1f\u0d4d\u0d1f")
        return corrected

    def normalize(self, text):
        """Return ``text`` after the Malayalam-specific and the generic steps."""
        # Change from old encoding of chillus (till Unicode 5.0) to new encoding
        old_chillus = (
            ("\u0d23\u0d4d\u200d", "\u0d7a"),
            ("\u0d28\u0d4d\u200d", "\u0d7b"),
            ("\u0d30\u0d4d\u200d", "\u0d7c"),
            ("\u0d32\u0d4d\u200d", "\u0d7d"),
            ("\u0d33\u0d4d\u200d", "\u0d7e"),
            ("\u0d15\u0d4d\u200d", "\u0d7f"),
        )
        for legacy, atomic in old_chillus:
            text = text.replace(legacy, atomic)

        # Normalize chillus
        if self.do_canonicalize_chillus:
            text = self._canonicalize_chillus(text)

        # common normalization for Indic scripts
        text = super(MalayalamNormalizer, self).normalize(text)

        replacements = (
            # script-specific poorna virama codes -> generic Indic codes
            ("\u0d64", "\u0964"),
            ("\u0d65", "\u0965"),
            # dependent vowels
            ("\u0d46\u0d3e", "\u0d4a"),
            ("\u0d47\u0d3e", "\u0d4b"),
            # au forms: the pair first, then any remaining lone length mark
            ("\u0d46\u0d57", "\u0d4c"),
            ("\u0d57", "\u0d4c"),
        )
        for old, new in replacements:
            text = text.replace(old, new)

        # correct geminated T
        if self.do_correct_geminated_T:
            text = self._correct_geminated_T(text)

        # colon after a Malayalam character becomes a visarga
        return re.sub(r"([\u0d00-\u0d7f]):", "\\1\u0d03", text)
class UrduNormalizer(NormalizerI):
    """Urdu normalizer built on the bundled UrduHack helpers.

    https://docs.urduhack.com/en/stable/_modules/urduhack/normalization/character.html#normalize
    """

    def __init__(self, lang, remove_nuktas=True):
        # Imported here rather than at module level, so the helpers are only
        # loaded when an Urdu normalizer is actually created.
        from indicnlp.urduhack.normalization import (
            remove_diacritics,
            normalize_characters,
            normalize_combine_characters,
        )  # TODO: Use only required normalizers
        from indicnlp.urduhack.preprocessing import (
            normalize_whitespace,
            digits_space,
            all_punctuations_space,
            english_characters_space,
        )

        self.lang = lang
        self.remove_nuktas = remove_nuktas

        # character-level normalizers
        self.remove_diacritics = remove_diacritics
        self.normalize_characters = normalize_characters
        self.normalize_combine_characters = normalize_combine_characters

        # spacing / whitespace helpers
        self.normalize_whitespace = normalize_whitespace
        self.digits_space = digits_space
        self.all_punctuations_space = all_punctuations_space
        self.english_characters_space = english_characters_space

    def normalize(self, text):
        """Run the Urdu normalization pipeline over ``text`` and return the result."""
        # NOTE(review): _normalize_punctuations is not defined in this class;
        # presumably inherited from NormalizerI -- confirm.
        text = self._normalize_punctuations(text)
        text = self.normalize_whitespace(text)
        if self.remove_nuktas:
            text = self.remove_diacritics(text)

        remaining_steps = (
            self.normalize_characters,
            self.normalize_combine_characters,
            self.digits_space,
            self.all_punctuations_space,
            self.english_characters_space,
        )
        for step in remaining_steps:
            text = step(text)
        return text
class IndicNormalizerFactory(object):
    """
    Factory that hands out the normalizer matching a language code.
    """

    def get_normalizer(self, language, **kwargs):
        """
        Build the language specific normalizer.

        Parameters:
        |language: language code
        |kwargs: forwarded unchanged to the normalizer constructor
        |        (e.g. remove_nuktas: boolean, should the normalizer remove nukta characters)

        Languages without a dedicated class get a BaseNormalizer.
        """
        # Built per call so the class names are resolved only when the method runs.
        dispatch = (
            (("hi", "mr", "sa", "kK", "ne", "sd"), DevanagariNormalizer),
            (("ur",), UrduNormalizer),
            (("pa",), GurmukhiNormalizer),
            (("gu",), GujaratiNormalizer),
            (("bn",), BengaliNormalizer),
            (("as",), BengaliNormalizer),
            (("or",), OriyaNormalizer),
            (("ml",), MalayalamNormalizer),
            (("kn",), KannadaNormalizer),
            (("ta",), TamilNormalizer),
            (("te",), TeluguNormalizer),
        )
        for codes, normalizer_cls in dispatch:
            if language in codes:
                return normalizer_cls(lang=language, **kwargs)

        return BaseNormalizer(lang=language, **kwargs)

    def is_language_supported(self, language):
        """
        Return True when ``language`` has a dedicated normalizer, else False.
        """
        supported = (
            "hi",
            "mr",
            "sa",
            "kK",
            "ne",
            "sd",
            "ur",
            "pa",
            "gu",
            "bn",
            "as",
            "or",
            "ml",
            "kn",
            "ta",
            "te",
        )
        return language in supported
if __name__ == "__main__":
    # Command-line entry point: normalize a UTF-8 text file line by line.
    #   argv[1] input file, argv[2] output file, argv[3] language code,
    #   argv[4] optional remove-nuktas flag, argv[5] optional nasals mode.
    if len(sys.argv) < 4:
        # The original message had lost its placeholders ("[] []").
        print(
            "Usage: python normalize.py <infile> <outfile> <language> "
            "[<remove_nuktas(True,False)>] [<normalize_nasals>]"
        )
        sys.exit(1)

    language = sys.argv[3]
    remove_nuktas = False
    normalize_nasals = "do_nothing"
    if len(sys.argv) >= 5:
        # bool("False") is True, so the flag text has to be parsed explicitly.
        remove_nuktas = sys.argv[4].strip().lower() in ("true", "1", "yes")
    if len(sys.argv) >= 6:
        normalize_nasals = sys.argv[5]

    # create normalizer
    factory = IndicNormalizerFactory()
    normalizer = factory.get_normalizer(
        language, remove_nuktas=remove_nuktas, nasals_mode=normalize_nasals
    )

    # DO normalization; iterate the file lazily instead of loading every line
    with codecs.open(sys.argv[1], "r", "utf-8") as ifile:
        with codecs.open(sys.argv[2], "w", "utf-8") as ofile:
            for line in ifile:
                ofile.write(normalizer.normalize(line))
def init():
    """
    Load the English phonetic tables and the ARPABET maps into the module globals.

    To be called by library loader, do not call it in your program
    """
    global ENGLISH_PHONETIC_DATA, ENGLISH_PHONETIC_VECTORS, PHONETIC_VECTOR_LENGTH, PHONETIC_VECTOR_START_OFFSET
    global ARPABET_ID_MAP, ID_ARPABET_MAP

    script_dir = os.path.join(common.get_resources_path(), "script")

    # full phonetic table; everything from PHONETIC_VECTOR_START_OFFSET
    # onwards is taken as the feature vector
    data_path = os.path.join(script_dir, "english_script_phonetic_data.csv")
    ENGLISH_PHONETIC_DATA = pd.read_csv(data_path, encoding="utf-8")
    feature_columns = ENGLISH_PHONETIC_DATA.iloc[:, PHONETIC_VECTOR_START_OFFSET:]
    ENGLISH_PHONETIC_VECTORS = feature_columns.values
    PHONETIC_VECTOR_LENGTH = ENGLISH_PHONETIC_VECTORS.shape[1]

    ### Load mapping from ARPABET representation of phoneme to internal ID
    # the zero-based line number of each phoneme is its internal id
    arpabet_path = os.path.join(script_dir, "english_arpabet_list.csv")
    with open(arpabet_path, "r", encoding="utf-8") as infile:
        for ph_id, raw_name in enumerate(infile):
            phoneme = raw_name.strip()
            ARPABET_ID_MAP[phoneme] = ph_id
            ID_ARPABET_MAP[ph_id] = phoneme
def get_phonetic_feature_vector(p, lang):
    """Return the phonetic feature vector for the encoded phoneme ``p``.

    Args:
        p: single character encoding a phoneme (see ``phoneme_to_enc``).
        lang: passed through to ``get_phonetic_info``.

    Returns:
        The row of the phonetic vector table for ``p``, or ``invalid_vector()``
        when ``p`` lies outside the script range or its table row is flagged
        as having no valid vector representation.
    """
    # in_range() compares its argument with SCRIPT_RANGE_START/END, which are
    # absolute code points. The original passed the relative offset
    # (ord(p) - SCRIPT_RANGE_START), so every character inside the range was
    # rejected as invalid.
    if not in_range(ord(p)):
        return invalid_vector()

    offset = enc_to_offset(p)
    phonetic_data, phonetic_vectors = get_phonetic_info(lang)

    if phonetic_data.iloc[offset]["Valid Vector Representation"] == 0:
        return invalid_vector()

    return phonetic_vectors[offset]
def is_supported_language(lang):
    """Return True when ``lang`` is a key of ``li.SCRIPT_RANGES``.

    Tests membership on the mapping directly instead of copying all keys
    into a list on every call.
    """
    return lang in li.SCRIPT_RANGES
def is_indiclang_char(c, lang):
    """
    Tell whether character ``c`` belongs to the script of ``lang``.

    Applicable to Brahmi derived Indic scripts
    Note that DANDA and DOUBLE_DANDA have the same Unicode codepoint for all Indic scripts

    Raises IndicNlpException when ``lang`` is not supported.
    """
    if not is_supported_language(lang):
        raise IndicNlpException("Language {} not supported".format(lang))

    offset = get_offset(c, lang)
    if SCRIPT_OFFSET_START <= offset < SCRIPT_OFFSET_RANGE:
        return True

    # the dandas are shared by all scripts, so they count for every language
    return ord(c) == li.DANDA or ord(c) == li.DOUBLE_DANDA
def or_vectors(v1, v2):
    """Element-wise OR of two bit vectors, returned as a numpy array.

    Positions are paired with ``zip``, so the result has the length of the
    shorter input. A position is 1 when the two bits sum to at least 1.
    """
    merged = []
    for bit1, bit2 in zip(v1, v2):
        merged.append(1 if bit1 + bit2 >= 1 else 0)
    return np.array(merged)
def lcsr_any(srcw, tgtw):
    """
    LCSR computation if both languages have the same script

    Returns a tuple: (LCS length divided by the longer string's length,
    len(srcw) as float, len(tgtw) as float).
    """
    score_mat = np.zeros((len(srcw) + 1, len(tgtw) + 1))

    for si, sc in enumerate(srcw, 1):
        for ti, tc in enumerate(tgtw, 1):
            if sc == tc:
                score_mat[si, ti] = score_mat[si - 1, ti - 1] + 1.0
            else:
                score_mat[si, ti] = max(score_mat[si, ti - 1], score_mat[si - 1, ti])

    return (
        score_mat[-1, -1] / float(max(len(srcw), len(tgtw))),
        float(len(srcw)),
        float(len(tgtw)),
    )


def lcsr(srcw, tgtw, slang, tlang):
    """
    compute the Longest Common Subsequence Ratio (LCSR) between two strings at the character level.

    srcw: source language string
    tgtw: target language string
    slang: source language
    tlang: target language

    Plain character comparison (lcsr_any) is used when both strings are in
    the same language or either language is unsupported; otherwise the
    script-aware lcsr_indic is used.
    """
    # The original passed four arguments to lcsr_any and two to lcsr_indic,
    # the reverse of their signatures, so every call raised TypeError.
    if (
        slang == tlang
        or not is_supported_language(slang)
        or not is_supported_language(tlang)
    ):
        return lcsr_any(srcw, tgtw)
    else:
        return lcsr_indic(srcw, tgtw, slang, tlang)
def create_similarity_matrix(sim_func, slang, tlang, normalize=True):
    """Build the matrix of pairwise similarities between two scripts' characters.

    Args:
        sim_func: callable taking two phonetic feature vectors and returning a number.
        slang: language code for the rows.
        tlang: language code for the columns.
        normalize: when True, divide every row by its sum.

    Returns:
        Square numpy array with one row/column per offset of the coordinated
        range; entry [i, j] compares the i-th source offset with the j-th
        target offset of that range.
    """
    start = langinfo.COORDINATED_RANGE_START_INCLUSIVE
    end = langinfo.COORDINATED_RANGE_END_INCLUSIVE

    dim = end - start + 1
    sim_mat = np.zeros((dim, dim))

    for offset1 in range(start, end + 1):
        v1 = get_phonetic_feature_vector(offset_to_char(offset1, slang), slang)
        for offset2 in range(start, end + 1):
            v2 = get_phonetic_feature_vector(offset_to_char(offset2, tlang), tlang)
            # The matrix has `dim` rows/columns, so index relative to the
            # range start; absolute offsets only fit when the range starts at 0.
            sim_mat[offset1 - start, offset2 - start] = sim_func(v1, v2)

    if normalize:
        # row-wise normalization: transpose so the division broadcasts per row
        sums = np.sum(sim_mat, axis=1)
        sim_mat = (sim_mat.transpose() / sums).transpose()

    return sim_mat
+ + +def trivial_detokenize_indic(text): + """detokenize string for Indian language scripts using Brahmi-derived scripts + + A trivial detokenizer which: + + - decides whether punctuation attaches to left/right or both + - handles number sequences + - handles quotes smartly (deciding left or right attachment) + + Args: + text (str): tokenized text to process + + Returns: + str: detokenized string + """ + + s = text + ### some normalizations + + # numbers and dates + new_s = "" + prev = 0 + for m in pat_num_seq.finditer(s): + start = m.start() + end = m.end() + if start > prev: + new_s = new_s + s[prev:start] + new_s = new_s + s[start:end].replace(" ", "") + prev = end + + new_s = new_s + s[prev:] + s = new_s + + ### consective single quotes or backslashes become double quotes + # s=s.replace("' '", "''") + # s=s.replace("` `", '``') + + s = pat_lra.sub("\\1", s) + s = pat_la.sub("\\1", s) + s = pat_ra.sub("\\1", s) + + # assumes well formedness of quotes and alternates between right and left attach + + alt_attach = "'\"`" + for punc in alt_attach: + cnt = 0 + out_str = [] + for c in s: + if c == punc: + if cnt % 2 == 0: + out_str.append("@RA") + else: + out_str.append("@LA") + cnt += 1 + else: + out_str.append(c) + + s = ( + "".join(out_str) + .replace("@RA ", punc) + .replace(" @LA", punc) + .replace("@RA", punc) + .replace("@LA", punc) + ) + + return s + + +def trivial_detokenize(text, lang="hi"): + """detokenize string for languages of the Indian subcontinent + + A trivial detokenizer which: + + - decides whether punctuation attaches to left/right or both + - handles number sequences + - handles quotes smartly (deciding left or right attachment) + + Args: + text (str): tokenized text to process + + Returns: + str: detokenized string + + Raises: + IndicNlpException: If language is not supported + """ + return trivial_detokenize_indic(text) diff --git a/libs/indic_nlp_library/indicnlp/tokenize/indic_tokenize.py 
### tokenizer patterns

# ASCII punctuation must be escaped before it is placed inside a character
# class; unescaped, its backslash is consumed as an escape for the following
# ']' and backslash itself is never matched.
_ASCII_PUNCT_CLASS = re.escape(string.punctuation)

triv_tokenizer_indic_pat = re.compile(
    r"(["
    + _ASCII_PUNCT_CLASS
    + r"\u0964\u0965\uAAF1\uAAF0\uABEB\uABEC\uABED\uABEE\uABEF\u1C7E\u1C7F"
    + r"])"
)
triv_tokenizer_urdu_pat = re.compile(
    r"(["
    + _ASCII_PUNCT_CLASS
    + r"\u0609\u060A\u060C\u061E\u066A\u066B\u066C\u066D\u06D4"
    + r"])"
)

## date, numbers, section/article numbering
pat_num_seq = re.compile(r"([0-9]+ [,.:/] )+[0-9]+")


def trivial_tokenize_indic(text):
    """tokenize string for Indian language scripts using Brahmi-derived scripts

    A trivial tokenizer which just tokenizes on the punctuation boundaries.
    This also includes punctuations for the Indian language scripts (the
    purna virama and the deergha virama). This is a language independent
    tokenizer.

    Number/date sequences such as ``1,000.5`` are kept as a single token.
    An empty or all-space input yields ``[""]``.

    Args:
        text (str): text to tokenize

    Returns:
        list: list of tokens
    """
    spaced = triv_tokenizer_indic_pat.sub(r" \1 ", text.replace("\t", " "))
    s = re.sub(r"[ ]+", " ", spaced).strip(" ")

    # do not tokenize numbers and dates: glue the pieces back together
    s = pat_num_seq.sub(lambda m: m.group(0).replace(" ", ""), s)

    return s.split(" ")


def trivial_tokenize_urdu(text):
    """tokenize Urdu string

    A trivial tokenizer which just tokenizes on the punctuation boundaries.
    This also includes punctuations for the Urdu script.
    These punctuations characters were identified from the Unicode database
    for Arabic script by looking for punctuation symbols.

    Unlike `trivial_tokenize_indic`, number sequences are not re-joined.

    Args:
        text (str): text to tokenize

    Returns:
        list: list of tokens
    """
    spaced = triv_tokenizer_urdu_pat.sub(r" \1 ", text.replace("\t", " "))
    return re.sub(r"[ ]+", " ", spaced).strip(" ").split(" ")


def trivial_tokenize(text, lang="hi"):
    """trivial tokenizer for Indian languages using Brahmi or Arabic scripts

    A trivial tokenizer which just tokenizes on the punctuation boundaries.
    Major punctuations specific to Indian languages are handled.
    These punctuations characters were identified from the Unicode database.

    Args:
        text (str): text to tokenize
        lang (str): ISO 639-2 language code; "ur" selects the Urdu
            tokenizer, anything else the Indic one

    Returns:
        list: list of tokens
    """
    if lang == "ur":
        return trivial_tokenize_urdu(text)
    return trivial_tokenize_indic(text)
## for language which have danda as delimiter
## period is not part of the sentence delimiters
DELIM_PAT_DANDA = re.compile(r"[\?!\u0964\u0965]")

## for languages which don't have danda as delimiter
DELIM_PAT_NO_DANDA = re.compile(
    r"[\.\?!\u0964\u0965\uAAF1\uAAF0\uABEB\uABEC\uABED\uABEE\uABEF\u1C7E\u1C7F]"
)

## pattern to check for presence of danda in text
CONTAINS_DANDA = re.compile(r"[\u0964\u0965]")

## pattern to check for presence of valid domain characters in text
CONTAINS_VALID_DOMAIN_CHAR = re.compile(r"^[a-zA-Z0-9_-]$")

## pattern to check for presence of multiple consecutive spaces in text
CONTAINS_MULTIPLE_SPACES = re.compile(" +")

## Non-breaking tokens, written in Devanagari. Built once at import time
## instead of on every call to `is_acronym_abbvr`.
_ACK_CHARS = frozenset(
    {
        ## acronym for latin characters
        "ए",
        "ऎ",
        "बी",
        "बि",
        "सी",
        "सि",
        "डी",
        "डि",
        "ई",
        "इ",
        "एफ",
        "ऎफ",
        "जी",
        "जि",
        "एच",
        "ऎच",
        "आई",
        "आइ",
        "ऐ",
        "जे",
        "जॆ",
        "के",
        "कॆ",
        "एल",
        "ऎल",
        "एम",
        "ऎम",
        "एन",
        "ऎन",
        "ओ",
        "ऒ",
        "पी",
        "पि",
        "क्यू",
        "क्यु",
        "आर",
        "एस",
        "ऎस",
        "टी",
        "टि",
        "यू",
        "यु",
        "वी",
        "वि",
        "व्ही",
        "व्हि",
        "डब्ल्यू",
        "डब्ल्यु",
        "एक्स",
        "ऎक्स",
        "वाय",
        "जेड",
        "ज़ेड",
        ## add halant to the previous English character mappings.
        "एफ्",
        "ऎफ्",
        "एच्",
        "ऎच्",
        "एल्",
        "ऎल्",
        "एम्",
        "ऎम्",
        "एन्",
        "ऎन्",
        "आर्",
        "एस्",
        "ऎस्",
        "एक्स्",
        "ऎक्स्",
        "वाय्",
        "जेड्",
        "ज़ेड्",
        # Indic vowels
        "ऄ",
        "अ",
        "आ",
        "इ",
        "ई",
        "उ",
        "ऊ",
        "ऋ",
        "ऌ",
        "ऍ",
        "ऎ",
        "ए",
        "ऐ",
        "ऑ",
        "ऒ",
        "ओ",
        "औ",
        "ॠ",
        "ॡ",
        # Indic consonants
        "क",
        "ख",
        "ग",
        "घ",
        "ङ",
        "च",
        "छ",
        "ज",
        "झ",
        "ञ",
        "ट",
        "ठ",
        "ड",
        "ढ",
        "ण",
        "त",
        "थ",
        "द",
        "ध",
        "न",
        "ऩ",
        "प",
        "फ",
        "ब",
        "भ",
        "म",
        "य",
        "र",
        "ऱ",
        "ल",
        "ळ",
        "ऴ",
        "व",
        "श",
        "ष",
        "स",
        "ह",
        ## abbreviation
        "श्री",
        "डॉ",
        "कु",
        "चि",
        "सौ",
    }
)


def is_latin_or_numeric(character):
    """
    Check if a character is an ASCII letter, a digit, '_' or '-'.

    Parameters:
        character (str): The character to be checked.

    Returns:
        bool: True if `character` is exactly one such character, False otherwise.
    """
    return CONTAINS_VALID_DOMAIN_CHAR.match(character) is not None


def is_acronym_abbvr(text, lang):
    """Is the text a non-breaking phrase

    The text is transliterated from `lang` to Devanagari ("hi") and looked up
    in a fixed table of letter names, single letters and common abbreviations.

    Args:
        text (str): text to check for non-breaking phrase
        lang (str): ISO 639-2 language code

    Returns:
        boolean: true if `text` is a non-breaking phrase
    """
    return (
        unicode_transliterate.UnicodeIndicTransliterator.transliterate(text, lang, "hi")
        in _ACK_CHARS
    )


def _split_on_delimiters(text, lang, delim_pat):
    """Phase 1: cut `text` after every delimiter match that is a real break."""
    cand_sentences = []
    begin = 0
    text = text.strip()
    for mo in delim_pat.finditer(text):
        p1 = mo.start()

        # The look-behind checks only make sense when a character precedes the
        # delimiter; at position 0 an index of -1 would wrap to the last char.
        if p1 > 0:
            prev_char = text[p1 - 1]

            # do not break right after a digit (e.g. "3.5")
            if prev_char.isnumeric():
                continue

            ## Prevents splitting on "." in URLs/emails in indic texts.
            if (
                lang != "en"
                and is_latin_or_numeric(prev_char)
                and p1 + 1 < len(text)
                and is_latin_or_numeric(text[p1 + 1])
            ):
                continue

        s = text[begin : p1 + 1].strip()
        if len(s) > 0:
            cand_sentences.append(s)
        begin = p1 + 1

    s = text[begin:].strip()
    if len(s) > 0:
        cand_sentences.append(s)

    return cand_sentences


def _merge_false_breaks(cand_sentences, lang):
    """Phase 2: re-join candidates that were split at a non-delimiting '.'.

    If there is a run of candidates consisting only of a single word ending
    in '.', or ending in a known acronym/abbreviation followed by '.', they
    are merged with the sentence that follows the run.
    """
    final_sentences = []
    sen_buffer = ""
    bad_state = False

    for sentence in cand_sentences:
        words = sentence.split(" ")
        if len(words) == 1 and sentence[-1] == ".":
            bad_state = True
            sen_buffer = sen_buffer + " " + sentence
        elif sentence[-1] == "." and is_acronym_abbvr(words[-1][:-1], lang):
            if len(sen_buffer) > 0 and not bad_state:
                # the buffered sentence is complete; start a new one
                final_sentences.append(sen_buffer)
                sen_buffer = sentence
            else:
                sen_buffer = sen_buffer + " " + sentence
            bad_state = True
        elif bad_state:
            # this sentence closes the run of false breaks
            sen_buffer = sen_buffer + " " + sentence
            if len(sen_buffer) > 0:
                final_sentences.append(sen_buffer)
            sen_buffer = ""
            bad_state = False
        else:  ## good state
            if len(sen_buffer) > 0:
                final_sentences.append(sen_buffer)
            sen_buffer = sentence
            bad_state = False

    if len(sen_buffer) > 0:
        final_sentences.append(sen_buffer)

    return [CONTAINS_MULTIPLE_SPACES.sub(" ", s.strip()) for s in final_sentences]


def sentence_split(text, lang, delim_pat="auto"):  ## New signature
    """split the text into sentences

    A rule-based sentence splitter for Indian languages written in
    Brahmi-derived scripts. The text is split at sentence delimiter
    boundaries. The delimiters can be configured by passing appropriate
    parameters.

    The sentence splitter can identify non-breaking phrases like
    single letter, common abbreviations/honorifics for some Indian
    languages.

    Args:
        text (str): text to split into sentence
        lang (str): ISO 639-2 language code
        delim_pat (str or compiled pattern): regular expression to identify
            sentence delimiter characters. If set to 'auto', the delimiter
            pattern is chosen automatically based on the language and text.
            Any other string is compiled as a regular expression.

    Returns:
        list: list of sentences identified from the input text
    """

    if lang == "ur":
        from indicnlp.urduhack.tokenization import sentence_tokenizer

        if len(text.split()) < 2:
            return text.split()
        return sentence_tokenizer(text)

    if delim_pat == "auto":
        # in modern texts it is possible that period is used as delimiter
        # instead of DANDA. Hence, use the danda delimiter pattern only if
        # the language uses danda AND the text contains at least one danda
        if langinfo.is_danda_delim(lang) and CONTAINS_DANDA.search(text) is not None:
            delim_pat = DELIM_PAT_DANDA
        else:
            delim_pat = DELIM_PAT_NO_DANDA
    elif isinstance(delim_pat, str):
        ## the caller supplied a pattern string, as the signature documents
        delim_pat = re.compile(delim_pat)
    ## otherwise, assume the caller passed a compiled delimiter pattern

    ### Phase 1: break on sentence delimiters.
    cand_sentences = _split_on_delimiters(text, lang, delim_pat)

    if not delim_pat.search("."):
        ## run phase 2 only if delimiter pattern contains period
        return cand_sentences

    ### Phase 2: Address the fact that '.' may not always be a sentence delimiter
    return _merge_false_breaks(cand_sentences, lang)
class LatinToIndicAcronymTransliterator(object):
    """Spell out Latin-script acronyms letter by letter in an Indic script.

    Each Latin letter is first replaced by its Devanagari letter name and the
    result is then transliterated from Devanagari ("hi") to the target script.
    """

    # translation table: lower-case Latin letter -> Devanagari letter name
    LATIN_TO_DEVANAGARI_TRANSTABLE = str.maketrans(
        {
            "a": "ए",
            "b": "बी",
            "c": "सी",
            "d": "डी",
            "e": "ई",
            "f": "एफ",
            "g": "जी",
            "h": "एच",
            "i": "आई",
            "j": "जे",
            "k": "के",
            "l": "एल",
            "m": "एम",
            "n": "एन",
            "o": "ओ",
            "p": "पी",
            "q": "क्यू",
            "r": "आर",
            "s": "एस",
            "t": "टी",
            "u": "यू",
            "v": "वी",
            "w": "डब्ल्यू",
            "x": "एक्स",
            "y": "वाय",
            "z": "जेड",
        }
    )

    # the 26 lower-case Latin letters, used when sampling random acronyms
    LATIN_ALPHABET = [
        "a",
        "b",
        "c",
        "d",
        "e",
        "f",
        "g",
        "h",
        "i",
        "j",
        "k",
        "l",
        "m",
        "n",
        "o",
        "p",
        "q",
        "r",
        "s",
        "t",
        "u",
        "v",
        "w",
        "x",
        "y",
        "z",
    ]

    @staticmethod
    def get_transtable():
        """Return the Latin-letter to Devanagari-letter-name translation table."""
        return LatinToIndicAcronymTransliterator.LATIN_TO_DEVANAGARI_TRANSTABLE

    @staticmethod
    def transliterate(w, lang):
        """Transliterate the Latin acronym `w` into the script of `lang`.

        Args:
            w (str): acronym in Latin script (case is ignored)
            lang (str): target language code

        Returns:
            str: the acronym spelled out in the target script
        """
        devanagari = w.lower().translate(
            LatinToIndicAcronymTransliterator.LATIN_TO_DEVANAGARI_TRANSTABLE
        )
        return UnicodeIndicTransliterator.transliterate(devanagari, "hi", lang)

    @staticmethod
    def generate_latin_acronyms(num_acronyms, min_len=2, max_len=6, strategy="random"):
        """
        generate Latin acronyms in lower case

        Args:
            num_acronyms (int): number of acronyms to generate
            min_len (int): minimum acronym length (inclusive)
            max_len (int): maximum acronym length (inclusive)
            strategy (str): sampling strategy; only "random" is implemented

        Returns:
            list: `num_acronyms` random lower-case strings

        Raises:
            ValueError: if `strategy` is not a supported strategy
        """
        # An unknown strategy used to yield a list of None values silently;
        # fail loudly instead.
        if strategy != "random":
            raise ValueError("unsupported acronym sampling strategy: {!r}".format(strategy))

        alphabet = LatinToIndicAcronymTransliterator.LATIN_ALPHABET
        return [
            "".join(random.choices(alphabet, k=random.randint(min_len, max_len)))
            for _ in range(num_acronyms)
        ]
class AggressiveScriptUnifier:
    """Normalize text with all normalizer options enabled, then map it to a common script."""

    def __init__(self, common_lang="hi", nasals_mode="to_nasal_consonants"):
        self.common_lang = common_lang
        self.nasals_mode = nasals_mode
        self.do_normalize_chandras = True
        self.do_normalize_vowel_ending = True
        self.remove_nuktas = True
        self.normalizer_map = {}
        self._init_normalizers()

    def _init_normalizers(self):
        """Create one normalizer per supported language."""
        factory = indic_normalize.IndicNormalizerFactory()

        # options shared by every language
        shared_options = dict(
            nasals_mode=self.nasals_mode,
            do_normalize_chandras=self.do_normalize_chandras,
            remove_nuktas=self.remove_nuktas,
            do_normalize_vowel_ending=self.do_normalize_vowel_ending,
        )

        ## languages with common parameters get no extra options
        extra_options = {
            lang: {}
            for lang in ("hi", "mr", "sa", "kK", "ne", "sd", "bn", "gu", "ta", "te", "kn")
        }
        ## languages with language specific parameters
        extra_options["pa"] = dict(
            do_canonicalize_addak=True,
            do_canonicalize_tippi=True,
            do_replace_vowel_bases=True,
        )
        extra_options["or"] = dict(do_remap_wa=True)
        extra_options["as"] = dict(do_remap_assamese_chars=True)
        extra_options["ml"] = dict(
            do_canonicalize_chillus=True,
            do_correct_geminated_T=True,
        )

        for lang, extras in extra_options.items():
            self.normalizer_map[lang] = factory.get_normalizer(
                lang, **shared_options, **extras
            )

    def transform(self, text, lang):
        """Normalize `text` for `lang` and transliterate it to the common script.

        `lang` must be one of the languages a normalizer was created for.
        """
        normalized = self.normalizer_map[lang].normalize(text)
        return unicode_transliterate.UnicodeIndicTransliterator.transliterate(
            normalized, lang, self.common_lang
        )


class BasicScriptUnifier:
    """Apply default normalization (where available), then map to a common script."""

    def __init__(self, common_lang="hi", nasals_mode="do_nothing"):
        self.common_lang = common_lang
        self.nasals_mode = nasals_mode
        self.normalizer_map = {}
        self._init_normalizers()

    def _init_normalizers(self):
        """Create one normalizer per supported language, varying only `nasals_mode`."""
        factory = indic_normalize.IndicNormalizerFactory()
        supported = (
            "hi",
            "mr",
            "sa",
            "kK",
            "ne",
            "sd",
            "bn",
            "gu",
            "ta",
            "te",
            "kn",
            "pa",
            "or",
            "as",
            "ml",
        )
        for lang in supported:
            self.normalizer_map[lang] = factory.get_normalizer(
                lang, nasals_mode=self.nasals_mode
            )

    def transform(self, text, lang):
        """Normalize `text` if a normalizer exists for `lang`, then transliterate it."""
        normalizer = self.normalizer_map.get(lang)
        if lang in self.normalizer_map:
            text = normalizer.normalize(text)

        return unicode_transliterate.UnicodeIndicTransliterator.transliterate(
            text, lang, self.common_lang
        )


class NaiveScriptUnifier:
    """Map text to a common script without any normalization."""

    def __init__(self, common_lang="hi"):
        self.common_lang = common_lang

    def transform(self, text, lang):
        """Transliterate `text` from the script of `lang` to the common script."""
        return unicode_transliterate.UnicodeIndicTransliterator.transliterate(
            text, lang, self.common_lang
        )


if __name__ == "__main__":
    loader.load()

    if len(sys.argv) <= 4:
        print("Usage: python script_unifier ")
        sys.exit(1)

    # argv: 1 = unifier kind, 2 = input file, 3 = output file, 4 = language
    unifier_factories = {
        "aggressive": lambda: AggressiveScriptUnifier(
            nasals_mode="to_nasal_consonants"
        ),
        "moderate": lambda: AggressiveScriptUnifier(nasals_mode="do_nothing"),
        "basic": BasicScriptUnifier,
        "naive": NaiveScriptUnifier,
    }

    make_unifier = unifier_factories.get(sys.argv[1])
    # an unrecognised kind does nothing
    if make_unifier is not None:
        language = sys.argv[4]
        unifier = make_unifier()

        with open(sys.argv[2], "r", encoding="utf-8") as ifile, open(
            sys.argv[3], "w", encoding="utf-8"
        ) as ofile:
            for raw_line in ifile.readlines():
                ofile.write(unifier.transform(raw_line.strip(), language) + "\n")
# offset within a script block -> ITRANS string; filled by init()
OFFSET_TO_ITRANS = {}
# ITRANS string -> list of offsets; filled by init()
ITRANS_TO_OFFSET = defaultdict(list)

# alternative ITRANS spellings -> the spelling used internally; set by init()
DUPLICATE_ITRANS_REPRESENTATIONS = {}


def init():
    """
    To be called by library loader, do not call it in your program

    Loads the ITRANS-script offset map from the resources directory
    (``transliterate/offset_itrans_map.csv``) into the module-level tables.
    The map was initially generated with the old itrans transliterator and is
    modified as needed to accommodate extensions and corrections.
    """
    global OFFSET_TO_ITRANS, ITRANS_TO_OFFSET, DUPLICATE_ITRANS_REPRESENTATIONS

    csv_path = os.path.join(
        common.get_resources_path(), "transliterate", "offset_itrans_map.csv"
    )
    mapping_table = pd.read_csv(csv_path, encoding="utf-8")

    for _, row in mapping_table.iterrows():
        itrans = row["itrans"]
        offset = int(row["offset_hex"], base=16)

        OFFSET_TO_ITRANS[offset] = itrans

        if langinfo.is_consonant_offset(offset):
            ### for consonants, strip the schwa - add halant offset
            ITRANS_TO_OFFSET[itrans[:-1]].extend([offset, 0x4D])
        else:
            ### the append assumes that the maatra always comes after independent vowel in the df
            ITRANS_TO_OFFSET[itrans].append(offset)

    DUPLICATE_ITRANS_REPRESENTATIONS = {
        "A": "aa",
        "I": "ii",
        "U": "uu",
        "RRi": "R^i",
        "RRI": "R^I",
        "LLi": "L^i",
        "LLI": "L^I",
        "L": "ld",
        "w": "v",
        "x": "kSh",
        "gj": "j~n",
        "dny": "j~n",
        ".n": ".m",
        "M": ".m",
        "OM": "AUM",
    }


class UnicodeIndicTransliterator(object):
    """
    Base class for rule-based transliteration among Indian languages.

    Script pair specific transliterators should derive from this class and override the transliterate() method.
    They can call the super class 'transliterate()' method to avail of the common transliteration
    """

    @staticmethod
    def _correct_tamil_mapping(offset):
        """Map an offset that has no Tamil character to the closest one that has.

        Tamil lacks aspirated and voiced plosives; they are replaced by the
        unvoiced, unaspirated plosive of the same row.
        """
        # first 4 consonant rows of varnamala (5 letters each, from 0x15):
        # keep the first and last letter of each row, collapse the others
        # onto the first. Exception: ja (0x1C) has a mapping in Tamil.
        column = (offset - 0x15) % 5
        in_first_four_rows = 0x15 <= offset <= 0x28
        if in_first_four_rows and offset != 0x1C and column not in (0, 4):
            row = (offset - 0x15) // 5
            offset = 0x15 + 5 * row

        # for 5th consonant row of varnamala
        if offset in (0x2B, 0x2C, 0x2D):
            offset = 0x2A

        # 'sh' becomes 'Sh'
        if offset == 0x36:
            offset = 0x37

        return offset

    @staticmethod
    def transliterate(text, lang1_code, lang2_code):
        """
        convert the source language script (lang1) to target language script (lang2)

        text: text to transliterate
        lang1_code: language 1 code
        lang2_code: language 2 code

        If either language has no entry in ``langinfo.SCRIPT_RANGES`` the text
        is returned unchanged.
        """
        known_scripts = langinfo.SCRIPT_RANGES
        if lang1_code not in known_scripts or lang2_code not in known_scripts:
            return text

        src_base = known_scripts[lang1_code][0]
        tgt_base = known_scripts[lang2_code][0]
        range_start = langinfo.COORDINATED_RANGE_START_INCLUSIVE
        range_end = langinfo.COORDINATED_RANGE_END_INCLUSIVE

        converted = []
        for ch in text:
            offset = ord(ch) - src_base
            # danda and double danda are never remapped
            if range_start <= offset <= range_end and ch not in ("\u0964", "\u0965"):
                if lang2_code == "ta":
                    # tamil exceptions
                    offset = UnicodeIndicTransliterator._correct_tamil_mapping(offset)
                converted.append(chr(tgt_base + offset))
            else:
                converted.append(ch)

        return "".join(converted)
exceptions + offset = UnicodeIndicTransliterator._correct_tamil_mapping( + offset + ) + newc = chr(langinfo.SCRIPT_RANGES[lang2_code][0] + offset) + + trans_lit_text.append(newc) + + return "".join(trans_lit_text) + else: + return text + + +class ItransTransliterator(object): + """ + Transliterator between Indian scripts and ITRANS + """ + + @staticmethod + def to_itrans(text, lang_code): + if lang_code in langinfo.SCRIPT_RANGES: + if lang_code == "ml": + # Change from chillus characters to corresponding consonant+halant + text = text.replace("\u0d7a", "\u0d23\u0d4d") + text = text.replace("\u0d7b", "\u0d28\u0d4d") + text = text.replace("\u0d7c", "\u0d30\u0d4d") + text = text.replace("\u0d7d", "\u0d32\u0d4d") + text = text.replace("\u0d7e", "\u0d33\u0d4d") + text = text.replace("\u0d7f", "\u0d15\u0d4d") + + offsets = [isc.get_offset(c, lang_code) for c in text] + + ### naive lookup + # itrans_l = [ OFFSET_TO_ITRANS.get(o, '-' ) for o in offsets ] + itrans_l = [] + for o in offsets: + itrans = OFFSET_TO_ITRANS.get( + o, chr(langinfo.SCRIPT_RANGES[lang_code][0] + o) + ) + if langinfo.is_halanta_offset(o): + itrans = "" + if len(itrans_l) > 0: + itrans_l.pop() + elif langinfo.is_vowel_sign_offset(o) and len(itrans_l) > 0: + itrans_l.pop() + itrans_l.extend(itrans) + + return "".join(itrans_l) + + else: + return text + + @staticmethod + def from_itrans(text, lang): + """ + TODO: Document this method properly + TODO: A little hack is used to handle schwa: needs to be documented + TODO: check for robustness + """ + + MAXCODE = 4 ### TODO: Needs to be fixed + + ## handle_duplicate_itrans_representations + for k, v in DUPLICATE_ITRANS_REPRESENTATIONS.items(): + if k in text: + text = text.replace(k, v) + + start = 0 + match = None + solution = [] + + i = start + 1 + while i <= len(text): + itrans = text[start:i] + + # print('===') + # print('i: {}'.format(i)) + # if i 0 and langinfo.is_halanta(solution[-1], lang): + offs = [offs[1]] ## dependent vowel + else: + offs = 
[offs[0]] ## independent vowel + + c = "".join([langinfo.offset_to_char(x, lang) for x in offs]) + match = (i, c) + + elif len(itrans) == 1: ## unknown character + match = (i, itrans) + elif ( + i < len(text) and (i - start) < MAXCODE + 1 + ): ## continue matching till MAXCODE length substring + i = i + 1 + continue + else: + solution.extend(match[1]) + # start=i-1 + start = match[0] + i = start + match = None + # print('match done') + + # print('match: {}'.format(match)) + + i = i + 1 + + ### flush matches + if match is not None: + solution.extend(match[1]) + + #### post-processing + ## delete unecessary halants + # print(''.join(solution)) + temp_out = list("".join(solution)) + rem_indices = [] + for i in range(len(temp_out) - 1): + if langinfo.is_halanta(temp_out[i], lang) and ( + langinfo.is_vowel_sign(temp_out[i + 1], lang) + or langinfo.is_nukta(temp_out[i + 1], lang) + or temp_out[i + 1] == langinfo.offset_to_char(0x7F, lang) + ): + rem_indices.append(i) + # if temp_out[i]==langinfo.offset_to_char(0x7f,lang): + # rem_indices.append(i) + for i in reversed(rem_indices): + temp_out.pop(i) + + out = "".join(temp_out) + + ## delete schwa placeholder + out = out.replace(langinfo.offset_to_char(0x7F, lang), "") + + return out + + +if __name__ == "__main__": + if len(sys.argv) < 4: + print( + "Usage: python unicode_transliterate.py " + ) + sys.exit(1) + + if sys.argv[1] == "transliterate": + src_language = sys.argv[4] + tgt_language = sys.argv[5] + + with open(sys.argv[2], "r", encoding="utf-8") as ifile: + with open(sys.argv[3], "w", encoding="utf-8") as ofile: + for line in ifile.readlines(): + transliterated_line = UnicodeIndicTransliterator.transliterate( + line, src_language, tgt_language + ) + ofile.write(transliterated_line) + + elif sys.argv[1] == "romanize": + language = sys.argv[4] + + ### temp fix to replace anusvara with corresponding nasal + # r1_nasal=re.compile(ur'\u0902([\u0915-\u0918])') + # r2_nasal=re.compile(ur'\u0902([\u091a-\u091d])') + # 
r3_nasal=re.compile(ur'\u0902([\u091f-\u0922])') + # r4_nasal=re.compile(ur'\u0902([\u0924-\u0927])') + # r5_nasal=re.compile(ur'\u0902([\u092a-\u092d])') + + with open(sys.argv[2], "r", encoding="utf-8") as ifile: + with open(sys.argv[3], "w", encoding="utf-8") as ofile: + for line in ifile.readlines(): + ### temp fix to replace anusvara with corresponding nasal + # line=r1_nasal.sub(u'\u0919\u094D\\1',line) + # line=r2_nasal.sub(u'\u091e\u094D\\1',line) + # line=r3_nasal.sub(u'\u0923\u094D\\1',line) + # line=r4_nasal.sub(u'\u0928\u094D\\1',line) + # line=r5_nasal.sub(u'\u092e\u094D\\1',line) + + transliterated_line = ItransTransliterator.to_itrans(line, language) + + ## temp fix to replace 'ph' to 'F' to match with Urdu transliteration scheme + transliterated_line = transliterated_line.replace("ph", "f") + + ofile.write(transliterated_line) + + elif sys.argv[1] == "indicize": + language = sys.argv[4] + + with open(sys.argv[2], "r", encoding="utf-8") as ifile: + with open(sys.argv[3], "w", encoding="utf-8") as ofile: + for line in ifile.readlines(): + transliterated_line = ItransTransliterator.from_itrans( + line, language + ) + ofile.write(transliterated_line) diff --git a/libs/indic_nlp_library/indicnlp/urduhack/__init__.py b/libs/indic_nlp_library/indicnlp/urduhack/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2bc81a7c4b7d13c294fc795dc851241f982ab840 --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/__init__.py @@ -0,0 +1,6 @@ +# coding: utf8 +"""Project Entry point""" +from .normalization import normalize +from .tokenization import sentence_tokenizer + +__all__ = ["normalize", "sentence_tokenizer"] diff --git a/libs/indic_nlp_library/indicnlp/urduhack/normalization/__init__.py b/libs/indic_nlp_library/indicnlp/urduhack/normalization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bd62f08dc0a318d32ccb3d7837cb11d16ec98576 --- /dev/null +++ 
b/libs/indic_nlp_library/indicnlp/urduhack/normalization/__init__.py @@ -0,0 +1,47 @@ +# coding: utf8 +""" +Normalization +============== + +The normalization of Urdu text is necessary to make it useful for the machine +learning tasks. In the :py:mod:`~urduhack.normalization.normalize` module, the very basic +problems faced when working with Urdu data are handled with ease and +efficiency. All the problems and how :py:mod:`~urduhack.normalization.normalize` module handles +them are listed below. + +This module fixes the problem of correct encodings for the Urdu characters and replaces Arabic +characters with correct Urdu characters. This module brings all the characters in the specified unicode range +(0600-06FF) for Urdu language. + +It also fixes the problem of joining of different Urdu words. By joining we mean that when space between two Urdu words +is removed, they must not make a new word. Their rendering must not change and even after the removal of space +they should look the same. + +You can use the library to normalize the Urdu text for correct unicode characters. +By normalization we mean to end the confusion between Urdu and Arabic characters, +to replace two words with one word keeping in mind the context they are used in. +For example, the characters 'ﺁ' and 'ﺂ' are replaced by 'آ'. All this is done using regular expressions. + +The normalization of Urdu text is necessary to make it useful for the machine learning tasks. 
+This module provides the following functionality: + + - Normalizing Single Characters + - Normalizing Combine Characters + - Removal of Diacritics from Urdu Text + - Replace all digits with Urdu and vice versa English +""" +from .character import ( + normalize_characters, + normalize_combine_characters, + remove_diacritics, + replace_digits, + normalize, +) + +__all__ = [ + "normalize", + "normalize_characters", + "normalize_combine_characters", + "remove_diacritics", + "replace_digits", +] diff --git a/libs/indic_nlp_library/indicnlp/urduhack/normalization/character.py b/libs/indic_nlp_library/indicnlp/urduhack/normalization/character.py new file mode 100644 index 0000000000000000000000000000000000000000..c5dd08fb4df9236829bbf17abc27c75557b6a0d9 --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/normalization/character.py @@ -0,0 +1,358 @@ +# coding: utf8 +""" +Character Normalization functions +provides functionality to put proper spaces before and after numeric digits, urdu digits +and punctuations. 
+""" +from typing import Dict, List +import logging + +from .regexes import _DIACRITICS_RE +from .regexes import _SPACE_AFTER_PUNCTUATIONS_RE, _REMOVE_SPACE_BEFORE_PUNCTUATIONS_RE + +logger = logging.getLogger(__name__) + +# Contains wrong Urdu characters mapping to correct characters +_CORRECT_URDU_CHARACTERS_MAPPING: Dict[str, List[str]] = { + "آ": ["ﺁ", "ﺂ"], + "أ": ["ﺃ"], + "ا": [ + "ﺍ", + "ﺎ", + ], + "ب": ["ﺏ", "ﺐ", "ﺑ", "ﺒ"], + "پ": [ + "ﭖ", + "ﭘ", + "ﭙ", + ], + "ت": ["ﺕ", "ﺖ", "ﺗ", "ﺘ"], + "ٹ": ["ﭦ", "ﭧ", "ﭨ", "ﭩ"], + "ث": ["ﺛ", "ﺜ", "ﺚ"], + "ج": ["ﺝ", "ﺞ", "ﺟ", "ﺠ"], + "ح": ["ﺡ", "ﺣ", "ﺤ", "ﺢ"], + "خ": ["ﺧ", "ﺨ", "ﺦ"], + "د": ["ﺩ", "ﺪ"], + "ذ": ["ﺬ", "ﺫ"], + "ر": ["ﺭ", "ﺮ"], + "ز": [ + "ﺯ", + "ﺰ", + ], + "س": [ + "ﺱ", + "ﺲ", + "ﺳ", + "ﺴ", + ], + "ش": ["ﺵ", "ﺶ", "ﺷ", "ﺸ"], + "ص": [ + "ﺹ", + "ﺺ", + "ﺻ", + "ﺼ", + ], + "ض": ["ﺽ", "ﺾ", "ﺿ", "ﻀ"], + "ط": ["ﻃ", "ﻄ"], + "ظ": ["ﻅ", "ﻇ", "ﻈ"], + "ع": [ + "ﻉ", + "ﻊ", + "ﻋ", + "ﻌ", + ], + "غ": [ + "ﻍ", + "ﻏ", + "ﻐ", + ], + "ف": [ + "ﻑ", + "ﻒ", + "ﻓ", + "ﻔ", + ], + "ق": [ + "ﻕ", + "ﻖ", + "ﻗ", + "ﻘ", + ], + "ل": [ + "ﻝ", + "ﻞ", + "ﻟ", + "ﻠ", + ], + "م": [ + "ﻡ", + "ﻢ", + "ﻣ", + "ﻤ", + ], + "ن": [ + "ﻥ", + "ﻦ", + "ﻧ", + "ﻨ", + ], + "چ": ["ﭺ", "ﭻ", "ﭼ", "ﭽ"], + "ڈ": ["ﮈ", "ﮉ"], + "ڑ": ["ﮍ", "ﮌ"], + "ژ": [ + "ﮋ", + ], + "ک": ["ﮎ", "ﮏ", "ﮐ", "ﮑ", "ﻛ", "ك"], + "گ": ["ﮒ", "ﮓ", "ﮔ", "ﮕ"], + "ں": ["ﮞ", "ﮟ"], + "و": [ + "ﻮ", + "ﻭ", + "ﻮ", + ], + "ؤ": ["ﺅ"], + "ھ": ["ﮪ", "ﮬ", "ﮭ", "ﻬ", "ﻫ", "ﮫ"], + "ہ": [ + "ﻩ", + "ﮦ", + "ﻪ", + "ﮧ", + "ﮩ", + "ﮨ", + "ه", + ], + "ۂ": [], + "ۃ": ["ة"], + "ء": ["ﺀ"], + "ی": ["ﯼ", "ى", "ﯽ", "ﻰ", "ﻱ", "ﻲ", "ﯾ", "ﯿ", "ي"], + "ئ": [ + "ﺋ", + "ﺌ", + ], + "ے": [ + "ﮮ", + "ﮯ", + "ﻳ", + "ﻴ", + ], + "ۓ": [], + "۰": ["٠"], + "۱": ["١"], + "۲": ["٢"], + "۳": ["٣"], + "۴": ["٤"], + "۵": ["٥"], + "۶": ["٦"], + "۷": ["٧"], + "۸": ["٨"], + "۹": ["٩"], + "۔": [], + "؟": [], + "٫": [], + "،": [], + "لا": ["ﻻ", "ﻼ"], + "": ["ـ"], +} + +_TRANSLATOR = {} +for key, value in _CORRECT_URDU_CHARACTERS_MAPPING.items(): + 
_TRANSLATOR.update(dict.fromkeys(map(ord, value), key)) + + +def normalize_characters(text: str) -> str: + """ + The most important module in the UrduHack is the :py:mod:`~urduhack.normalization.character` module, + defined in the module with the same name. You can use this module separately to normalize + a piece of text to a proper specified Urdu range (0600-06FF). To get an understanding of how this module works, one + needs to understand unicode. Every character has a unicode. You can search for any character unicode from any + language you will find it. No two characters can have the same unicode. This module works with reference to the + unicodes. Now as urdu language has its roots in Arabic, Parsian and Turkish. So we have to deal with all those + characters and convert them to a normal urdu character. To get a bit more of what the above explanation means is.:: + + >>> all_fes = ['ﻑ', 'ﻒ', 'ﻓ', 'ﻔ', ] + >>> urdu_fe = 'ف' + + All the characters in all_fes are same but they come from different languages and they all have different unicodes. + Now as computers deal with numbers, same character appearing in more than one place in a different language will + have a different unicode and that will create confusion which will create problems in understanding the context of + the data. :py:mod:`~character` module will eliminate this problem by replacing all the characters in all_fes by + urdu_fe. + + This provides the functionality to replace wrong arabic characters with correct urdu characters and fixed the + combine|join characters issue. + + Replace ``urdu`` text characters with correct ``unicode`` characters. + + Args: + text : ``Urdu`` text + Returns: + str: Returns a ``str`` object containing normalized text. 
+ Examples: + >>> from urduhack.normalization import normalize_characters + >>> # Text containing characters from Arabic Unicode block + >>> _text = "مجھ کو جو توڑا ﮔیا تھا" + >>> normalized_text = normalize_characters(_text) + >>> # Normalized text - Arabic characters are now replaced with Urdu characters + >>> normalized_text + مجھ کو جو توڑا گیا تھا + """ + return text.translate(_TRANSLATOR) + + +COMBINE_URDU_CHARACTERS: Dict[str, str] = { + "آ": "آ", + "أ": "أ", + "ۓ": "ۓ", +} + + +# Issue to be resolved: Words like کیجئے and کیجیے appear in the same context but they have different unicodes. +# We cannot merge them neither can we have them separately. Because if we decompose ئ, +# we get unicode that are not available in our unicode list. + + +def normalize_combine_characters(text: str) -> str: + """ + To normalize combine characters with single character unicode text, use the + :py:func:`~urduhack.normalization.character.normalize_combine_characters` function in the + :py:mod:`~urduhack.normalization.character` module. + + Replace combine|join ``urdu`` characters with single unicode character + + Args: + text : ``Urdu`` text + Returns: + str: Returns a ``str`` object containing normalized text. + Examples: + >>> from urduhack.normalization import normalize_combine_characters + >>> # In the following string, Alif ('ا') and Hamza ('ٔ ') are separate characters + >>> _text = "جرأت" + >>> normalized_text = normalize_combine_characters(_text) + >>> # Now Alif and Hamza are replaced by a Single Urdu Unicode Character! + >>> normalized_text + جرأت + """ + for _key, _value in COMBINE_URDU_CHARACTERS.items(): + text = text.replace(_key, _value) + return text + + +def punctuations_space(text: str) -> str: + """ + Add spaces after punctuations used in ``urdu`` writing + + Args: + text : ``Urdu`` text + Returns: + str: Returns a ``str`` object containing normalized text. 
+ Examples: + >>> from urduhack.normalization.character import punctuations_space + >>> _text = "ہوتا ہے ۔ ٹائپ" + >>> normalized_text = punctuations_space(_text) + >>> normalized_text + ہوتا ہے۔ ٹائپ + """ + text = _SPACE_AFTER_PUNCTUATIONS_RE.sub(" ", text) + text = _REMOVE_SPACE_BEFORE_PUNCTUATIONS_RE.sub(r"\1", text) + return text + + +def remove_diacritics(text: str) -> str: + """ + Remove ``urdu`` diacritics from text. It is an important step in pre-processing of the Urdu data. + This function returns a String object which contains the original text minus Urdu diacritics. + + Args: + text : ``Urdu`` text + Returns: + str: Returns a ``str`` object containing normalized text. + Examples: + >>> from urduhack.normalization import remove_diacritics + >>> _text = "شیرِ پنجاب" + >>> normalized_text = remove_diacritics(_text) + >>> normalized_text + شیر پنجاب + """ + return _DIACRITICS_RE.sub("", text) + + +ENG_URDU_DIGITS_MAP: Dict = { + "0": ["۰"], + "1": ["۱"], + "2": ["۲"], + "3": ["۳"], + "4": ["۴"], + "5": ["۵"], + "6": ["۶"], + "7": ["۷"], + "8": ["۸"], + "9": ["۹"], +} + +_ENG_DIGITS_TRANSLATOR = {} +for key, value in ENG_URDU_DIGITS_MAP.items(): + _ENG_DIGITS_TRANSLATOR.update(dict.fromkeys(map(ord, value), key)) + +URDU_ENG_DIGITS_MAP: Dict = { + "۰": ["0"], + "۱": ["1"], + "۲": ["2"], + "۳": ["3"], + "۴": ["4"], + "۵": ["5"], + "۶": ["6"], + "۷": ["7"], + "۸": ["8"], + "۹": ["9"], +} + +_URDU_DIGITS_TRANSLATOR = {} +for key, value in URDU_ENG_DIGITS_MAP.items(): + _URDU_DIGITS_TRANSLATOR.update(dict.fromkeys(map(ord, value), key)) + + +def replace_digits(text: str, with_english: bool = True) -> str: + """ + Replace urdu digits with English digits and vice versa + + Args: + text : Urdu text string + with_english (bool): Boolean to convert digits from one language to other + Returns: + Text string with replaced digits + """ + if with_english: + return text.translate(_ENG_DIGITS_TRANSLATOR) + return text.translate(_URDU_DIGITS_TRANSLATOR) + + +def 
normalize(text: str) -> str: + """ + To normalize some text, all you need to do pass ``Urdu`` text. It will return a ``str`` + with normalized characters both single and combined, proper spaces after digits and punctuations + and diacritics removed. + + Args: + text : ``Urdu`` text + Returns: + str: Normalized ``Urdu`` text + Raises: + TypeError: If text param is not not str Type. + Examples: + >>> from urduhack import normalize + >>> _text = "اَباُوگل پاکستان ﻤﯿﮟ 20 سال ﺳﮯ ، وسائل کی کوئی کمی نہیں ﮨﮯ۔" + >>> normalized_text = normalize(_text) + >>> # The text now contains proper spaces after digits and punctuations, + >>> # normalized characters and no diacritics! + >>> normalized_text + اباوگل پاکستان ﻤﯿﮟ 20 سال ﺳﮯ ، وسائل کی کوئی کمی نہیں ﮨﮯ۔ + """ + if not isinstance(text, str): + raise TypeError("Text must be str type.") + + logger.info("Normalizing the text.") + + text = remove_diacritics(text) + text = normalize_characters(text) + text = normalize_combine_characters(text) + return text diff --git a/libs/indic_nlp_library/indicnlp/urduhack/normalization/regexes.py b/libs/indic_nlp_library/indicnlp/urduhack/normalization/regexes.py new file mode 100644 index 0000000000000000000000000000000000000000..3d312b29488dfad9f4805d6bad036df8f0767b68 --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/normalization/regexes.py @@ -0,0 +1,21 @@ +# coding: utf8 +"""List of Regex""" + +import regex as re + +from indicnlp.urduhack.urdu_characters import URDU_PUNCTUATIONS, URDU_DIACRITICS + +# Add spaces after ., if there is number then not Ex (9.00) +_SPACE_AFTER_PUNCTUATIONS_RE = re.compile( + r"(?<=[" + + "".join(URDU_PUNCTUATIONS) + + "])(?=[^" + + "".join(URDU_PUNCTUATIONS) + + "0-9 \n])", + flags=re.U | re.M | re.I, +) +_REMOVE_SPACE_BEFORE_PUNCTUATIONS_RE = re.compile( + r"\s+([" + "".join(URDU_PUNCTUATIONS) + "])", flags=re.U | re.M | re.I +) + +_DIACRITICS_RE = re.compile(f'[{"".join(URDU_DIACRITICS)}]', flags=re.U | re.M | re.I) diff --git 
a/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/__init__.py b/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a473a32a2931d5bd491979ae45ab534261c299a --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/__init__.py @@ -0,0 +1,56 @@ +# coding: utf8 +""" +Text PreProcessing +=================== + +The pre-processing of Urdu text is necessary to make it useful for the machine +learning tasks. +This module provides the following functionality: + + - Normalize whitespace + - Put Spaces Before & After Digits + - Put Spaces Before & After English Words + - Put Spaces Before & After Urdu Punctuations + - Replace urls + - Replace emails + - Replace number + - Replace phone_number + - Replace currency_symbols + +You can look for all the different functions that come with pre-process +module in the reference here :py:mod:`~urduhack.preprocess`. + +""" +from .character import ( + digits_space, + english_characters_space, + all_punctuations_space, + preprocess, +) +from .util import ( + normalize_whitespace, + replace_urls, + replace_emails, + replace_numbers, + replace_phone_numbers, + replace_currency_symbols, + remove_punctuation, + remove_accents, + remove_english_alphabets, +) + +__all__ = [ + "digits_space", + "english_characters_space", + "all_punctuations_space", + "preprocess", + "normalize_whitespace", + "remove_punctuation", + "remove_accents", + "replace_urls", + "replace_emails", + "replace_numbers", + "replace_phone_numbers", + "replace_currency_symbols", + "remove_english_alphabets", +] diff --git a/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/character.py b/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/character.py new file mode 100644 index 0000000000000000000000000000000000000000..9cbd7d84131ef5d6279781faadfd8df720bc910b --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/character.py @@ -0,0 +1,97 @@ 
+# coding: utf8 +""" +Urduhack Character preprocess functions +""" + +from .regexes import _SPACE_AFTER_ALL_PUNCTUATIONS_RE, _SPACE_BEFORE_ALL_PUNCTUATIONS_RE +from .regexes import _SPACE_AFTER_DIGITS_RE, _SPACE_BEFORE_DIGITS_RE +from .regexes import _SPACE_BEFORE_ENG_CHAR_RE, _SPACE_AFTER_ENG_CHAR_RE + + +def digits_space(text: str) -> str: + """ + Add spaces before|after numeric and urdu digits + + Args: + text (str): ``Urdu`` text + Returns: + str: Returns a ``str`` object containing normalized text. + Examples: + >>> from urduhack.preprocessing import digits_space + >>> text = "20فیصد" + >>> normalized_text = digits_space(text) + >>> normalized_text + 20 فیصد + """ + text = _SPACE_BEFORE_DIGITS_RE.sub(" ", text) + text = _SPACE_AFTER_DIGITS_RE.sub(" ", text) + + return text + + +def english_characters_space(text: str) -> str: + """ + Functionality to add spaces before and after English words in the given Urdu text. It is an important step in + normalization of the Urdu data. + + this function returns a :py:class:`String` object which contains the original text with spaces before & after + English words. + + Args: + text (str): ``Urdu`` text + Returns: + str: Returns a ``str`` object containing normalized text. + Examples: + >>> from urduhack.preprocessing import english_characters_space + >>> text = "خاتون Aliyaنے بچوںUzma and Aliyaکے قتل کا اعترافConfession کیا ہے۔" + >>> normalized_text = english_characters_space(text) + >>> normalized_text + خاتون Aliya نے بچوں Uzma and Aliya کے قتل کا اعتراف Confession کیا ہے۔ + """ + text = _SPACE_BEFORE_ENG_CHAR_RE.sub(" ", text) + text = _SPACE_AFTER_ENG_CHAR_RE.sub(" ", text) + + return text + + +def all_punctuations_space(text: str) -> str: + """ + Add spaces after punctuations used in ``urdu`` writing + + Args: + text (str): ``Urdu`` text + Returns: + str: Returns a ``str`` object containing normalized text. 
+ """ + text = _SPACE_BEFORE_ALL_PUNCTUATIONS_RE.sub(" ", text) + text = _SPACE_AFTER_ALL_PUNCTUATIONS_RE.sub(" ", text) + return text + + +def preprocess(text: str) -> str: + """ + To preprocess some text, all you need to do pass ``unicode`` text. It will return a ``str`` + with proper spaces after digits and punctuations. + + Args: + text (str): ``Urdu`` text + Returns: + str: urdu text + Raises: + TypeError: If text param is not not str Type. + Examples: + >>> from urduhack.preprocessing import preprocess + >>> text = "اَباُوگل پاکستان ﻤﯿﮟ 20 سال ﺳﮯ ، وسائل کی کوئی کمی نہیں ﮨﮯ۔" + >>> normalized_text = preprocess(text) + >>> # The text now contains proper spaces after digits and punctuations, + >>> # normalized characters and no diacritics! + >>> normalized_text + اباوگل پاکستان ﻤﯿﮟ 20 سال ﺳﮯ ، وسائل کی کوئی کمی نہیں ﮨﮯ ۔ + """ + if not isinstance(text, str): + raise TypeError("text must be str type.") + + text = digits_space(text) + text = all_punctuations_space(text) + text = english_characters_space(text) + return text diff --git a/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/regexes.py b/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/regexes.py new file mode 100644 index 0000000000000000000000000000000000000000..97d26d6d43c3eb8e23eff8e867ab088282cd47c7 --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/regexes.py @@ -0,0 +1,46 @@ +# coding: utf8 +"""List of Regex for preprocess""" + +import string + +import regex as re + +from indicnlp.urduhack.urdu_characters import URDU_ALL_CHARACTERS, URDU_PUNCTUATIONS + +# Add spaces before|after numeric number and urdu words +# 18سالہ , 20فیصد +_EXCEPT_HAMZA = list(filter(lambda c: c != "\u0621", URDU_ALL_CHARACTERS)) +_SPACE_BEFORE_DIGITS_RE = re.compile( + r"(?<=[" + "".join(URDU_ALL_CHARACTERS) + "])(?=[0-9])", flags=re.U | re.M | re.I +) +_SPACE_AFTER_DIGITS_RE = re.compile( + r"(?<=[0-9])(?=[" + "".join(_EXCEPT_HAMZA) + "])", flags=re.U | re.M | re.I +) + +# Add spaces 
before|after english characters and urdu words +# ikramسالہ , abفیصد +_SPACE_BEFORE_ENG_CHAR_RE = re.compile( + r"(?<=[" + "".join(URDU_ALL_CHARACTERS) + "])(?=[a-zA-Z])", flags=re.U | re.M | re.I +) +_SPACE_AFTER_ENG_CHAR_RE = re.compile( + r"(?<=[a-zA-Z])(?=[" + "".join(URDU_ALL_CHARACTERS) + "])", flags=re.U | re.M | re.I +) + +# add space before and after all PUNCTUATIONS +_ALL_PUNCTUATIONS: str = "".join(URDU_PUNCTUATIONS) + "".join(string.punctuation) +_SPACE_BEFORE_ALL_PUNCTUATIONS_RE = re.compile( + r"(?<=[" + + "".join(URDU_ALL_CHARACTERS) + + "])(?=[" + + "".join(_ALL_PUNCTUATIONS) + + "])", + flags=re.U | re.M | re.I, +) +_SPACE_AFTER_ALL_PUNCTUATIONS_RE = re.compile( + r"(?<=[" + + "".join(_ALL_PUNCTUATIONS) + + "])(?=[^" + + "".join(_ALL_PUNCTUATIONS) + + "0-9 \n])", + flags=re.U | re.M | re.I, +) diff --git a/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/util.py b/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/util.py new file mode 100644 index 0000000000000000000000000000000000000000..504cf6735aff2a7a1948c6eedbd32143203ab857 --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/preprocessing/util.py @@ -0,0 +1,267 @@ +# coding: utf8 +""" +Preprocessing utilities +""" + +import sys +import unicodedata + +import regex as re + +CURRENCIES = { + "$": "USD", + "zł": "PLN", + "£": "GBP", + "¥": "JPY", + "฿": "THB", + "₡": "CRC", + "₦": "NGN", + "₩": "KRW", + "₪": "ILS", + "₫": "VND", + "€": "EUR", + "₱": "PHP", + "₲": "PYG", + "₴": "UAH", + "₹": "INR", +} + +_EMAIL_RE = re.compile( + r"(?:^|(?<=[^\w@.)]))([\w+-](\.(?!\.))?)*?[\w+-]@(?:\w-?)*?\w+(\.([a-z]{2,})){1,3}(?:$|(?=\b))", + flags=re.IGNORECASE | re.UNICODE, +) +_PHONE_RE = re.compile( + r"(?:^|(?<=[^\w)]))(\+?1[ .-]?)?(\(?\d{3}\)?[ .-]?)?(\d{3}[ .-]?\d{4})(\s?(?:ext\.?" 
+ r"|[#x-])\s?\d{2,6})?(?:$|(?=\W))" +) +_NUMBERS_RE = re.compile( + r"(?:^|(?<=[^\w,.]))[+–-]?(([1-9]\d{0,2}(,\d{3})+(\.\d*)?)|([1-9]\d{0,2}([ .]\d{3})+(,\d*)?)" + r"|(\d*?[.,]\d+)|\d+)(?:$|(?=\b))" +) +_CURRENCY_RE = re.compile("({})+".format("|".join(re.escape(c) for c in CURRENCIES))) +_LINEBREAK_RE = re.compile(r"((\r\n)|[\n\v])+") +_NONBREAKING_SPACE_RE = re.compile(r"(?!\n)\s+") +_URL_RE = re.compile( + r"(?:^|(?= 224.0.0.0 + # excludes network & broadcast addresses + # (first & last IP address of each class) + r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" + r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" + r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" + r"|" + # host name + r"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)" + # domain name + r"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*" + # TLD identifier + r"(?:\.(?:[a-z\u00a1-\uffff]{2,}))" r")" + # port number + r"(?::\d{2,5})?" + # resource path + r"(?:/\S*)?" r"(?:$|(?![\w?!+&/]))", + flags=re.UNICODE | re.IGNORECASE, +) # source: https://gist.github.com/dperini/729294 +_SHORT_URL_RE = re.compile( + r"(?:^|(?>> from urduhack.preprocessing import normalize_whitespace + >>> text = "عراق اور شام اعلان کیا ہے دونوں جلد اپنے گے؟" + >>> normalized_text = normalize_whitespace(text) + >>> normalized_text + عراق اور شام اعلان کیا ہے دونوں جلد اپنے گے؟ + """ + return _NONBREAKING_SPACE_RE.sub(" ", _LINEBREAK_RE.sub(r"\n", text)).strip() + + +def replace_urls(text: str, replace_with=""): + """ + Replace all URLs in ``text`` str with ``replace_with`` str. + + Args: + text (str): ``Urdu`` text + replace_with (str): Replace string + Returns: + str: Returns a ``str`` object replace url with ``replace_with`` text. 
+ Examples: + >>> from urduhack.preprocessing import replace_urls + >>> text = "20 www.gmail.com فیصد" + >>> replace_urls(text) + '20 فیصد' + """ + return _URL_RE.sub(replace_with, _SHORT_URL_RE.sub(replace_with, text)) + + +def replace_emails(text: str, replace_with=""): + """ + Replace all emails in ``text`` str with ``replace_with`` str. + + Args: + text (str): ``Urdu`` text + replace_with (str): Replace string + Returns: + str: Returns a ``str`` object replace emails with ``replace_with`` text. + Examples: + >>> text = "20 gunner@gmail.com فیصد" + >>> from urduhack.preprocessing import replace_emails + >>> replace_emails(text) + """ + return _EMAIL_RE.sub(replace_with, text) + + +def replace_phone_numbers(text: str, replace_with=""): + """ + Replace all phone numbers in ``text`` str with ``replace_with`` str. + + Args: + text (str): ``Urdu`` text + replace_with (str): Replace string + Returns: + str: Returns a ``str`` object replace number_no with ``replace_with`` text. + Examples: + >>> from urduhack.preprocessing import replace_numbers + >>> text = "20 فیصد" + >>> replace_numbers(text) + ' فیصد' + """ + return _PHONE_RE.sub(replace_with, text) + + +def replace_numbers(text: str, replace_with=""): + """ + Replace all numbers in ``text`` str with ``replace_with`` str. + + Args: + text (str): ``Urdu`` text + replace_with (str): Replace string + Returns: + str: Returns a ``str`` object replace number with ``replace_with`` text. + Examples: + >>> from urduhack.preprocessing import replace_phone_numbers + >>> text = "یعنی لائن آف کنٹرول پر فائربندی کا معاہدہ 555-123-4567 میں ہوا تھا" + >>> replace_phone_numbers(text) + 'یعنی لائن آف کنٹرول پر فائربندی کا معاہدہ میں ہوا تھا' + """ + return _NUMBERS_RE.sub(replace_with, text) + + +def replace_currency_symbols(text: str, replace_with=None): + """ + Replace all currency symbols in ``text`` str with string specified by ``replace_with`` str. 
+ + Args: + text (str): Raw text + replace_with (str): if None (default), replace symbols with + their standard 3-letter abbreviations (e.g. '$' with 'USD', '£' with 'GBP'); + otherwise, pass in a string with which to replace all symbols + (e.g. "*CURRENCY*") + Returns: + str: Returns a ``str`` object containing normalized text. + Examples: + >>> from urduhack.preprocessing import replace_currency_symbols + >>> text = "یعنی لائن آف کنٹرول پر فائربندی کا معاہدہ 2003 میں ہوا 33$ تھا۔" + >>> replace_currency_symbols(text) + 'یعنی لائن آف کنٹرول پر فائربندی کا معاہدہ 2003 میں ہوا 33USD تھا۔' + """ + if replace_with is None: + for key, value in CURRENCIES.items(): + text = text.replace(key, value) + return text + + return _CURRENCY_RE.sub(replace_with, text) + + +PUNCTUATION_TRANSLATE_UNICODE = dict.fromkeys( + (i for i in range(sys.maxunicode) if unicodedata.category(chr(i)).startswith("P")), + "", +) + + +def remove_punctuation(text: str, marks=None) -> str: + """ + Remove punctuation from ``text`` by removing all instances of ``marks``. + + Args: + text (str): Urdu text + marks (str): If specified, remove only the characters in this string, + e.g. ``marks=',;:'`` removes commas, semi-colons, and colons. + Otherwise, all punctuation marks are removed. + Returns: + str: returns a ``str`` object containing normalized text. + Note: + When ``marks=None``, Python's built-in :meth:`str.translate()` is + used to remove punctuation; otherwise, a regular expression is used + instead. The former's performance is about 5-10x faster. 
+ Examples: + >>> from urduhack.preprocessing import remove_punctuation + >>> output = remove_punctuation("کر ؟ سکتی ہے۔") + کر سکتی ہے + + """ + if marks: + return re.sub("[{}]+".format(re.escape(marks)), "", text, flags=re.UNICODE) + + return text.translate(PUNCTUATION_TRANSLATE_UNICODE) + + +def remove_accents(text: str) -> str: + """ + Remove accents from any accented unicode characters in ``text`` str, either by + transforming them into ascii equivalents or removing them entirely. + + Args: + text (str): Urdu text + Returns: + str + Examples: + >>> from urduhack.preprocessing import remove_accents + >>>text = "دالتِ عظمیٰ درخواست" + >>> remove_accents(text) + 'دالت عظمی درخواست' + """ + return "".join(c for c in text if not unicodedata.combining(c)) + + +def remove_english_alphabets(text: str): + """ + Removes ``English`` words and digits from a ``text`` + + Args: + text (str): Urdu text + Returns: + str: ``str`` object with english alphabets removed + """ + characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890" + table = str.maketrans({key: None for key in characters}) + return text.translate(table) diff --git a/libs/indic_nlp_library/indicnlp/urduhack/stop_words.py b/libs/indic_nlp_library/indicnlp/urduhack/stop_words.py new file mode 100644 index 0000000000000000000000000000000000000000..069beeaec723c75171a438fbd2918019c9048737 --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/stop_words.py @@ -0,0 +1,48 @@ +# coding: utf8 +""" +Complete collection of stopwords for the Urdu language. 
+Maintainer: Ikram Ali(mrikram1989@gmail.com) +version = 2020.08.01 +Source = https://github.com/urduhack/urdu-stopwords +""" +from typing import FrozenSet + +# Urdu Language Stop words list +STOP_WORDS: FrozenSet[str] = frozenset( + """ + + آ آئی آئیں آئے آتا آتی آتے آس آمدید آنا آنسہ آنی آنے آپ آگے آہ آہا آیا اب ابھی ابے + ارے اس اسکا اسکی اسکے اسی اسے اف افوہ البتہ الف ان اندر انکا انکی انکے انہوں انہی انہیں اوئے اور اوپر + اوہو اپ اپنا اپنوں اپنی اپنے اپنےآپ اکثر اگر اگرچہ اہاہا ایسا ایسی ایسے ایک بائیں بار بارے بالکل باوجود باہر + بج بجے بخیر بشرطیکہ بعد بعض بغیر بلکہ بن بنا بناؤ بند بڑی بھر بھریں بھی بہت بہتر تاکہ تاہم تب تجھ + تجھی تجھے ترا تری تلک تم تمام تمہارا تمہاروں تمہاری تمہارے تمہیں تو تک تھا تھی تھیں تھے تیرا تیری تیرے + جا جاؤ جائیں جائے جاتا جاتی جاتے جانی جانے جب جبکہ جدھر جس جسے جن جناب جنہوں جنہیں جو جہاں جی جیسا + جیسوں جیسی جیسے حالانکہ حالاں حصہ حضرت خاطر خالی خواہ خوب خود دائیں درمیان دریں دو دوران دوسرا دوسروں دوسری دوں + دکھائیں دی دیئے دیا دیتا دیتی دیتے دیر دینا دینی دینے دیکھو دیں دیے دے ذریعے رکھا رکھتا رکھتی رکھتے رکھنا رکھنی + رکھنے رکھو رکھی رکھے رہ رہا رہتا رہتی رہتے رہنا رہنی رہنے رہو رہی رہیں رہے ساتھ سامنے ساڑھے سب سبھی + سراسر سمیت سوا سوائے سکا سکتا سکتے سہ سہی سی سے شاید شکریہ صاحب صاحبہ صرف ضرور طرح طرف طور علاوہ عین + فقط فلاں فی قبل قطا لئے لائی لائے لاتا لاتی لاتے لانا لانی لانے لایا لو لوجی لوگوں لگ لگا لگتا + لگتی لگی لگیں لگے لہذا لی لیا لیتا لیتی لیتے لیکن لیں لیے لے ماسوا مت مجھ مجھی مجھے محترم محترمہ محض + مرا مرحبا مری مرے مزید مس مسز مسٹر مطابق مل مکرمی مگر مگھر مہربانی میرا میروں میری میرے میں نا نزدیک + نما نہ نہیں نیز نیچے نے و وار واسطے واقعی والا والوں والی والے واہ وجہ ورنہ وغیرہ ولے وگرنہ وہ وہاں + وہی وہیں ویسا ویسے ویں پاس پایا پر پس پلیز پون پونی پونے پھر پہ پہلا پہلی پہلے پیر پیچھے چاہئے + چاہتے چاہیئے چاہے چلا چلو چلیں چلے چناچہ چند چونکہ چکی چکیں چکے ڈالنا ڈالنی ڈالنے ڈالے کئے کا کاش کب کبھی + کدھر کر کرتا کرتی کرتے کرم کرنا کرنے کرو کریں کرے کس کسی کسے کم کن کنہیں کو کوئی کون کونسا + کونسے کچھ کہ 
کہا کہاں کہہ کہی کہیں کہے کی کیا کیسا کیسے کیونکر کیونکہ کیوں کیے کے گئی گئے گا گنا + گو گویا گی گیا ہائیں ہائے ہاں ہر ہرچند ہرگز ہم ہمارا ہماری ہمارے ہمی ہمیں ہو ہوئی ہوئیں ہوئے ہوا + ہوبہو ہوتا ہوتی ہوتیں ہوتے ہونا ہونگے ہونی ہونے ہوں ہی ہیلو ہیں ہے یا یات یعنی یک یہ یہاں یہی یہیں + +""".split() +) + + +def remove_stopwords(text: str) -> str: + """ + Remove STOP_WORDS from `text`. + + Args: + text (str): Urdu text + Returns: + str: ``str`` object with stop words removed + """ + return " ".join(word for word in text.split() if word not in STOP_WORDS) diff --git a/libs/indic_nlp_library/indicnlp/urduhack/tokenization/__init__.py b/libs/indic_nlp_library/indicnlp/urduhack/tokenization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d1ebccbba3232ff0150eaf865792e77d21a25c8b --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/tokenization/__init__.py @@ -0,0 +1,25 @@ +# coding: utf8 +""" +Tokenization +============== + +This module is another crucial part of the Urduhack. This module performs tokenization on sentence. It separates +different sentence from each other and converts each string into a complete **sentence token**. Note here you must not +confuse yourself with the word token. They are two completely different things. + +This library provides state of art word tokenizer for Urdu Language. It takes care of the spaces and where to connect +two urdu characters and where not to. + +The tokenization of Urdu text is necessary to make it useful for the NLP tasks. +This module provides the following functionality: + + - Sentence Tokenization + - Word Tokenization + +The tokenization of Urdu text is necessary to make it useful for the machine +learning tasks. In the :py:mod:`~urduhack.tokenization` module, we solved the problem related to +sentence and word tokenization. 
+""" +from .tokenizer import sentence_tokenizer + +__all__ = ["sentence_tokenizer"] diff --git a/libs/indic_nlp_library/indicnlp/urduhack/tokenization/eos.py b/libs/indic_nlp_library/indicnlp/urduhack/tokenization/eos.py new file mode 100644 index 0000000000000000000000000000000000000000..f7e0cce0aeec31fa010146568a840bfb502b1b5c --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/tokenization/eos.py @@ -0,0 +1,129 @@ +# coding: utf8 +"""Rule based Sentence tokenization module""" + +# Global Variables +_URDU_CONJUNCTIONS = [ + "جنہیں", + "جس", + "جن", + "جو", + "اور", + "اگر", + "اگرچہ", + "لیکن", + "مگر", + "پر", + "یا", + "تاہم", + "کہ", + "کر", + "تو", + "گے", + "گی", +] +_URDU_NEWLINE_WORDS = [ + "کیجیے", + "کیجئے", + "گئیں", + "تھیں", + "ہوں", + "خریدا", + "گے", + "ہونگے", + "گا", + "چاہیے", + "ہوئیں", + "گی", + "تھا", + "تھی", + "تھے", + "ہیں", + "ہے", +] + + +def _split_and_keep(_str, separator): + """Replace end of sentence with separator""" + if not _str: + return [] + max_p = chr(ord(max(_str)) + 1) + return _str.replace(separator, separator + max_p).split(max_p) + + +def _generate_sentences(text: str) -> list: + """Generate a list of urdu sentences from a given string. + This function automatically fixes multiple whitespaces + or new lines so you just need to pass the data and + get sentences in return. 
+ + Args: + text (str): base string + Returns: + list + """ + all_sentences = [] + sentences = _split_and_keep(text, "۔") + + for sentence in sentences: # pylint: disable=too-many-nested-blocks + if sentence and (len(sentence.split()) >= 2): + if "؟" in sentence: + q_sentences = _split_and_keep(sentence, "؟") + for _sen in q_sentences: + _sen = _sen.split() + new_sent = "" + is_cont = False + + for index, word in enumerate(_sen): + if is_cont: + is_cont = False + continue + + if ( + word in _URDU_NEWLINE_WORDS + and index + 1 < len(_sen) + and _sen[index + 1] not in _URDU_CONJUNCTIONS + ): + if index + 1 < len(_sen) and _sen[index + 1] in ["۔", "،"]: + new_sent += " " + word + " " + _sen[index + 1] + "\n" + is_cont = True + else: + new_sent += " " + word + "\n" + + else: + new_sent += " " + word + + for sen in new_sent.split("\n"): + if sen and len(sen.split()) >= 2: + all_sentences.append(sen.strip()) + + else: + sentence = sentence.split() + new_sent = "" + is_cont = False + + for index, word in enumerate(sentence): + if is_cont: + is_cont = False + continue + + if ( + word in _URDU_NEWLINE_WORDS + and index + 1 < len(sentence) + and sentence[index + 1] not in _URDU_CONJUNCTIONS + ): + if index + 1 < len(sentence) and sentence[index + 1] in [ + "۔", + "،", + ]: + new_sent += " " + word + " " + sentence[index + 1] + "\n" + is_cont = True + else: + new_sent += " " + word + "\n" + else: + new_sent += " " + word + + for sen in new_sent.split("\n"): + if sen and len(sen.split()) >= 2: + all_sentences.append(sen.strip()) + + return all_sentences diff --git a/libs/indic_nlp_library/indicnlp/urduhack/tokenization/tokenizer.py b/libs/indic_nlp_library/indicnlp/urduhack/tokenization/tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..62ed9a5c7e126180588f95fd56977a7b30ef7f2d --- /dev/null +++ b/libs/indic_nlp_library/indicnlp/urduhack/tokenization/tokenizer.py @@ -0,0 +1,33 @@ +# coding: utf8 +""" +This module provides the functionality to 
def sentence_tokenizer(text: str) -> List[str]:
    """
    Split ``Urdu`` text into its probable sentences.

    The work is delegated to the rule based splitter in the sibling ``eos``
    module; this wrapper only validates the argument type.

    Args:
        text (str): ``Urdu`` text
    Returns:
        list: the detected sentences (``str``), in the order they occur in ``text``.
    Raises:
        TypeError: If text is not a str Type
    Examples:
        >>> from indicnlp.urduhack.tokenization import sentence_tokenizer
        >>> text = "عراق اور شام نے اعلان کیا ہے دونوں ممالک جلد اپنے اپنے سفیروں کو واپس بغداد اور دمشق بھیج دیں گے؟"
        >>> sentences = sentence_tokenizer(text)
        >>> sentences
        ['عراق اور شام نے اعلان کیا ہے', 'دونوں ممالک جلد اپنے اپنے سفیروں کو واپس بغداد اور دمشق بھیج دیں گے؟']
    """
    if isinstance(text, str):
        return _generate_sentences(text)

    raise TypeError("text parameter must be str type.")
# Urdu Alphabets
# The literal is a whitespace-separated list; ``.split()`` turns it into the
# individual entries that make up the frozenset.
URDU_ALPHABETS: FrozenSet[str] = frozenset(
    "آ أ ا ب پ ت ٹ ث "
    " ج چ ح خ "
    " د ڈ ذ ر ڑ ز ژ "
    " س ش ص ض ط ظ ع غ "
    " ف ق ک گ ل م "
    " ن ں و ؤ ہ ۂ ۃ "
    " ھ ء ی ئ ے ۓ ".split()
)

# Urdu Digits from 0 to 9
URDU_DIGITS: FrozenSet[str] = frozenset("۰ ۱ ۲ ۳ ۴ ۵ ۶ ۷ ۸ ۹".split())

# Urdu Punctuations
URDU_PUNCTUATIONS: FrozenSet[str] = frozenset("؛ ، ٫ ؟ ۔ ٪".split())

# Urdu diacritics (aerab), written as escapes because they are combining marks
URDU_DIACRITICS: FrozenSet[str] = frozenset(
    "\u064e \u064B \u0670 \u0650 \u064F \u064d".split()
)

# Urdu Extra Characters
# NOTE(review): this literal contains format characters and combining marks
# that may be invisible in an editor -- edit with care.
URDU_EXTRA_CHARACTERS: FrozenSet[str] = frozenset(
    " ؀ ؁ ؂ ؃ ؍ ؎ ؏ ؐ ؑ ؒ ؓ ؔ ؕ ٌ ّ ْ ٓ ٔ ٖ ٗ ٘ ٬".split()
)

# Complete list of Urdu language Characters.
# Union of the five sets defined above.
URDU_ALL_CHARACTERS: FrozenSet[str] = frozenset().union(
    URDU_ALPHABETS,
    URDU_DIGITS,
    URDU_PUNCTUATIONS,  # type: ignore
    URDU_DIACRITICS,
    URDU_EXTRA_CHARACTERS,
)  # type: ignore

# Character -> character table whose values are spelled as ``\uXXXX`` escapes.
# At runtime each value is an ordinary one-character ``str``; presumably every
# value is the same code point as its key (the first four entries are written
# with identical escapes on both sides) -- verify before relying on it.
URDU_ALL_CHARACTERS_UNICODE: Dict[str, str] = {
    "\u0600": "\u0600",
    "\u0601": "\u0601",
    "\u0602": "\u0602",
    "\u0603": "\u0603",
    "،": "\u060c",
    "؍": "\u060d",
    "؎": "\u060e",
    "؏": "\u060f",
    "ؐ": "\u0610",
    "ؑ": "\u0611",
    "ؒ": "\u0612",
    "ؓ": "\u0613",
    "ؔ": "\u0614",
    "ؕ": "\u0615",
    "؛": "\u061b",
    "؟": "\u061f",
    "ء": "\u0621",
    "ً": "\u064b",
    "ٌ": "\u064c",
    "ٍ": "\u064d",
    "َ": "\u064e",
    "ُ": "\u064f",
    "ِ": "\u0650",
    "ّ": "\u0651",
    "ْ": "\u0652",
    "ٓ": "\u0653",
    "ٔ": "\u0654",
    "ٖ": "\u0656",
    "ٗ": "\u0657",
    "٘": "\u0658",
    "٪": "\u066a",
    "٫": "\u066b",
    "٬": "\u066c",
    "ٰ": "\u0670",
    "۔": "\u06d4",
    "آ": "\u0622",
    "أ": "\u0623",
    "ا": "\u0627",
    "ب": "\u0628",
    "پ": "\u067e",
    "ت": "\u062a",
    "ٹ": "\u0679",
    "ث": "\u062b",
    "ج": "\u062c",
    "چ": "\u0686",
    "ح": "\u062d",
    "خ": "\u062e",
    "د": "\u062f",
    "ڈ": "\u0688",
    "ذ": "\u0630",
    "ر": "\u0631",
    "ڑ": "\u0691",
    "ز": "\u0632",
    "ژ": "\u0698",
    "س": "\u0633",
    "ش": "\u0634",
    "ص": "\u0635",
    "ض": "\u0636",
    "ط": "\u0637",
    "ظ": "\u0638",
    "ع": "\u0639",
    "غ": "\u063a",
    "ف": "\u0641",
    "ق": "\u0642",
    "ک": "\u06a9",
    "گ": "\u06af",
    "ل": "\u0644",
    "م": "\u0645",
    "ن": "\u0646",
    "ں": "\u06ba",
    "و": "\u0648",
    "ؤ": "\u0624",
    "ھ": "\u06be",
    "ہ": "\u06c1",
    "ۂ": "\u06c2",
    "ۃ": "\u06c3",
    "ی": "\u06cc",
    "ئ": "\u0626",
    "ے": "\u06d2",
    "ۓ": "\u06d3",
    "۰": "\u06f0",
    "۱": "\u06f1",
    "۲": "\u06f2",
    "۳": "\u06f3",
    "۴": "\u06f4",
    "۵": "\u06f5",
    "۶": "\u06f6",
    "۷": "\u06f7",
    "۸": "\u06f8",
    "۹": "\u06f9",
}
def write_version_py():
    """Regenerate ``indicnlp/version.py`` from ``indicnlp/version.txt``.

    Both paths are resolved against the current working directory. The text
    file is read and stripped of surrounding whitespace, and ``version.py`` is
    overwritten with a single ``__version__ = "<version>"`` assignment (no
    trailing newline).

    Returns:
        str: the version string that was read.
    """
    package_dir = pathlib.Path("indicnlp")

    version = (package_dir / "version.txt").read_text().strip()
    (package_dir / "version.py").write_text(f'__version__ = "{version}"')

    return version
', '9876543210 என்ற எண்ணுக்கு ஒரு எஸ்எம்எஸ் அனுப்பவும், 2023 அக்டோபர் 15 ஆம் தேதிக்குள் newemail123@xyz.com என்ற மின்னஞ்சல் முகவரிக்கு அனுப்பவும். ']\n" + ] + } + ], + "source": [ + "\n", + "\n", + "import torch\n", + "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n", + "\n", + "ip = IndicProcessor(inference=True)\n", + "tokenizer = AutoTokenizer.from_pretrained(\"ai4bharat/indictrans2-en-indic-dist-200M\", trust_remote_code=True)\n", + "model = AutoModelForSeq2SeqLM.from_pretrained(\"ai4bharat/indictrans2-en-indic-dist-200M\", trust_remote_code=True)\n", + "\n", + "sentences = [\n", + " \"This is a test sentence.\",\n", + " \"This is another longer different test sentence.\",\n", + " \"Please send an SMS to 9876543210 and an email on newemail123@xyz.com by 15th October, 2023.\",\n", + "]\n", + "\n", + "batch = ip.preprocess_batch(sentences, src_lang=\"eng_Latn\", tgt_lang=\"tam_Taml\", visualize=False) # set it to visualize=True to print a progress bar\n", + "batch = tokenizer(batch, padding=\"longest\", truncation=True, max_length=256, return_tensors=\"pt\")\n", + "\n", + "with torch.inference_mode():\n", + " outputs = model.generate(**batch, num_beams=5, num_return_sequences=1, max_length=256)\n", + "\n", + "with tokenizer.as_target_tokenizer():\n", + " # This scoping is absolutely necessary, as it will instruct the tokenizer to tokenize using the target vocabulary.\n", + " # Failure to use this scoping will result in gibberish/unexpected predictions as the output will be de-tokenized with the source vocabulary instead.\n", + " outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True)\n", + "\n", + "outputs = ip.postprocess_batch(outputs, lang=\"tam_Taml\")\n", + "print(outputs)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ec49007", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fa9fc68", + 
"metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import torch\n", + "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n", + "\n", + "# Add local IndicTransToolkit path if needed\n", + "sys.path.append(os.path.abspath(\"libs/IndicTransToolkit\"))\n", + "from IndicTransToolkit.processor import IndicProcessor\n", + "\n", + "# Load processor, tokenizer, and model\n", + "ip = IndicProcessor(inference=True)\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(\"ai4bharat/indictrans2-en-indic-dist-200M\", trust_remote_code=True)\n", + "model = AutoModelForSeq2SeqLM.from_pretrained(\"ai4bharat/indictrans2-en-indic-dist-200M\", trust_remote_code=True)\n", + "\n", + "def translate(text, target_lang):\n", + " if not text.strip():\n", + " return \"Please enter some text.\"\n", + "\n", + " # Preprocess\n", + " batch = ip.preprocess_batch([text], src_lang=\"eng_Latn\", tgt_lang=target_lang)\n", + " batch = tokenizer(batch, padding=\"longest\", truncation=True, max_length=256, return_tensors=\"pt\")\n", + "\n", + " # Inference\n", + " with torch.inference_mode():\n", + " outputs = model.generate(**batch, num_beams=5, max_length=256)\n", + "\n", + " # Postprocess\n", + " with tokenizer.as_target_tokenizer():\n", + " decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True)\n", + "\n", + " return ip.postprocess_batch(decoded, lang=target_lang)[0]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "c4ae654a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'வணக்கம். '" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "translate_text(\"hello\",\"tam_Taml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "530f0925", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Translation: टाम् @टाम्ल नमस्कार, आप कैसे हैं? 
\n" + ] + } + ], + "source": [ + "import requests\n", + "\n", + "url = \"http://localhost:7860/translate\"\n", + "\n", + "payload = {\n", + " \"text\": \"Hello, how are you?\",\n", + " \"target_lang\": \"tam_Taml\"\n", + "}\n", + "\n", + "headers = {\n", + " \"Content-Type\": \"application/json\"\n", + "}\n", + "\n", + "response = requests.post(url, json=payload, headers=headers)\n", + "\n", + "if response.status_code == 200:\n", + " print(\"Translation:\", response.json()[\"translation\"])\n", + "else:\n", + " print(\"Error:\", response.status_code, response.text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73eb9c61", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "indietrans2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..79ea12212dbcd3ee216170e8fd7754197140d793 Binary files /dev/null and b/requirements.txt differ