fix: :zap: Handle Server timeout errors + Add whitespace additions and deletions in text diff
161967b
from difflib import Differ | |
from typing import Tuple, Iterable | |
import logging | |
def compute_diff(text1, text2) -> Iterable[Tuple[str, str | None]]:
    """Diff two sequences and return ``(text, flag)`` pairs.

    Each token produced by ``difflib.Differ.compare`` is split into its
    payload (everything after the two-character prefix) and its change code
    (the first character of the prefix). A change code of ``" "`` (unchanged)
    is reported as ``None``; any other code (e.g. ``"+"`` or ``"-"``) is kept
    as is.

    Args:
        text1: The original sequence. When a plain string is passed, the
            comparison is made element by element, i.e. per character.
        text2: The modified sequence, compared against ``text1``.

    Returns:
        An iterable of ``(text, flag)`` pairs, after whitespace
        post-processing by ``_postprocess_compute_diff``.
    """
    differ = Differ()
    pairs = [
        # token is "<code> <payload>": code at index 0, payload from index 2.
        (token[2:], None if token[0] == " " else token[0])
        for token in differ.compare(text1, text2)
    ]
    return _postprocess_compute_diff(pairs)
def _postprocess_compute_diff(pairs: Iterable[Tuple[str, str | None]]) -> Iterable[Tuple[str, str | None]]: | |
"""Whitespace deletions add additions are missed by the diff component.""" | |
for idx, (char, flag) in enumerate(pairs): | |
if char == " " and flag in ["+", "-"]: | |
if idx > 0 and idx < len(pairs): | |
if pairs[idx - 1][1] == flag or pairs[idx + 1][1] == flag: | |
yield (" ", flag) | |
else: | |
yield ("^", "+") if flag == "+" else ("#", "-") | |
else: | |
yield (char, flag) | |
def get_logger():
    """Configure basic logging and return this module's logger.

    Calls ``logging.basicConfig`` with level ``INFO`` and a
    ``time - logger name - level - message`` format, then returns the logger
    named after this module (``__name__``).

    Returns:
        The ``logging.Logger`` registered under ``__name__``.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    return logging.getLogger(__name__)
# Display name of a language -> two-letter language code.
# "All" maps to None (presumably meaning "no language filter" - confirm
# against the callers); "Undefined" maps to the placeholder code "xx".
LANGS = {
    "All": None,
    "English": "en",
    "French": "fr",
    "German": "de",
    "Spanish": "es",
    "Italian": "it",
    "Dutch": "nl",
    "Polish": "pl",
    "Portuguese": "pt",
    "Swedish": "sv",
    "Bulgarian": "bg",
    "Romanian": "ro",
    "Finnish": "fi",
    "Russian": "ru",
    "Norwegian Bokmål": "nb",
    "Czech": "cs",
    "Thai": "th",
    "Danish": "da",
    "Croatian": "hr",
    "Hungarian": "hu",
    "Arabic": "ar",
    "Greek": "el",
    "Japanese": "ja",
    "Catalan": "ca",
    "Serbian": "sr",
    "Slovenian": "sl",
    "Slovak": "sk",
    "Turkish": "tr",
    "Lithuanian": "lt",
    "Chinese": "zh",
    "Estonian": "et",
    "Latvian": "lv",
    "Undefined": "xx",
    "Ukrainian": "uk",
    "Indonesian": "id",
    "Hebrew": "he",
    "Vietnamese": "vi",
    "Icelandic": "is",
    "Latin": "la",
    "Korean": "ko",
    "Albanian": "sq",
    "Georgian": "ka",
    "Malay": "ms",
    "Bosnian": "bs",
    "Persian": "fa",
    "Bengali": "bn",
    "Galician": "gl",
    "Kazakh": "kk",
    "Macedonian": "mk",
    "Norwegian Nynorsk": "nn",
    "Hindi": "hi",
    "Afar": "aa",
    "Uzbek": "uz",
    "Somali": "so",
    "Afrikaans": "af",
}