jeremyarancio's picture
fix: :zap: Handle Server timeout errors + Add whitespace additions and deletions in text diff
161967b
from difflib import Differ
from typing import Tuple, Iterable
import logging
def compute_diff(text1, text2) -> Iterable[Tuple[str, str | None]]:
    """Diff two texts and return ``(payload, flag)`` pairs.

    ``Differ.compare`` is run on the two inputs; when they are strings the
    comparison is done element by element, i.e. per character. Each entry it
    produces is split into its first character (the diff marker) and the
    payload that follows the two-character prefix. A blank marker becomes
    ``None``; any other marker is kept as the flag. The pairs are then
    handed to ``_postprocess_compute_diff`` and its result is returned.
    """
    tagged = []
    for entry in Differ().compare(text1, text2):
        marker = entry[0]
        payload = entry[2:]
        tagged.append((payload, None if marker == " " else marker))
    return _postprocess_compute_diff(tagged)
def _postprocess_compute_diff(pairs: Iterable[Tuple[str, str | None]]) -> Iterable[Tuple[str, str | None]]:
"""Whitespace deletions add additions are missed by the diff component."""
for idx, (char, flag) in enumerate(pairs):
if char == " " and flag in ["+", "-"]:
if idx > 0 and idx < len(pairs):
if pairs[idx - 1][1] == flag or pairs[idx + 1][1] == flag:
yield (" ", flag)
else:
yield ("^", "+") if flag == "+" else ("#", "-")
else:
yield (char, flag)
def get_logger():
    """Set up basic logging and return the logger named after this module.

    ``logging.basicConfig`` is called with level ``INFO`` and a record
    format made of timestamp, logger name, level name and message, after
    which ``logging.getLogger(__name__)`` is returned.
    """
    record_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=record_format)
    module_logger = logging.getLogger(__name__)
    return module_logger
LANGS = {
"All": None,
"English": "en",
"French": "fr",
"German": "de",
"Spanish": "es",
"Italian": "it",
"Dutch": "nl",
"Polish": "pl",
"Portuguese": "pt",
"Swedish": "sv",
"Bulgarian": "bg",
"Romanian": "ro",
"Finnish": "fi",
"Russian": "ru",
"Norwegian Bokmål": "nb",
"Czech": "cs",
"Thai": "th",
"Danish": "da",
"Croatian": "hr",
"Hungarian": "hu",
"Arabic": "ar",
"Greek": "el",
"Japanese": "ja",
"Catalan": "ca",
"Serbian": "sr",
"Slovenian": "sl",
"Slovak": "sk",
"Turkish": "tr",
"Lithuanian": "lt",
"Chinese": "zh",
"Estonian": "et",
"Latvian": "lv",
"Undefined": "xx",
"Ukrainian": "uk",
"Indonesian": "id",
"Hebrew": "he",
"Vietnamese": "vi",
"Icelandic": "is",
"Latin": "la",
"Korean": "ko",
"Albanian": "sq",
"Georgian": "ka",
"Malay": "ms",
"Bosnian": "bs",
"Persian": "fa",
"Bengali": "bn",
"Galician": "gl",
"Kazakh": "kk",
"Macedonian": "mk",
"Norwegian Nynorsk": "nn",
"Hindi": "hi",
"Afar": "aa",
"Uzbek": "uz",
"Somali": "so",
"Afrikaans": "af"
}