# Spaces: Sleeping  (status banner left over from the hosting page; not part of the module)
# -*- coding: utf-8 -*-
"""Write to stdout without causing UnicodeEncodeError.

Importing this module inspects ``sys.stdout``.  If the stream uses the
``'strict'`` error handler and its encoding name does not start with
``'utf'``, ``sys.stdout`` is replaced by a filter that transliterates text
before it reaches the real stream.  The optional ``translit`` module is
tried first; if importing it fails, the simple fallback defined below is
installed instead and an ``ImportWarning`` is issued.
"""

import sys

# ``encoding`` may be present but None; ``or ''`` keeps ``.lower()`` from
# raising AttributeError at import time in that case.
if (getattr(sys.stdout, 'errors', '') == 'strict' and
        not (getattr(sys.stdout, 'encoding', None) or '')
        .lower().startswith('utf')):
    try:
        import translit
        sys.stdout = translit.StreamFilter(sys.stdout)
    except ImportError:
        import codecs
        import unicodedata
        import warnings

        # Curly single/double quotes -> plain ASCII quotes.  Applied with
        # str.translate() before normalization in simplify().
        TRANSLIT_MAP = {
            0x2018: "'",
            0x2019: "'",
            0x201c: '"',
            0x201d: '"',
        }

        def simplify(s):
            """Return *s* reduced to simpler characters.

            Characters listed in TRANSLIT_MAP are replaced first; the
            result is NFKD-normalized and every combining character is
            dropped, so e.g. an accented letter becomes its base letter.
            """
            s = s.translate(TRANSLIT_MAP)
            return ''.join([c for c in unicodedata.normalize('NFKD', s)
                            if not unicodedata.combining(c)])

        def simple_translit_error_handler(error):
            """Codec error handler that transliterates unencodable text.

            :param error: the exception passed in by the codec machinery.
            :returns: ``(replacement, position)`` where *replacement* is
                the simplified text and *position* is where encoding
                resumes (the end of the failing span).
            :raises: *error* itself when it is not a UnicodeEncodeError.
            """
            if not isinstance(error, UnicodeEncodeError):
                raise error
            chunk = error.object[error.start:error.end]
            repl = simplify(chunk)
            # Anything still unencodable after simplification is emitted
            # as a backslash escape so the replacement always encodes.
            repl = (repl.encode(error.encoding, 'backslashreplace')
                    .decode(error.encoding))
            return repl, error.end

        class SimpleTranslitStreamFilter:
            """Filter a stream through simple transliteration.

            Text passed to write()/writelines() is downgraded to what the
            wrapped stream's encoding can represent before being handed
            on.  Every other attribute is delegated to the wrapped stream.
            """

            # Name under which the error handler is registered.  Because
            # this shadows the wrapped stream's ``errors`` attribute, the
            # module-level ``errors == 'strict'`` check no longer matches
            # a stream that has already been wrapped.
            errors = 'simple_translit'

            def __init__(self, target):
                self.target = target

            def __getattr__(self, name):
                # Only reached when normal lookup fails.  If ``target``
                # itself is missing (instance created without __init__),
                # delegating would recurse forever, so fail cleanly.
                if name == 'target':
                    raise AttributeError(name)
                return getattr(self.target, name)

            def write(self, s):
                """Write the downgraded form of *s* to the wrapped stream."""
                self.target.write(self.downgrade(s))

            def writelines(self, lines):
                """Write the downgraded form of each item of *lines*."""
                self.target.writelines(
                    [self.downgrade(line) for line in lines])

            def downgrade(self, s):
                """Return *s* restricted to the wrapped stream's encoding.

                The text is round-tripped through the encoding using the
                transliterating error handler.  When the wrapped stream
                reports no encoding (missing, None or empty), ASCII is
                used so the result is safe for any stream.
                """
                encoding = getattr(self.target, 'encoding', None) or 'ascii'
                return s.encode(encoding, self.errors).decode(encoding)

        codecs.register_error(SimpleTranslitStreamFilter.errors,
                              simple_translit_error_handler)
        sys.stdout = SimpleTranslitStreamFilter(sys.stdout)
        warnings.warn('translit is unavailable', ImportWarning)