# -*- coding: utf-8 -*-
"""Write to stdout without causing UnicodeEncodeError.

Importing this module replaces ``sys.stdout`` with a transliterating
wrapper when the current stdout uses the ``strict`` error handler and
has a known, non-UTF encoding.  The optional ``translit`` module is
preferred; when it cannot be imported, the simple fallback defined below
is installed instead and an ``ImportWarning`` is issued.
"""

import sys

# ``encoding`` may be missing *or* present-but-None on replaced streams;
# normalise both cases to '' so the check below cannot raise.
_stdout_encoding = getattr(sys.stdout, 'encoding', None) or ''

# Without a known encoding there is nothing to transliterate *to*, so the
# filter is only installed for a named, non-UTF encoding.
if (getattr(sys.stdout, 'errors', '') == 'strict'
        and _stdout_encoding
        and not _stdout_encoding.lower().startswith('utf')):
    try:
        import translit
        sys.stdout = translit.StreamFilter(sys.stdout)
    except ImportError:
        import codecs
        import unicodedata
        import warnings

        # Curly quotes have no NFKD decomposition, so map them by hand.
        TRANSLIT_MAP = {
            0x2018: "'",
            0x2019: "'",
            0x201c: '"',
            0x201d: '"',
        }

        def simplify(s):
            """Return *s* with curly quotes straightened and accents removed.

            The text is first passed through ``TRANSLIT_MAP``, then
            NFKD-normalised, and every combining character produced by
            the decomposition is dropped.
            """
            s = s.translate(TRANSLIT_MAP)
            return ''.join([c for c in unicodedata.normalize('NFKD', s)
                            if not unicodedata.combining(c)])

        def simple_translit_error_handler(error):
            """Codec error handler that transliterates unencodable text.

            Only ``UnicodeEncodeError`` is handled; any other error is
            re-raised unchanged.  Returns the ``(replacement, position)``
            pair that codec error handlers must produce, where *position*
            is the index at which encoding resumes.
            """
            if not isinstance(error, UnicodeEncodeError):
                raise error
            chunk = error.object[error.start:error.end]
            repl = simplify(chunk)
            # Whatever still cannot be encoded after simplification is
            # turned into backslash escapes so the replacement is always
            # representable in the target encoding.
            repl = (repl.encode(error.encoding, 'backslashreplace')
                    .decode(error.encoding))
            return repl, error.end

        class SimpleTranslitStreamFilter:
            """Filter a stream through simple transliteration.

            ``write`` and ``writelines`` downgrade their text to what the
            wrapped stream's encoding can represent; every other
            attribute is delegated to the wrapped stream.
            """

            # Name of the registered codec error handler.  It also
            # shadows the wrapped stream's ``errors`` attribute.
            errors = 'simple_translit'

            def __init__(self, target):
                """Wrap *target*, the stream that receives the output."""
                self.target = target

            def __getattr__(self, name):
                """Delegate attributes not defined here to the target."""
                # ``__getattr__`` only runs when normal lookup fails.  If
                # ``target`` itself is missing (e.g. an instance created
                # without ``__init__``), fail cleanly instead of
                # recursing forever through ``self.target``.
                if name == 'target':
                    raise AttributeError(name)
                return getattr(self.target, name)

            def write(self, s):
                """Write the downgraded form of *s* to the target.

                Returns the number of characters of *s* consumed, as
                text streams are expected to.
                """
                self.target.write(self.downgrade(s))
                return len(s)

            def writelines(self, lines):
                """Write each item of *lines* after downgrading it."""
                self.target.writelines(
                    [self.downgrade(line) for line in lines])

            def downgrade(self, s):
                """Return *s* restricted to the target stream's encoding."""
                return (s.encode(self.target.encoding, self.errors)
                        .decode(self.target.encoding))

        codecs.register_error(SimpleTranslitStreamFilter.errors,
                              simple_translit_error_handler)
        sys.stdout = SimpleTranslitStreamFilter(sys.stdout)
        warnings.warn('translit is unavailable', ImportWarning)