Spaces:
Running
Running
File size: 4,721 Bytes
b72ab63 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
"""Extend the Python codecs module with a few encodings that are used in OpenType (name table)
but missing from Python. See https://github.com/fonttools/fonttools/issues/236 for details."""
import codecs
import encodings
class ExtendCodec(codecs.Codec):
def __init__(self, name, base_encoding, mapping):
self.name = name
self.base_encoding = base_encoding
self.mapping = mapping
self.reverse = {v: k for k, v in mapping.items()}
self.max_len = max(len(v) for v in mapping.values())
self.info = codecs.CodecInfo(
name=self.name, encode=self.encode, decode=self.decode
)
codecs.register_error(name, self.error)
def _map(self, mapper, output_type, exc_type, input, errors):
base_error_handler = codecs.lookup_error(errors)
length = len(input)
out = output_type()
while input:
# first try to use self.error as the error handler
try:
part = mapper(input, self.base_encoding, errors=self.name)
out += part
break # All converted
except exc_type as e:
# else convert the correct part, handle error as requested and continue
out += mapper(input[: e.start], self.base_encoding, self.name)
replacement, pos = base_error_handler(e)
out += replacement
input = input[pos:]
return out, length
def encode(self, input, errors="strict"):
return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)
def decode(self, input, errors="strict"):
return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)
def error(self, e):
if isinstance(e, UnicodeDecodeError):
for end in range(e.start + 1, e.end + 1):
s = e.object[e.start : end]
if s in self.mapping:
return self.mapping[s], end
elif isinstance(e, UnicodeEncodeError):
for end in range(e.start + 1, e.start + self.max_len + 1):
s = e.object[e.start : end]
if s in self.reverse:
return self.reverse[s], end
e.encoding = self.name
raise e
_extended_encodings = {
"x_mac_japanese_ttx": (
"shift_jis",
{
b"\xFC": chr(0x007C),
b"\x7E": chr(0x007E),
b"\x80": chr(0x005C),
b"\xA0": chr(0x00A0),
b"\xFD": chr(0x00A9),
b"\xFE": chr(0x2122),
b"\xFF": chr(0x2026),
},
),
"x_mac_trad_chinese_ttx": (
"big5",
{
b"\x80": chr(0x005C),
b"\xA0": chr(0x00A0),
b"\xFD": chr(0x00A9),
b"\xFE": chr(0x2122),
b"\xFF": chr(0x2026),
},
),
"x_mac_korean_ttx": (
"euc_kr",
{
b"\x80": chr(0x00A0),
b"\x81": chr(0x20A9),
b"\x82": chr(0x2014),
b"\x83": chr(0x00A9),
b"\xFE": chr(0x2122),
b"\xFF": chr(0x2026),
},
),
"x_mac_simp_chinese_ttx": (
"gb2312",
{
b"\x80": chr(0x00FC),
b"\xA0": chr(0x00A0),
b"\xFD": chr(0x00A9),
b"\xFE": chr(0x2122),
b"\xFF": chr(0x2026),
},
),
}
_cache = {}
def search_function(name):
name = encodings.normalize_encoding(name) # Rather undocumented...
if name in _extended_encodings:
if name not in _cache:
base_encoding, mapping = _extended_encodings[name]
assert name[-4:] == "_ttx"
# Python 2 didn't have any of the encodings that we are implementing
# in this file. Python 3 added aliases for the East Asian ones, mapping
# them "temporarily" to the same base encoding as us, with a comment
# suggesting that full implementation will appear some time later.
# As such, try the Python version of the x_mac_... first, if that is found,
# use *that* as our base encoding. This would make our encoding upgrade
# to the full encoding when and if Python finally implements that.
# http://bugs.python.org/issue24041
base_encodings = [name[:-4], base_encoding]
for base_encoding in base_encodings:
try:
codecs.lookup(base_encoding)
except LookupError:
continue
_cache[name] = ExtendCodec(name, base_encoding, mapping)
break
return _cache[name].info
return None
codecs.register(search_function)
|