Spaces:
Running
Running
"""Module for reading TFM (TeX Font Metrics) files. | |
The TFM format is described in the TFtoPL WEB source code, whose typeset form | |
can be found on `CTAN <http://mirrors.ctan.org/info/knuth-pdf/texware/tftopl.pdf>`_. | |
>>> from fontTools.tfmLib import TFM | |
>>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm") | |
>>> | |
>>> # Accessing an attribute gets you metadata. | |
>>> tfm.checksum | |
1274110073 | |
>>> tfm.designsize | |
10.0 | |
>>> tfm.codingscheme | |
'TeX text' | |
>>> tfm.family | |
'CMR' | |
>>> tfm.seven_bit_safe_flag | |
False | |
>>> tfm.face | |
234 | |
>>> tfm.extraheader | |
{} | |
>>> tfm.fontdimens | |
{'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578} | |
>>> # Accessing a character gets you its metrics. | |
>>> # “width” is always available, other metrics are available only when | |
>>> # applicable. All values are relative to “designsize”. | |
>>> tfm.chars[ord("g")] | |
{'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219} | |
>>> # Kerning and ligature can be accessed as well. | |
>>> tfm.kerning[ord("c")] | |
{104: -0.02777862548828125, 107: -0.02777862548828125} | |
>>> tfm.ligatures[ord("f")] | |
{105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)} | |
""" | |
from types import SimpleNamespace | |
from fontTools.misc.sstruct import calcsize, unpack, unpack2 | |
# sstruct layout of the file preamble: twelve 16-bit fields ("h") read in
# big-endian order (">"), giving the length of the file and of each subfile.
# All lengths are counted in 4-byte words.
SIZES_FORMAT = """
    >
    lf: h    # length of the entire file, in words
    lh: h    # length of the header data, in words
    bc: h    # smallest character code in the font
    ec: h    # largest character code in the font
    nw: h    # number of words in the width table
    nh: h    # number of words in the height table
    nd: h    # number of words in the depth table
    ni: h    # number of words in the italic correction table
    nl: h    # number of words in the ligature/kern table
    nk: h    # number of words in the kern table
    ne: h    # number of words in the extensible character table
    np: h    # number of font parameter words
"""

SIZES_SIZE = calcsize(SIZES_FORMAT)

# sstruct fixed-point notation: 12 integer bits followed by 20 fraction bits.
FIXED_FORMAT = "12.20F"

# The header subfile may be truncated, so four cumulative layouts are defined.
# Each one extends the previous layout with the next group of optional fields;
# the reader picks the longest layout that fits in the declared header length.
HEADER_FORMAT1 = f"""
    >
    checksum:            L
    designsize:          {FIXED_FORMAT}
"""

HEADER_FORMAT2 = f"""
    {HEADER_FORMAT1}
    codingscheme:        40p
"""

HEADER_FORMAT3 = f"""
    {HEADER_FORMAT2}
    family:              20p
"""

HEADER_FORMAT4 = f"""
    {HEADER_FORMAT3}
    seven_bit_safe_flag: ?
    ignored:             x
    ignored:             x
    face:                B
"""

HEADER_SIZE1 = calcsize(HEADER_FORMAT1)
HEADER_SIZE2 = calcsize(HEADER_FORMAT2)
HEADER_SIZE3 = calcsize(HEADER_FORMAT3)
HEADER_SIZE4 = calcsize(HEADER_FORMAT4)

# One instruction of a ligature/kern program: four unsigned bytes.
LIG_KERN_COMMAND = """
    >
    skip_byte: B
    next_char: B
    op_byte: B
    remainder: B
"""
# Names of the first seven font parameters, in file order.
BASE_PARAMS = "SLANT SPACE STRETCH SHRINK XHEIGHT QUAD EXTRASPACE".split()

# Names of the parameters that follow the base ones when the coding scheme
# identifies the font as a math symbols font.
MATHSY_PARAMS = (
    "NUM1 NUM2 NUM3 DENOM1 DENOM2 SUP1 SUP2 SUP3 SUB1 SUB2 "
    "SUPDROP SUBDROP DELIM1 DELIM2 AXISHEIGHT"
).split()

# Names of the parameters that follow the base ones when the coding scheme
# identifies the font as a math extension font.
MATHEX_PARAMS = (
    "DEFAULTRULETHICKNESS "
    "BIGOPSPACING1 BIGOPSPACING2 BIGOPSPACING3 BIGOPSPACING4 BIGOPSPACING5"
).split()

# Font type, derived from the coding scheme string.
VANILLA, MATHSY, MATHEX = range(3)

# Classification values for lig/kern steps (not referenced by the code
# visible in this module).
UNREACHABLE, PASSTHROUGH, ACCESSABLE = range(3)

# Values of the two-bit "tag" field of a character's info word.
NO_TAG, LIG_TAG, LIST_TAG, EXT_TAG = range(4)

# Thresholds applied to the skip_byte and op_byte of a lig/kern instruction.
STOP_FLAG = KERN_FLAG = 128
class TFMException(Exception):
    """Error raised when a TFM file fails the reader's structural checks."""

    def __init__(self, message):
        # A single human-readable message is the only payload.
        Exception.__init__(self, message)
class TFM:
    """In-memory representation of a TFM (TeX Font Metrics) file.

    The file is parsed eagerly by the constructor. Afterwards the instance
    exposes:

    * header fields: ``checksum``, ``designsize``, ``codingscheme``,
      ``family``, ``seven_bit_safe_flag``, ``face`` and ``extraheader``;
    * ``fonttype``: ``VANILLA``, ``MATHSY`` or ``MATHEX``, derived from the
      coding scheme;
    * ``fontdimens``: font parameters keyed by parameter name;
    * ``chars``: per-character metrics keyed by character code;
    * ``kerning`` and ``ligatures``: keyed by the current character code,
      then by the following character code;
    * ``left_boundary_char`` / ``right_boundary_char``: boundary character
      codes, or ``None`` when the lig/kern table declares none.
    """

    def __init__(self, file):
        """Parse *file*: a path, or an object whose ``read()`` returns bytes."""
        self._read(file)

    def __repr__(self):
        return (
            f"<TFM"
            f" for {self.family}"
            f" in {self.codingscheme}"
            f" at {self.designsize:g}pt>"
        )

    def _read(self, file):
        """Read and decode the whole TFM file into attributes of ``self``.

        Raises:
            TFMException: if the input is too short or the subfile sizes
                declared in the preamble are inconsistent.
        """
        if hasattr(file, "read"):
            data = file.read()
        else:
            with open(file, "rb") as fp:
                data = fp.read()

        self._data = data

        if len(data) < SIZES_SIZE:
            raise TFMException("Too short input file")

        sizes = SimpleNamespace()
        unpack2(SIZES_FORMAT, data, sizes)

        # Do some file structure sanity checks.
        # TeX and TFtoPL do additional functional checks and might even correct
        # “errors” in the input file, but we instead try to output the file as
        # it is as long as it is parsable, even if the data make no sense.

        # A zero length is rejected here too, as the message promises; such a
        # file could never satisfy the size-sum check further down anyway.
        if sizes.lf <= 0:
            raise TFMException("The file claims to have negative or zero length!")

        if len(data) < sizes.lf * 4:
            raise TFMException("The file has fewer bytes than it claims!")

        for name, length in vars(sizes).items():
            if length < 0:
                raise TFMException(f"The subfile size: '{name}' is negative!")

        if sizes.lh < 2:
            raise TFMException(f"The header length is only {sizes.lh}!")

        if sizes.bc > sizes.ec + 1 or sizes.ec > 255:
            raise TFMException(
                f"The character code range {sizes.bc}..{sizes.ec} is illegal!"
            )

        if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0:
            raise TFMException("Incomplete subfiles for character dimensions!")

        if sizes.ne > 256:
            raise TFMException(f"There are {sizes.ne} extensible recipes!")

        if sizes.lf != (
            6
            + sizes.lh
            + (sizes.ec - sizes.bc + 1)
            + sizes.nw
            + sizes.nh
            + sizes.nd
            + sizes.ni
            + sizes.nl
            + sizes.nk
            + sizes.ne
            + sizes.np
        ):
            raise TFMException("Subfile sizes don’t add up to the stated total")

        # Subfile offsets, used in the helper function below. These all are
        # 32-bit word offsets not 8-bit byte offsets.
        char_base = 6 + sizes.lh - sizes.bc
        width_base = char_base + sizes.ec + 1
        height_base = width_base + sizes.nw
        depth_base = height_base + sizes.nh
        italic_base = depth_base + sizes.nd
        lig_kern_base = italic_base + sizes.ni
        kern_base = lig_kern_base + sizes.nl
        exten_base = kern_base + sizes.nk
        param_base = exten_base + sizes.ne

        # Helper functions for accessing individual data. If this looks
        # nonidiomatic Python, I blame the effect of reading the literate WEB
        # documentation of TFtoPL.
        #
        # Each character owns one 4-byte info word:
        #   byte 0: width index
        #   byte 1: height index (high 4 bits), depth index (low 4 bits)
        #   byte 2: italic index (high 6 bits), tag (low 2 bits)
        #   byte 3: remainder, interpreted according to the tag
        def char_info(c):
            return 4 * (char_base + c)

        def width_index(c):
            return data[char_info(c)]

        def nonexistent(c):
            # Out of the declared code range, or present with a zero width index.
            return c < sizes.bc or c > sizes.ec or width_index(c) == 0

        def height_index(c):
            return data[char_info(c) + 1] // 16

        def depth_index(c):
            return data[char_info(c) + 1] % 16

        def italic_index(c):
            return data[char_info(c) + 2] // 4

        def tag(c):
            return data[char_info(c) + 2] % 4

        def remainder(c):
            return data[char_info(c) + 3]

        def width(c):
            r = 4 * (width_base + width_index(c))
            return read_fixed(r, "v")["v"]

        def height(c):
            r = 4 * (height_base + height_index(c))
            return read_fixed(r, "v")["v"]

        def depth(c):
            r = 4 * (depth_base + depth_index(c))
            return read_fixed(r, "v")["v"]

        def italic(c):
            r = 4 * (italic_base + italic_index(c))
            return read_fixed(r, "v")["v"]

        def exten(c):
            return 4 * (exten_base + remainder(c))

        def lig_step(i):
            return 4 * (lig_kern_base + i)

        def lig_kern_command(i):
            # *i* is a byte offset, as returned by lig_step().
            command = SimpleNamespace()
            unpack2(LIG_KERN_COMMAND, data[i:], command)
            return command

        def kern(i):
            r = 4 * (kern_base + i)
            return read_fixed(r, "v")["v"]

        def param(i):
            return 4 * (param_base + i)

        def read_fixed(index, key, obj=None):
            # Decode one fixed-point word at byte offset *index* and store it
            # under *key* in *obj* (a fresh dict when *obj* is None).
            ret = unpack2(f">;{key}:{FIXED_FORMAT}", data[index:], obj)
            return ret[0]

        # Set all attributes to empty values regardless of the header size.
        unpack(HEADER_FORMAT4, [0] * HEADER_SIZE4, self)

        # The header starts right after the twelve 2-byte size fields.
        offset = 24
        length = sizes.lh * 4
        self.extraheader = {}
        if length >= HEADER_SIZE4:
            rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1]
            if self.face < 18:
                # Small face codes are turned into a three-letter string, one
                # letter per component (the scheme follows TFtoPL).
                s = self.face % 2
                b = self.face // 2
                self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3]
            # Header words beyond the standard layout are kept verbatim as
            # HEADER18, HEADER19, ...
            for i in range(sizes.lh - HEADER_SIZE4 // 4):
                rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1]
        elif length >= HEADER_SIZE3:
            unpack2(HEADER_FORMAT3, data[offset:], self)
        elif length >= HEADER_SIZE2:
            unpack2(HEADER_FORMAT2, data[offset:], self)
        elif length >= HEADER_SIZE1:
            unpack2(HEADER_FORMAT1, data[offset:], self)

        self.fonttype = VANILLA
        scheme = self.codingscheme.upper()
        if scheme.startswith("TEX MATH SY"):
            self.fonttype = MATHSY
        elif scheme.startswith("TEX MATH EX"):
            self.fonttype = MATHEX

        self.fontdimens = {}
        for i in range(sizes.np):
            # Parameters without a known name fall back to PARAMETER<n>.
            name = f"PARAMETER{i+1}"
            if i <= 6:
                name = BASE_PARAMS[i]
            elif self.fonttype == MATHSY and i <= 21:
                name = MATHSY_PARAMS[i - 7]
            elif self.fonttype == MATHEX and i <= 12:
                name = MATHEX_PARAMS[i - 7]
            read_fixed(param(i), name, self.fontdimens)

        # Maps a character code to the index of the first instruction of its
        # lig/kern program.
        lig_kern_map = {}
        self.right_boundary_char = None
        self.left_boundary_char = None
        if sizes.nl > 0:
            # A skip_byte of 255 in the first instruction declares the right
            # boundary character.
            cmd = lig_kern_command(lig_step(0))
            if cmd.skip_byte == 255:
                self.right_boundary_char = cmd.next_char

            # A skip_byte of 255 in the last instruction declares a left
            # boundary program; it is stored under the pseudo code 256.
            cmd = lig_kern_command(lig_step(sizes.nl - 1))
            if cmd.skip_byte == 255:
                self.left_boundary_char = 256
                r = 256 * cmd.op_byte + cmd.remainder
                lig_kern_map[self.left_boundary_char] = r

        self.chars = {}
        for c in range(sizes.bc, sizes.ec + 1):
            if width_index(c) > 0:
                self.chars[c] = info = {}
                info["width"] = width(c)
                if height_index(c) > 0:
                    info["height"] = height(c)
                if depth_index(c) > 0:
                    info["depth"] = depth(c)
                if italic_index(c) > 0:
                    info["italic"] = italic(c)
                char_tag = tag(c)
                if char_tag == NO_TAG:
                    pass
                elif char_tag == LIG_TAG:
                    lig_kern_map[c] = remainder(c)
                elif char_tag == LIST_TAG:
                    info["nextlarger"] = remainder(c)
                elif char_tag == EXT_TAG:
                    info["varchar"] = varchar = {}
                    # The extensible recipe is four bytes: top, mid, bot, rep.
                    # "rep" is always recorded; the others only when non-zero.
                    for i in range(4):
                        part = data[exten(c) + i]
                        if i == 3 or part > 0:
                            name = "rep"
                            if i == 0:
                                name = "top"
                            elif i == 1:
                                name = "mid"
                            elif i == 2:
                                name = "bot"
                            # A piece that is not a real character is replaced
                            # by the character itself.
                            if nonexistent(part):
                                varchar[name] = c
                            else:
                                varchar[name] = part

        self.ligatures = {}
        self.kerning = {}
        for c, i in sorted(lig_kern_map.items()):
            # A first instruction with skip_byte above STOP_FLAG redirects to
            # the actual start of the program.
            cmd = lig_kern_command(lig_step(i))
            if cmd.skip_byte > STOP_FLAG:
                i = 256 * cmd.op_byte + cmd.remainder

            while i < sizes.nl:
                cmd = lig_kern_command(lig_step(i))
                if cmd.skip_byte > STOP_FLAG:
                    pass
                else:
                    if cmd.op_byte >= KERN_FLAG:
                        r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder
                        self.kerning.setdefault(c, {})[cmd.next_char] = kern(r)
                    else:
                        r = cmd.op_byte
                        if r == 4 or (r > 7 and r != 11):
                            # Ligature step with nonstandard code, we output
                            # the code verbatim.
                            lig = r
                        else:
                            # Spell the operation out: optional "/" on either
                            # side of "LIG", then one ">" per multiple of 4.
                            lig = ""
                            if r % 4 > 1:
                                lig += "/"
                            lig += "LIG"
                            if r % 2 != 0:
                                lig += "/"
                            while r > 3:
                                lig += ">"
                                r -= 4
                        self.ligatures.setdefault(c, {})[cmd.next_char] = (
                            lig,
                            cmd.remainder,
                        )

                # skip_byte >= STOP_FLAG ends the program; otherwise it is the
                # number of instructions to skip before the next one.
                if cmd.skip_byte >= STOP_FLAG:
                    break
                i += cmd.skip_byte + 1
if __name__ == "__main__":
    import sys

    tfm = TFM(sys.argv[1])

    # Dump every parsed attribute as "tfm.<name>=<value>", one per line and in
    # this fixed order, followed by the object's repr.
    attribute_names = (
        "checksum",
        "designsize",
        "codingscheme",
        "fonttype",
        "family",
        "seven_bit_safe_flag",
        "face",
        "extraheader",
        "fontdimens",
        "right_boundary_char",
        "left_boundary_char",
        "kerning",
        "ligatures",
        "chars",
    )
    report = [f"tfm.{name}={getattr(tfm, name)}" for name in attribute_names]
    print("\n".join(report))
    print(tfm)