Spaces:
Running
Running
""" fontTools.misc.classifyTools.py -- tools for classifying things. | |
""" | |
class Classifier(object): | |
""" | |
Main Classifier object, used to classify things into similar sets. | |
""" | |
def __init__(self, sort=True): | |
self._things = set() # set of all things known so far | |
self._sets = [] # list of class sets produced so far | |
self._mapping = {} # map from things to their class set | |
self._dirty = False | |
self._sort = sort | |
def add(self, set_of_things): | |
""" | |
Add a set to the classifier. Any iterable is accepted. | |
""" | |
if not set_of_things: | |
return | |
self._dirty = True | |
things, sets, mapping = self._things, self._sets, self._mapping | |
s = set(set_of_things) | |
intersection = s.intersection(things) # existing things | |
s.difference_update(intersection) # new things | |
difference = s | |
del s | |
# Add new class for new things | |
if difference: | |
things.update(difference) | |
sets.append(difference) | |
for thing in difference: | |
mapping[thing] = difference | |
del difference | |
while intersection: | |
# Take one item and process the old class it belongs to | |
old_class = mapping[next(iter(intersection))] | |
old_class_intersection = old_class.intersection(intersection) | |
# Update old class to remove items from new set | |
old_class.difference_update(old_class_intersection) | |
# Remove processed items from todo list | |
intersection.difference_update(old_class_intersection) | |
# Add new class for the intersection with old class | |
sets.append(old_class_intersection) | |
for thing in old_class_intersection: | |
mapping[thing] = old_class_intersection | |
del old_class_intersection | |
def update(self, list_of_sets): | |
""" | |
Add a a list of sets to the classifier. Any iterable of iterables is accepted. | |
""" | |
for s in list_of_sets: | |
self.add(s) | |
def _process(self): | |
if not self._dirty: | |
return | |
# Do any deferred processing | |
sets = self._sets | |
self._sets = [s for s in sets if s] | |
if self._sort: | |
self._sets = sorted(self._sets, key=lambda s: (-len(s), sorted(s))) | |
self._dirty = False | |
# Output methods | |
def getThings(self): | |
"""Returns the set of all things known so far. | |
The return value belongs to the Classifier object and should NOT | |
be modified while the classifier is still in use. | |
""" | |
self._process() | |
return self._things | |
def getMapping(self): | |
"""Returns the mapping from things to their class set. | |
The return value belongs to the Classifier object and should NOT | |
be modified while the classifier is still in use. | |
""" | |
self._process() | |
return self._mapping | |
def getClasses(self): | |
"""Returns the list of class sets. | |
The return value belongs to the Classifier object and should NOT | |
be modified while the classifier is still in use. | |
""" | |
self._process() | |
return self._sets | |
def classify(list_of_sets, sort=True): | |
""" | |
Takes a iterable of iterables (list of sets from here on; but any | |
iterable works.), and returns the smallest list of sets such that | |
each set, is either a subset, or is disjoint from, each of the input | |
sets. | |
In other words, this function classifies all the things present in | |
any of the input sets, into similar classes, based on which sets | |
things are a member of. | |
If sort=True, return class sets are sorted by decreasing size and | |
their natural sort order within each class size. Otherwise, class | |
sets are returned in the order that they were identified, which is | |
generally not significant. | |
>>> classify([]) == ([], {}) | |
True | |
>>> classify([[]]) == ([], {}) | |
True | |
>>> classify([[], []]) == ([], {}) | |
True | |
>>> classify([[1]]) == ([{1}], {1: {1}}) | |
True | |
>>> classify([[1,2]]) == ([{1, 2}], {1: {1, 2}, 2: {1, 2}}) | |
True | |
>>> classify([[1],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}}) | |
True | |
>>> classify([[1,2],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}}) | |
True | |
>>> classify([[1,2],[2,4]]) == ([{1}, {2}, {4}], {1: {1}, 2: {2}, 4: {4}}) | |
True | |
>>> classify([[1,2],[2,4,5]]) == ( | |
... [{4, 5}, {1}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}}) | |
True | |
>>> classify([[1,2],[2,4,5]], sort=False) == ( | |
... [{1}, {4, 5}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}}) | |
True | |
>>> classify([[1,2,9],[2,4,5]], sort=False) == ( | |
... [{1, 9}, {4, 5}, {2}], {1: {1, 9}, 2: {2}, 4: {4, 5}, 5: {4, 5}, | |
... 9: {1, 9}}) | |
True | |
>>> classify([[1,2,9,15],[2,4,5]], sort=False) == ( | |
... [{1, 9, 15}, {4, 5}, {2}], {1: {1, 9, 15}, 2: {2}, 4: {4, 5}, | |
... 5: {4, 5}, 9: {1, 9, 15}, 15: {1, 9, 15}}) | |
True | |
>>> classes, mapping = classify([[1,2,9,15],[2,4,5],[15,5]], sort=False) | |
>>> set([frozenset(c) for c in classes]) == set( | |
... [frozenset(s) for s in ({1, 9}, {4}, {2}, {5}, {15})]) | |
True | |
>>> mapping == {1: {1, 9}, 2: {2}, 4: {4}, 5: {5}, 9: {1, 9}, 15: {15}} | |
True | |
""" | |
classifier = Classifier(sort=sort) | |
classifier.update(list_of_sets) | |
return classifier.getClasses(), classifier.getMapping() | |
if __name__ == "__main__": | |
import sys, doctest | |
sys.exit(doctest.testmod(optionflags=doctest.ELLIPSIS).failed) | |