Source code for simpleai.machine_learning.models

#!/usr/bin/env python
# coding: utf-8

"""
Basic API for modeling a classification problem.
"""

try:
    import cPickle as pickle
except ImportError:
    import pickle


class Classifier(object):
    """
    Base of all classifiers.

    This specifies the classifier API. Each classifier holds at least a
    dataset and a ClassificationProblem.
    """

    def __init__(self, dataset, problem):
        """
        Stores `dataset` and `problem` and immediately trains the classifier
        by calling `learn`.
        """
        self.dataset = dataset
        self.problem = problem
        self.learn()

    def learn(self):
        """
        Does the training. Returns nothing.

        The base implementation raises NotImplementedError; subclasses are
        expected to override it.
        """
        raise NotImplementedError()

    @property
    def attributes(self):
        """
        The attributes of the problem.
        A list of callable objects.
        """
        return self.problem.attributes

    @property
    def target(self):
        """
        The problem's target.
        A callable that takes an observation and returns the correct
        classification for it.
        """
        return self.problem.target

    def classify(self, example):
        """
        Returns the classification for example.

        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError()

    def save(self, filepath):
        """
        Pickles the classifier and saves it into `filepath`.

        The dataset is left out of the pickle, so a classifier restored with
        `load` has `dataset` set to None. The in-memory classifier keeps its
        dataset after saving.

        Raises ValueError if `filepath` is empty or is not a `str`.
        """
        if not filepath or not isinstance(filepath, str):
            raise ValueError("Invalid filepath")

        # Detach the dataset only while pickling so it is not written to
        # disk, then put it back: saving must not cripple the live object,
        # even when opening the file or pickling fails.
        dataset = getattr(self, "dataset", None)
        self.dataset = None
        try:
            with open(filepath, "wb") as filehandler:
                pickle.dump(self, filehandler)
        finally:
            self.dataset = dataset

    def distance(self, a, b):
        """
        Custom distance between `a` and `b`.

        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError()

    @classmethod
    def load(cls, filepath):
        """
        Loads a pickled version of the classifier saved in `filepath`.

        Raises ValueError if the unpickled object is not a Classifier.
        """
        # NOTE: unpickling can execute arbitrary code, and the type check
        # below only runs after the object has been built. Only load files
        # that come from a trusted source.
        with open(filepath, "rb") as filehandler:
            classifier = pickle.load(filehandler)

        if not isinstance(classifier, Classifier):
            raise ValueError("Pickled object is not a Classifier")

        return classifier
class ClassificationProblem(object):
    """
    Abstract representation of a classification problem.

    It holds the attributes to be tested and defines the "target" of an
    example.
    You can define attributes by adding them to the `attributes` list or by
    defining a method and decorating it with `is_attribute`.
    The target method returns the real classification of an example from the
    dataset.
    """

    def __init__(self):
        self._load_self_attributes()

    def _load_self_attributes(self, attrs=None):
        """
        Sets `self.attributes` to `attrs` (a new list when omitted) extended
        with one Attribute per member marked by `is_attribute`, sorted by
        name. `attrs` is modified in place.
        """
        if attrs is None:
            attrs = []

        for name in dir(self):
            method = getattr(self, name)
            if hasattr(method, "is_attribute"):
                attr = Attribute(method, method.name)
                attrs.append(attr)

        self.attributes = attrs
        # This sort is useful in cases where attributes are fed vectorized
        # to the classifier (like SVMs) and you want to pickle and unpickle it
        # safely.
        # Requires attributes to have names.
        self.attributes.sort(key=lambda attr: attr.name)

    def target(self, example):
        """
        Given an example it returns the classification for that example.

        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError()

    def __getstate__(self):
        # For pickle-ability of method objects: attributes wrapping a method
        # marked by `is_attribute` are dropped from the state; they are
        # rebuilt by `_load_self_attributes` in `__setstate__`.
        attributes = [a for a in self.attributes
                      if not hasattr(a.function, "is_attribute")]
        d = dict(self.__dict__)
        d["attributes"] = attributes
        return d

    def __setstate__(self, d):
        # For pickle-ability: restore the plain state, then add the
        # method-based attributes back and re-sort everything by name.
        for name, value in list(d.items()):
            setattr(self, name, value)
        self._load_self_attributes(self.attributes)


class VectorDataClassificationProblem(ClassificationProblem):
    """
    A classification problem that defines attributes for a dataset that is a
    set of vectors.
    An attribute for each index of the vector is created, except for the
    target index.
    """

    def __init__(self, dataset, target_index):
        """
        `dataset` should be an iterable, *not* an iterator.
        `target_index` is the index in the vector where the classification
        of an example is defined. Negative values count from the end.

        Raises ValueError if `dataset` is empty or `target_index` is out of
        range for the first example.
        """
        super(VectorDataClassificationProblem, self).__init__()
        try:
            # Only the first example is inspected, to learn the vector length.
            example = next(iter(dataset))
        except StopIteration:
            raise ValueError("Dataset is empty")

        self.target_index = target_index
        N = len(example)
        if self.target_index < 0:  # Negative number allowed, counts in reverse
            self.target_index = N + self.target_index
        if self.target_index < 0 or N <= self.target_index:
            raise ValueError("Target index is out of range")

        for i in range(N):
            if i == self.target_index:
                continue
            attribute = VectorIndexAttribute(i, "data at index {}".format(i))
            self.attributes.append(attribute)

        # Unpickling re-sorts the attributes by name (see `__setstate__`).
        # Apply the same ordering here, otherwise vectors with more than ten
        # attribute indices ("data at index 10" sorts before
        # "data at index 2") would end up with a different attribute order
        # after a pickle round trip.
        self.attributes.sort(key=lambda attr: attr.name)

    def target(self, example):
        """
        Uses the target defined in the creation of the vector problem
        to return the target of `example`.
        """
        return example[self.target_index]


class Attribute(object):
    """
    Abstract base of an attribute, a feature to be tested on the examples.
    """

    def __init__(self, function=None, name=None, description=None):
        """
        Creates an attribute with `function`.
        Adds a name and a description if it's specified.
        """
        self.name = name
        self.function = function
        self.description = description

    def reason(self, example):
        """
        Returns a string with an explanation of
        why the attribute is being applied.

        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError()

    def __call__(self, example):
        # Evaluating the attribute delegates to the wrapped function.
        return self.function(example)

    def __str__(self):
        if self.name is None:
            return "<undefined name>"
        return self.name


class VectorIndexAttribute(Attribute):
    """
    Attribute that returns the n-th element from a vector.
    """

    def __init__(self, n, name=None, description=None):
        # The attribute itself is handed over as `function`, so
        # `self.function` refers back to this object. `__call__` is
        # overridden below, so `Attribute.__call__` is never used here.
        super(VectorIndexAttribute, self).__init__(self, name, description)
        self.n = n

    def reason(self, vector):
        """
        Returns a string telling which element of `vector` is inspected.
        """
        message = "{} is the {}-th element of the vector"
        return message.format(vector[self.n], self.n)

    def __call__(self, vector):
        return vector[self.n]


def is_attribute(method, name=None):
    """
    Decorator for methods that are attributes.

    Marks `method` with `is_attribute = True` and stores `name` (the
    method's own `__name__` when omitted) on it. Returns the same `method`.
    """
    if name is None:
        name = method.__name__
    method.is_attribute = True
    method.name = name
    return method