Source code for coniferest.isoforest

from .coniferest import Coniferest, ConiferestEvaluator

__all__ = ["IsolationForest"]


class IsolationForest(Coniferest):
    """
    Isolation forest.

    This is a reimplementation of sklearn.ensemble.IsolationForest,
    which trains and evaluates much faster. It also supports multi-threading
    for evaluation (sample scoring).

    Parameters
    ----------
    n_trees : int, optional
        Number of trees in forest to build.

    n_subsamples : int, optional
        Number of subsamples to use for building the trees.

    max_depth : int or None, optional
        Maximal tree depth. If None, `log2(n_subsamples)` is used.

    n_jobs : int, default=-1
        Number of threads to use for evaluation. If -1, use all available CPUs.

    random_seed : int or None, optional
        Seed for reproducibility. If None, random seed is used.

    sampletrees_per_batch : int, default=1 << 20
        Forwarded unchanged to the `Coniferest` base class.
        NOTE(review): presumably limits how many (sample, tree) pairs are
        processed per evaluation batch -- confirm against `Coniferest`.
    """

    def __init__(
        self,
        n_trees=100,
        n_subsamples=256,
        max_depth=None,
        n_jobs=-1,
        random_seed=None,
        sampletrees_per_batch=1 << 20,
    ):
        super().__init__(
            trees=[],
            n_subsamples=n_subsamples,
            max_depth=max_depth,
            n_jobs=n_jobs,
            random_seed=random_seed,
            sampletrees_per_batch=sampletrees_per_batch,
        )
        self.n_trees = n_trees
        # Stays None until fit() has built the trees; every evaluator-backed
        # method goes through _fitted_evaluator() to check for that.
        self.evaluator = None

    def _fitted_evaluator(self):
        """
        Return the evaluator created by `fit`.

        Raises
        ------
        AttributeError
            If the forest has not been fitted yet. The exception type matches
            what the unguarded `None.<method>` access used to raise, so
            existing handlers keep working; only the message is clearer.
        """
        evaluator = self.evaluator
        if evaluator is None:
            raise AttributeError(
                "{} is not fitted yet; call fit() before using this method.".format(
                    type(self).__name__
                )
            )
        return evaluator

    def fit(self, data, labels=None):
        """
        Build the trees based on data.

        Parameters
        ----------
        data
            2-d array with features.

        labels
            Unused. Defaults to None.

        Returns
        -------
        self
        """
        self.trees = self.build_trees(data, self.n_trees)
        # The evaluator is constructed from the forest itself, so it must be
        # (re)created after the trees have been replaced.
        self.evaluator = ConiferestEvaluator(self)
        return self

    def score_samples(self, samples):
        """
        Compute scores for given samples.

        Parameters
        ----------
        samples
            2-d array with features.

        Returns
        -------
        1-d array with scores.

        Raises
        ------
        AttributeError
            If called before `fit`.
        """
        return self._fitted_evaluator().score_samples(samples)

    def fit_known(self, data, known_data=None, known_labels=None):
        """
        Fit the forest, ignoring any labeled data.

        Equivalent to ``fit(data)``: `known_data` and `known_labels` are
        accepted but not used.

        Parameters
        ----------
        data
            2-d array with features.

        known_data
            Unused. Defaults to None.

        known_labels
            Unused. Defaults to None.

        Returns
        -------
        self
        """
        return self.fit(data)

    def feature_signature(self, x):
        """
        Delegate to the evaluator's `feature_signature` for `x`.

        Parameters
        ----------
        x
            Input passed through to the evaluator.
            NOTE(review): presumably a 2-d array with features -- confirm.

        Returns
        -------
        Whatever `ConiferestEvaluator.feature_signature` returns.

        Raises
        ------
        AttributeError
            If called before `fit`.
        """
        return self._fitted_evaluator().feature_signature(x)

    def feature_importance(self, x):
        """
        Delegate to the evaluator's `feature_importance` for `x`.

        Parameters
        ----------
        x
            Input passed through to the evaluator.
            NOTE(review): presumably a 2-d array with features -- confirm.

        Returns
        -------
        Whatever `ConiferestEvaluator.feature_importance` returns.

        Raises
        ------
        AttributeError
            If called before `fit`.
        """
        return self._fitted_evaluator().feature_importance(x)

    def apply(self, x):
        """
        Apply the forest to X, return leaf indices.

        Parameters
        ----------
        x : ndarray shape (n_samples, n_features)
            2-d array with features.

        Returns
        -------
        x_leafs : ndarray of shape (n_samples, n_estimators)
            For each datapoint x in X and for each tree in the forest,
            return the index of the leaf x ends up in.

        Raises
        ------
        AttributeError
            If called before `fit`.
        """
        return self._fitted_evaluator().apply(x)