Source code for coniferest.sklearn.isoforest

import numpy as np

from ..evaluator import ForestEvaluator


class IsolationForestEvaluator(ForestEvaluator):
    """
    Forest evaluator constructed from an sklearn isolation forest.

    The trees are read from the forest's ``estimators_`` attribute and
    converted into the selector arrays consumed by ``ForestEvaluator``.
    """

    def __init__(self, isoforest):
        """
        Create evaluator for sklearn's version of isolation forest.

        Parameters
        ----------
        isoforest
            Sklearn's isolation forest instance. Its ``estimators_``,
            ``max_samples_`` and ``n_jobs`` attributes are read.
        """
        # One selector array per tree, then merged into a single flat layout.
        per_tree = [self.extract_selectors(tree) for tree in isoforest.estimators_]
        selectors, node_offsets, leaf_offsets = self.combine_selectors(per_tree)

        super().__init__(
            samples=isoforest.max_samples_,
            selectors=selectors,
            node_offsets=node_offsets,
            leaf_offsets=leaf_offsets,
            num_threads=isoforest.n_jobs,
            sampletrees_per_batch=1 << 20,
        )

    @classmethod
    def extract_selectors(cls, estimator):
        """
        Convert one sklearn tree into an array of selectors.

        Parameters
        ----------
        estimator
            Tree estimator whose ``tree_.__getstate__()["nodes"]`` record
            array provides the ``feature``, ``left_child``, ``right_child``,
            ``threshold`` and ``n_node_samples`` fields.

        Returns
        -------
        selectors
            Array of dtype ``cls.selector_dtype`` with one entry per node.
            For split nodes ``value`` holds the threshold; for leaves
            (negative ``feature``) it holds the leaf depth plus
            ``average_path_length`` of the leaf's sample count.
        """
        nodes = estimator.tree_.__getstate__()["nodes"]
        n_node_samples = nodes["n_node_samples"]

        selectors = np.zeros_like(nodes, dtype=cls.selector_dtype)
        selectors["feature"] = nodes["feature"]
        selectors["left"] = nodes["left_child"]
        selectors["right"] = nodes["right_child"]
        selectors["value"] = nodes["threshold"]

        # Field access yields views, so writes below land in `selectors`.
        features = selectors["feature"]
        values = selectors["value"]

        # Any negative feature index marks a leaf; normalise them all to -1.
        features[features < 0] = -1

        # Walk the tree from the root (node 0, depth 0) with an explicit
        # stack, replacing each leaf's value by its path-length estimate.
        pending = [(0, 0)]
        while pending:
            node, depth = pending.pop()
            if features[node] < 0:
                values[node] = depth + cls.average_path_length(n_node_samples[node])
                continue
            pending.append((selectors["right"][node], depth + 1))
            pending.append((selectors["left"][node], depth + 1))

        return selectors

    @classmethod
    def average_path_length(cls, n):
        """
        Average path length correction for a leaf holding ``n`` samples.

        Our average_path_length is a bit different from sklearn's one,
        so sklearn's realization is reproduced here: 0 for ``n <= 1``,
        1 for ``n == 2``, and otherwise
        ``2 * (ln(n - 1) + euler_gamma) - 2 * (n - 1) / n``.
        """
        if n <= 1:
            return 0
        if n == 2:
            return 1

        harmonic_term = 2.0 * (np.log(n - 1.0) + np.euler_gamma)
        return harmonic_term - 2.0 * (n - 1.0) / n