Uploaded Test files

Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions


@@ -0,0 +1,37 @@
"""
The :mod:`sklearn.neighbors` module implements the k-nearest neighbors
algorithm.
"""
from ._ball_tree import BallTree
from ._kd_tree import KDTree
from ._dist_metrics import DistanceMetric
from ._graph import kneighbors_graph, radius_neighbors_graph
from ._graph import KNeighborsTransformer, RadiusNeighborsTransformer
from ._unsupervised import NearestNeighbors
from ._classification import KNeighborsClassifier, RadiusNeighborsClassifier
from ._regression import KNeighborsRegressor, RadiusNeighborsRegressor
from ._nearest_centroid import NearestCentroid
from ._kde import KernelDensity
from ._lof import LocalOutlierFactor
from ._nca import NeighborhoodComponentsAnalysis
from ._base import VALID_METRICS, VALID_METRICS_SPARSE
__all__ = ['BallTree',
'DistanceMetric',
'KDTree',
'KNeighborsClassifier',
'KNeighborsRegressor',
'KNeighborsTransformer',
'NearestCentroid',
'NearestNeighbors',
'RadiusNeighborsClassifier',
'RadiusNeighborsRegressor',
'RadiusNeighborsTransformer',
'kneighbors_graph',
'radius_neighbors_graph',
'KernelDensity',
'LocalOutlierFactor',
'NeighborhoodComponentsAnalysis',
'VALID_METRICS',
'VALID_METRICS_SPARSE']
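
A minimal sketch of the unsupervised entry point exported above, on toy
1-D data (illustrative values only):

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.array([[0.0], [1.0], [2.0], [3.0]])
nn = NearestNeighbors(n_neighbors=2).fit(X)
dist, ind = nn.kneighbors([[1.1]])  # distances and indices of the two
                                    # closest training rows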

File diff suppressed because it is too large.


@@ -0,0 +1,583 @@
"""Nearest Neighbor Classification"""
# Authors: Jake Vanderplas <vanderplas@astro.washington.edu>
# Fabian Pedregosa <fabian.pedregosa@inria.fr>
# Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Sparseness support by Lars Buitinck
# Multi-output support by Arnaud Joly <a.joly@ulg.ac.be>
#
# License: BSD 3 clause (C) INRIA, University of Amsterdam
import numpy as np
from scipy import stats
from ..utils.extmath import weighted_mode
from ..utils.validation import _is_arraylike, _num_samples
import warnings
from ._base import (
    _check_weights, _get_weights,
    NeighborsBase, KNeighborsMixin,
    RadiusNeighborsMixin, SupervisedIntegerMixin)
from ..base import ClassifierMixin
from ..utils import check_array
from ..utils.validation import _deprecate_positional_args
class KNeighborsClassifier(NeighborsBase, KNeighborsMixin,
SupervisedIntegerMixin, ClassifierMixin):
"""Classifier implementing the k-nearest neighbors vote.
Read more in the :ref:`User Guide <classification>`.
Parameters
----------
n_neighbors : int, default=5
Number of neighbors to use by default for :meth:`kneighbors` queries.
weights : {'uniform', 'distance'} or callable, default='uniform'
Weight function used in prediction. Possible values:
- 'uniform' : uniform weights. All points in each neighborhood
are weighted equally.
- 'distance' : weight points by the inverse of their distance.
In this case, closer neighbors of a query point will have a
greater influence than neighbors which are further away.
- [callable] : a user-defined function which accepts an
array of distances, and returns an array of the same shape
containing the weights.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method.
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
leaf_size : int, default=30
Leaf size passed to BallTree or KDTree. This can affect the
speed of the construction and query, as well as the memory
required to store the tree. The optimal value depends on the
nature of the problem.
p : int, default=2
Power parameter for the Minkowski metric. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric : str or callable, default='minkowski'
The distance metric to use for the tree. The default metric is
minkowski, and with p=2 is equivalent to the standard Euclidean
metric. See the documentation of :class:`DistanceMetric` for a
list of available metrics.
If metric is "precomputed", X is assumed to be a distance matrix and
must be square during fit. X may be a :term:`sparse graph`,
in which case only "nonzero" elements may be considered neighbors.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Doesn't affect :meth:`fit` method.
Attributes
----------
classes_ : array of shape (n_classes,)
Class labels known to the classifier
effective_metric_ : str or callable
The distance metric used. It will be the same as the `metric` parameter
or a synonym of it, e.g. 'euclidean' if the `metric` parameter is set to
'minkowski' and the `p` parameter is set to 2.
effective_metric_params_ : dict
Additional keyword arguments for the metric function. For most metrics
it will be the same as the `metric_params` parameter, but it may also
contain the `p` parameter value if the `effective_metric_` attribute is
set to 'minkowski'.
outputs_2d_ : bool
False when `y`'s shape is (n_samples,) or (n_samples, 1) during fit,
otherwise True.
Examples
--------
>>> X = [[0], [1], [2], [3]]
>>> y = [0, 0, 1, 1]
>>> from sklearn.neighbors import KNeighborsClassifier
>>> neigh = KNeighborsClassifier(n_neighbors=3)
>>> neigh.fit(X, y)
KNeighborsClassifier(...)
>>> print(neigh.predict([[1.1]]))
[0]
>>> print(neigh.predict_proba([[0.9]]))
[[0.66666667 0.33333333]]
See also
--------
RadiusNeighborsClassifier
KNeighborsRegressor
RadiusNeighborsRegressor
NearestNeighbors
Notes
-----
See :ref:`Nearest Neighbors <neighbors>` in the online documentation
for a discussion of the choice of ``algorithm`` and ``leaf_size``.
.. warning::
Regarding the Nearest Neighbors algorithms, if it is found that two
neighbors, neighbor `k+1` and `k`, have identical distances
but different labels, the results will depend on the ordering of the
training data.
https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm
"""
@_deprecate_positional_args
def __init__(self, n_neighbors=5, *,
weights='uniform', algorithm='auto', leaf_size=30,
p=2, metric='minkowski', metric_params=None, n_jobs=None,
**kwargs):
super().__init__(
n_neighbors=n_neighbors,
algorithm=algorithm,
leaf_size=leaf_size, metric=metric, p=p,
metric_params=metric_params,
n_jobs=n_jobs, **kwargs)
self.weights = _check_weights(weights)
def predict(self, X):
"""Predict the class labels for the provided data.
Parameters
----------
X : array-like of shape (n_queries, n_features), \
or (n_queries, n_indexed) if metric == 'precomputed'
Test samples.
Returns
-------
y : ndarray of shape (n_queries,) or (n_queries, n_outputs)
Class labels for each data sample.
"""
X = check_array(X, accept_sparse='csr')
neigh_dist, neigh_ind = self.kneighbors(X)
classes_ = self.classes_
_y = self._y
if not self.outputs_2d_:
_y = self._y.reshape((-1, 1))
classes_ = [self.classes_]
n_outputs = len(classes_)
n_queries = _num_samples(X)
weights = _get_weights(neigh_dist, self.weights)
y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)
for k, classes_k in enumerate(classes_):
if weights is None:
mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
else:
mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)
mode = np.asarray(mode.ravel(), dtype=np.intp)
y_pred[:, k] = classes_k.take(mode)
if not self.outputs_2d_:
y_pred = y_pred.ravel()
return y_pred
def predict_proba(self, X):
"""Return probability estimates for the test data X.
Parameters
----------
X : array-like of shape (n_queries, n_features), \
or (n_queries, n_indexed) if metric == 'precomputed'
Test samples.
Returns
-------
p : ndarray of shape (n_queries, n_classes), or a list of n_outputs
of such arrays if n_outputs > 1.
The class probabilities of the input samples. Classes are ordered
by lexicographic order.
"""
X = check_array(X, accept_sparse='csr')
neigh_dist, neigh_ind = self.kneighbors(X)
classes_ = self.classes_
_y = self._y
if not self.outputs_2d_:
_y = self._y.reshape((-1, 1))
classes_ = [self.classes_]
n_queries = _num_samples(X)
weights = _get_weights(neigh_dist, self.weights)
if weights is None:
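# uniform weighting: every neighbor casts a single vote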
weights = np.ones_like(neigh_ind)
all_rows = np.arange(X.shape[0])
probabilities = []
for k, classes_k in enumerate(classes_):
pred_labels = _y[:, k][neigh_ind]
proba_k = np.zeros((n_queries, classes_k.size))
# a simple ':' index doesn't work right
for i, idx in enumerate(pred_labels.T): # loop is O(n_neighbors)
proba_k[all_rows, idx] += weights[:, i]
# normalize 'votes' into real [0,1] probabilities
normalizer = proba_k.sum(axis=1)[:, np.newaxis]
normalizer[normalizer == 0.0] = 1.0
proba_k /= normalizer
probabilities.append(proba_k)
if not self.outputs_2d_:
probabilities = probabilities[0]
return probabilities
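# A minimal sketch of the callable ``weights`` option documented above,
# using an illustrative RBF-style kernel; any function mapping a distance
# array to a same-shape weight array works:
#
#   >>> import numpy as np
#   >>> from sklearn.neighbors import KNeighborsClassifier
#   >>> rbf = lambda d: np.exp(-(d ** 2))
#   >>> clf = KNeighborsClassifier(n_neighbors=3, weights=rbf)
#   >>> clf.fit([[0], [1], [2], [3]], [0, 0, 1, 1]).predict([[1.1]])
#   array([0])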
class RadiusNeighborsClassifier(NeighborsBase, RadiusNeighborsMixin,
SupervisedIntegerMixin, ClassifierMixin):
"""Classifier implementing a vote among neighbors within a given radius
Read more in the :ref:`User Guide <classification>`.
Parameters
----------
radius : float, default=1.0
Range of parameter space to use by default for :meth:`radius_neighbors`
queries.
weights : {'uniform', 'distance'} or callable, default='uniform'
Weight function used in prediction. Possible values:
- 'uniform' : uniform weights. All points in each neighborhood
are weighted equally.
- 'distance' : weight points by the inverse of their distance.
In this case, closer neighbors of a query point will have a
greater influence than neighbors which are further away.
- [callable] : a user-defined function which accepts an
array of distances, and returns an array of the same shape
containing the weights.
Uniform weights are used by default.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method.
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
leaf_size : int, default=30
Leaf size passed to BallTree or KDTree. This can affect the
speed of the construction and query, as well as the memory
required to store the tree. The optimal value depends on the
nature of the problem.
p : int, default=2
Power parameter for the Minkowski metric. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric : str or callable, default='minkowski'
The distance metric to use for the tree. The default metric is
minkowski, and with p=2 is equivalent to the standard Euclidean
metric. See the documentation of :class:`DistanceMetric` for a
list of available metrics.
If metric is "precomputed", X is assumed to be a distance matrix and
must be square during fit. X may be a :term:`sparse graph`,
in which case only "nonzero" elements may be considered neighbors.
outlier_label : {manual label, 'most_frequent'}, default=None
Label for outlier samples (samples with no neighbors in the given radius).
- manual label: str or int label (should be the same type as y)
or list of manual labels if multi-output is used.
- 'most_frequent' : assign the most frequent label of y to outliers.
- None : when any outlier is detected, ValueError will be raised.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Attributes
----------
classes_ : ndarray of shape (n_classes,)
Class labels known to the classifier.
effective_metric_ : str or callable
The distance metric used. It will be the same as the `metric` parameter
or a synonym of it, e.g. 'euclidean' if the `metric` parameter is set to
'minkowski' and the `p` parameter is set to 2.
effective_metric_params_ : dict
Additional keyword arguments for the metric function. For most metrics
it will be the same as the `metric_params` parameter, but it may also
contain the `p` parameter value if the `effective_metric_` attribute is
set to 'minkowski'.
outputs_2d_ : bool
False when `y`'s shape is (n_samples,) or (n_samples, 1) during fit,
otherwise True.
Examples
--------
>>> X = [[0], [1], [2], [3]]
>>> y = [0, 0, 1, 1]
>>> from sklearn.neighbors import RadiusNeighborsClassifier
>>> neigh = RadiusNeighborsClassifier(radius=1.0)
>>> neigh.fit(X, y)
RadiusNeighborsClassifier(...)
>>> print(neigh.predict([[1.5]]))
[0]
>>> print(neigh.predict_proba([[1.0]]))
[[0.66666667 0.33333333]]
See also
--------
KNeighborsClassifier
RadiusNeighborsRegressor
KNeighborsRegressor
NearestNeighbors
Notes
-----
See :ref:`Nearest Neighbors <neighbors>` in the online documentation
for a discussion of the choice of ``algorithm`` and ``leaf_size``.
https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm
"""
@_deprecate_positional_args
def __init__(self, radius=1.0, *, weights='uniform',
algorithm='auto', leaf_size=30, p=2, metric='minkowski',
outlier_label=None, metric_params=None, n_jobs=None,
**kwargs):
super().__init__(
radius=radius,
algorithm=algorithm,
leaf_size=leaf_size,
metric=metric, p=p, metric_params=metric_params,
n_jobs=n_jobs, **kwargs)
self.weights = _check_weights(weights)
self.outlier_label = outlier_label
def fit(self, X, y):
"""Fit the model using X as training data and y as target values
Parameters
----------
X : BallTree, KDTree or {array-like, sparse matrix} of shape \
(n_samples, n_features) or (n_samples, n_samples)
Training data. If array or matrix, the shape is (n_samples,
n_features), or (n_samples, n_samples) if metric='precomputed'.
y : {array-like, sparse matrix} of shape (n_samples,) or \
(n_samples, n_output)
Target values.
"""
SupervisedIntegerMixin.fit(self, X, y)
classes_ = self.classes_
_y = self._y
if not self.outputs_2d_:
_y = self._y.reshape((-1, 1))
classes_ = [self.classes_]
if self.outlier_label is None:
outlier_label_ = None
elif self.outlier_label == 'most_frequent':
outlier_label_ = []
# iterate over multi-output, get the most frequent label for each
# output.
for k, classes_k in enumerate(classes_):
label_count = np.bincount(_y[:, k])
outlier_label_.append(classes_k[label_count.argmax()])
else:
if (_is_arraylike(self.outlier_label) and
not isinstance(self.outlier_label, str)):
if len(self.outlier_label) != len(classes_):
raise ValueError("The length of outlier_label: {} is "
"inconsistent with the output "
"length: {}".format(self.outlier_label,
len(classes_)))
outlier_label_ = self.outlier_label
else:
outlier_label_ = [self.outlier_label] * len(classes_)
for classes, label in zip(classes_, outlier_label_):
if (_is_arraylike(label) and
not isinstance(label, str)):
# ensure the outlier label for each output is a scalar.
raise TypeError("The outlier_label of classes {} is "
"supposed to be a scalar, got "
"{}.".format(classes, label))
if np.append(classes, label).dtype != classes.dtype:
# ensure the dtype of outlier label is consistent with y.
raise TypeError("The dtype of outlier_label {} is "
"inconsistent with classes {} in "
"y.".format(label, classes))
self.outlier_label_ = outlier_label_
return self
def predict(self, X):
"""Predict the class labels for the provided data.
Parameters
----------
X : array-like of shape (n_queries, n_features), \
or (n_queries, n_indexed) if metric == 'precomputed'
Test samples.
Returns
-------
y : ndarray of shape (n_queries,) or (n_queries, n_outputs)
Class labels for each data sample.
"""
probs = self.predict_proba(X)
classes_ = self.classes_
if not self.outputs_2d_:
probs = [probs]
classes_ = [self.classes_]
n_outputs = len(classes_)
n_queries = probs[0].shape[0]
y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)
for k, prob in enumerate(probs):
# iterate over multi-output, assign labels based on probabilities
# of each output.
max_prob_index = prob.argmax(axis=1)
y_pred[:, k] = classes_[k].take(max_prob_index)
outlier_zero_probs = (prob == 0).all(axis=1)
if outlier_zero_probs.any():
zero_prob_index = np.flatnonzero(outlier_zero_probs)
y_pred[zero_prob_index, k] = self.outlier_label_[k]
if not self.outputs_2d_:
y_pred = y_pred.ravel()
return y_pred
def predict_proba(self, X):
"""Return probability estimates for the test data X.
Parameters
----------
X : array-like of shape (n_queries, n_features), \
or (n_queries, n_indexed) if metric == 'precomputed'
Test samples.
Returns
-------
p : ndarray of shape (n_queries, n_classes), or a list of n_outputs
of such arrays if n_outputs > 1.
The class probabilities of the input samples. Classes are ordered
by lexicographic order.
"""
X = check_array(X, accept_sparse='csr')
n_queries = _num_samples(X)
neigh_dist, neigh_ind = self.radius_neighbors(X)
outlier_mask = np.zeros(n_queries, dtype=bool)
outlier_mask[:] = [len(nind) == 0 for nind in neigh_ind]
outliers = np.flatnonzero(outlier_mask)
inliers = np.flatnonzero(~outlier_mask)
classes_ = self.classes_
_y = self._y
if not self.outputs_2d_:
_y = self._y.reshape((-1, 1))
classes_ = [self.classes_]
if self.outlier_label_ is None and outliers.size > 0:
raise ValueError('No neighbors found for test samples %r, '
'you can try using larger radius, '
'giving a label for outliers, '
'or considering removing them from your dataset.'
% outliers)
weights = _get_weights(neigh_dist, self.weights)
if weights is not None:
weights = weights[inliers]
probabilities = []
# iterate over multi-output, measure probabilities of the k-th output.
for k, classes_k in enumerate(classes_):
pred_labels = np.zeros(len(neigh_ind), dtype=object)
pred_labels[:] = [_y[ind, k] for ind in neigh_ind]
proba_k = np.zeros((n_queries, classes_k.size))
proba_inl = np.zeros((len(inliers), classes_k.size))
# samples may have different numbers of neighbors within the same radius
if weights is None:
for i, idx in enumerate(pred_labels[inliers]):
proba_inl[i, :] = np.bincount(idx,
minlength=classes_k.size)
else:
for i, idx in enumerate(pred_labels[inliers]):
proba_inl[i, :] = np.bincount(idx,
weights[i],
minlength=classes_k.size)
proba_k[inliers, :] = proba_inl
if outliers.size > 0:
_outlier_label = self.outlier_label_[k]
label_index = np.flatnonzero(classes_k == _outlier_label)
if label_index.size == 1:
proba_k[outliers, label_index[0]] = 1.0
else:
warnings.warn('Outlier label {} is not in training '
'classes. All class probabilities of '
'outliers will be assigned with 0.'
''.format(self.outlier_label_[k]))
# normalize 'votes' into real [0,1] probabilities
normalizer = proba_k.sum(axis=1)[:, np.newaxis]
normalizer[normalizer == 0.0] = 1.0
proba_k /= normalizer
probabilities.append(proba_k)
if not self.outputs_2d_:
probabilities = probabilities[0]
return probabilities
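
A minimal sketch of the ``outlier_label`` handling implemented above: a
query with no neighbors inside ``radius`` receives the configured
fallback label instead of raising a ValueError (toy data):

from sklearn.neighbors import RadiusNeighborsClassifier
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
clf = RadiusNeighborsClassifier(radius=0.5,
                                outlier_label='most_frequent').fit(X, y)
clf.predict([[10.0]])  # no neighbors within 0.5 -> most frequent label: 0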


@@ -0,0 +1,77 @@
#!python
#cython: boundscheck=False
#cython: wraparound=False
#cython: cdivision=True
cimport cython
cimport numpy as np
from libc.math cimport fabs, sqrt, exp, cos, pow
from ._typedefs cimport DTYPE_t, ITYPE_t, DITYPE_t
from ._typedefs import DTYPE, ITYPE
######################################################################
# Inline distance functions
#
# We use these for the default (euclidean) case so that they can be
# inlined. This leads to faster computation for the most common case
cdef inline DTYPE_t euclidean_dist(DTYPE_t* x1, DTYPE_t* x2,
ITYPE_t size) nogil except -1:
cdef DTYPE_t tmp, d=0
cdef np.intp_t j
for j in range(size):
tmp = x1[j] - x2[j]
d += tmp * tmp
return sqrt(d)
cdef inline DTYPE_t euclidean_rdist(DTYPE_t* x1, DTYPE_t* x2,
ITYPE_t size) nogil except -1:
cdef DTYPE_t tmp, d=0
cdef np.intp_t j
for j in range(size):
tmp = x1[j] - x2[j]
d += tmp * tmp
return d
cdef inline DTYPE_t euclidean_dist_to_rdist(DTYPE_t dist) nogil except -1:
return dist * dist
cdef inline DTYPE_t euclidean_rdist_to_dist(DTYPE_t dist) nogil except -1:
return sqrt(dist)
######################################################################
# DistanceMetric base class
cdef class DistanceMetric:
# The following attributes are required for a few of the subclasses.
# we must define them here so that cython's limited polymorphism will work.
# Because we don't expect to instantiate a lot of these objects, the
# extra memory overhead of this setup should not be an issue.
cdef DTYPE_t p
#cdef DTYPE_t[::1] vec
#cdef DTYPE_t[:, ::1] mat
cdef np.ndarray vec
cdef np.ndarray mat
cdef DTYPE_t* vec_ptr
cdef DTYPE_t* mat_ptr
cdef ITYPE_t size
cdef object func
cdef object kwargs
cdef DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2,
ITYPE_t size) nogil except -1
cdef DTYPE_t rdist(self, DTYPE_t* x1, DTYPE_t* x2,
ITYPE_t size) nogil except -1
cdef int pdist(self, DTYPE_t[:, ::1] X, DTYPE_t[:, ::1] D) except -1
cdef int cdist(self, DTYPE_t[:, ::1] X, DTYPE_t[:, ::1] Y,
DTYPE_t[:, ::1] D) except -1
cdef DTYPE_t _rdist_to_dist(self, DTYPE_t rdist) nogil except -1
cdef DTYPE_t _dist_to_rdist(self, DTYPE_t dist) nogil except -1
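
The dist/rdist pair declared above is reachable from Python through the
``DistanceMetric`` factory; a minimal sketch (toy data) showing that the
Euclidean "reduced" distance is simply the squared distance, which lets
the trees rank neighbors without taking square roots:

import numpy as np
from sklearn.neighbors import DistanceMetric

X = np.array([[0.0, 0.0], [3.0, 4.0]])
metric = DistanceMetric.get_metric('euclidean')
metric.pairwise(X)         # [[0., 5.], [5., 0.]]
metric.dist_to_rdist(5.0)  # 25.0 == 5.0 ** 2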


@@ -0,0 +1,480 @@
"""Nearest Neighbors graph functions"""
# Author: Jake Vanderplas <vanderplas@astro.washington.edu>
# Tom Dupre la Tour
#
# License: BSD 3 clause (C) INRIA, University of Amsterdam
from ._base import KNeighborsMixin, RadiusNeighborsMixin
from ._base import NeighborsBase
from ._base import UnsupervisedMixin
from ._unsupervised import NearestNeighbors
from ..base import TransformerMixin
from ..utils.validation import check_is_fitted, _deprecate_positional_args
def _check_params(X, metric, p, metric_params):
"""Check the validity of the input parameters"""
params = zip(['metric', 'p', 'metric_params'],
[metric, p, metric_params])
est_params = X.get_params()
for param_name, func_param in params:
if func_param != est_params[param_name]:
raise ValueError(
"Got %s for %s, while the estimator has %s for "
"the same parameter." % (
func_param, param_name, est_params[param_name]))
def _query_include_self(X, include_self, mode):
"""Return the query based on include_self param"""
if include_self == 'auto':
include_self = mode == 'connectivity'
# it does not include each sample as its own neighbor
if not include_self:
X = None
return X
@_deprecate_positional_args
def kneighbors_graph(X, n_neighbors, *, mode='connectivity',
metric='minkowski', p=2, metric_params=None,
include_self=False, n_jobs=None):
"""Computes the (weighted) graph of k-Neighbors for points in X
Read more in the :ref:`User Guide <unsupervised_neighbors>`.
Parameters
----------
X : array-like of shape (n_samples, n_features) or BallTree
Sample data, in the form of a numpy array or a precomputed
:class:`BallTree`.
n_neighbors : int
Number of neighbors for each sample.
mode : {'connectivity', 'distance'}, default='connectivity'
Type of returned matrix: 'connectivity' will return the connectivity
matrix with ones and zeros, and 'distance' will return the distances
between neighbors according to the given metric.
metric : str, default='minkowski'
The distance metric used to calculate the k-Neighbors for each sample
point. The DistanceMetric class gives a list of available metrics.
The default distance is 'euclidean' ('minkowski' metric with the p
param equal to 2.)
p : int, default=2
Power parameter for the Minkowski metric. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
include_self : bool or 'auto', default=False
Whether or not to mark each sample as the first nearest neighbor to
itself. If 'auto', then True is used for mode='connectivity' and False
for mode='distance'.
n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Returns
-------
A : sparse matrix of shape (n_samples, n_samples)
Graph where A[i, j] is assigned the weight of edge that
connects i to j. The matrix is of CSR format.
Examples
--------
>>> X = [[0], [3], [1]]
>>> from sklearn.neighbors import kneighbors_graph
>>> A = kneighbors_graph(X, 2, mode='connectivity', include_self=True)
>>> A.toarray()
array([[1., 0., 1.],
[0., 1., 1.],
[1., 0., 1.]])
See also
--------
radius_neighbors_graph
"""
if not isinstance(X, KNeighborsMixin):
X = NearestNeighbors(n_neighbors=n_neighbors, metric=metric, p=p,
metric_params=metric_params, n_jobs=n_jobs).fit(X)
else:
_check_params(X, metric, p, metric_params)
query = _query_include_self(X._fit_X, include_self, mode)
return X.kneighbors_graph(X=query, n_neighbors=n_neighbors, mode=mode)
@_deprecate_positional_args
def radius_neighbors_graph(X, radius, *, mode='connectivity',
metric='minkowski', p=2, metric_params=None,
include_self=False, n_jobs=None):
"""Computes the (weighted) graph of Neighbors for points in X
Neighborhoods are restricted to points at a distance lower than the
radius.
Read more in the :ref:`User Guide <unsupervised_neighbors>`.
Parameters
----------
X : array-like of shape (n_samples, n_features) or BallTree
Sample data, in the form of a numpy array or a precomputed
:class:`BallTree`.
radius : float
Radius of neighborhoods.
mode : {'connectivity', 'distance'}, default='connectivity'
Type of returned matrix: 'connectivity' will return the connectivity
matrix with ones and zeros, and 'distance' will return the distances
between neighbors according to the given metric.
metric : str, default='minkowski'
The distance metric used to calculate the neighbors within a
given radius for each sample point. The DistanceMetric class
gives a list of available metrics. The default distance is
'euclidean' ('minkowski' metric with the p param equal to 2.)
p : int, default=2
Power parameter for the Minkowski metric. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
include_self : bool or 'auto', default=False
Whether or not to mark each sample as the first nearest neighbor to
itself. If 'auto', then True is used for mode='connectivity' and False
for mode='distance'.
n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Returns
-------
A : sparse matrix of shape (n_samples, n_samples)
Graph where A[i, j] is assigned the weight of edge that connects
i to j. The matrix is of CSR format.
Examples
--------
>>> X = [[0], [3], [1]]
>>> from sklearn.neighbors import radius_neighbors_graph
>>> A = radius_neighbors_graph(X, 1.5, mode='connectivity',
... include_self=True)
>>> A.toarray()
array([[1., 0., 1.],
[0., 1., 0.],
[1., 0., 1.]])
See also
--------
kneighbors_graph
"""
if not isinstance(X, RadiusNeighborsMixin):
X = NearestNeighbors(radius=radius, metric=metric, p=p,
metric_params=metric_params, n_jobs=n_jobs).fit(X)
else:
_check_params(X, metric, p, metric_params)
query = _query_include_self(X._fit_X, include_self, mode)
return X.radius_neighbors_graph(query, radius, mode)
class KNeighborsTransformer(KNeighborsMixin, UnsupervisedMixin,
TransformerMixin, NeighborsBase):
"""Transform X into a (weighted) graph of k nearest neighbors
The transformed data is a sparse graph as returned by kneighbors_graph.
Read more in the :ref:`User Guide <neighbors_transformer>`.
.. versionadded:: 0.22
Parameters
----------
mode : {'distance', 'connectivity'}, default='distance'
Type of returned matrix: 'connectivity' will return the connectivity
matrix with ones and zeros, and 'distance' will return the distances
between neighbors according to the given metric.
n_neighbors : int, default=5
Number of neighbors for each sample in the transformed sparse graph.
For compatibility reasons, as each sample is considered as its own
neighbor, one extra neighbor will be computed when mode == 'distance'.
In this case, the sparse graph contains (n_neighbors + 1) neighbors.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method.
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
leaf_size : int, default=30
Leaf size passed to BallTree or KDTree. This can affect the
speed of the construction and query, as well as the memory
required to store the tree. The optimal value depends on the
nature of the problem.
metric : str or callable, default='minkowski'
Metric to use for distance computation. Any metric from scikit-learn
or scipy.spatial.distance can be used.
If metric is a callable function, it is called on each
pair of instances (rows) and the resulting value recorded. The callable
should take two arrays as input and return one value indicating the
distance between them. This works for Scipy's metrics, but is less
efficient than passing the metric name as a string.
Distance matrices are not supported.
Valid values for metric are:
- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',
'manhattan']
- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',
'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',
'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',
'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
'yule']
See the documentation for scipy.spatial.distance for details on these
metrics.
p : int, default=2
Parameter for the Minkowski metric from
sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
n_jobs : int, default=1
The number of parallel jobs to run for neighbors search.
If ``-1``, then the number of jobs is set to the number of CPU cores.
Examples
--------
>>> from sklearn.manifold import Isomap
>>> from sklearn.neighbors import KNeighborsTransformer
>>> from sklearn.pipeline import make_pipeline
>>> estimator = make_pipeline(
... KNeighborsTransformer(n_neighbors=5, mode='distance'),
... Isomap(neighbors_algorithm='precomputed'))
"""
@_deprecate_positional_args
def __init__(self, *, mode='distance', n_neighbors=5, algorithm='auto',
leaf_size=30, metric='minkowski', p=2, metric_params=None,
n_jobs=1):
super().__init__(
n_neighbors=n_neighbors, radius=None, algorithm=algorithm,
leaf_size=leaf_size, metric=metric, p=p,
metric_params=metric_params, n_jobs=n_jobs)
self.mode = mode
def transform(self, X):
"""Computes the (weighted) graph of Neighbors for points in X
Parameters
----------
X : array-like of shape (n_samples_transform, n_features)
Sample data.
Returns
-------
Xt : sparse matrix of shape (n_samples_transform, n_samples_fit)
Xt[i, j] is assigned the weight of edge that connects i to j.
Only the neighbors have an explicit value.
The diagonal is always explicit.
The matrix is of CSR format.
"""
check_is_fitted(self)
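# In 'distance' mode each fitted sample counts itself as its first
# neighbor (see the n_neighbors docstring above), so query one extra.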
add_one = self.mode == 'distance'
return self.kneighbors_graph(X, mode=self.mode,
n_neighbors=self.n_neighbors + add_one)
def fit_transform(self, X, y=None):
"""Fit to data, then transform it.
Fits transformer to X and y with optional parameters fit_params
and returns a transformed version of X.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Training set.
y : ignored
Returns
-------
Xt : sparse matrix of shape (n_samples, n_samples)
Xt[i, j] is assigned the weight of edge that connects i to j.
Only the neighbors have an explicit value.
The diagonal is always explicit.
The matrix is of CSR format.
"""
return self.fit(X).transform(X)
class RadiusNeighborsTransformer(RadiusNeighborsMixin, UnsupervisedMixin,
TransformerMixin, NeighborsBase):
"""Transform X into a (weighted) graph of neighbors nearer than a radius
The transformed data is a sparse graph as returned by
radius_neighbors_graph.
Read more in the :ref:`User Guide <neighbors_transformer>`.
.. versionadded:: 0.22
Parameters
----------
mode : {'distance', 'connectivity'}, default='distance'
Type of returned matrix: 'connectivity' will return the connectivity
matrix with ones and zeros, and 'distance' will return the distances
between neighbors according to the given metric.
radius : float, default=1.
Radius of neighborhood in the transformed sparse graph.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method.
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
leaf_size : int, default=30
Leaf size passed to BallTree or KDTree. This can affect the
speed of the construction and query, as well as the memory
required to store the tree. The optimal value depends on the
nature of the problem.
metric : str or callable, default='minkowski'
Metric to use for distance computation. Any metric from scikit-learn
or scipy.spatial.distance can be used.
If metric is a callable function, it is called on each
pair of instances (rows) and the resulting value recorded. The callable
should take two arrays as input and return one value indicating the
distance between them. This works for Scipy's metrics, but is less
efficient than passing the metric name as a string.
Distance matrices are not supported.
Valid values for metric are:
- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',
'manhattan']
- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',
'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',
'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',
'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
'yule']
See the documentation for scipy.spatial.distance for details on these
metrics.
p : int, default=2
Parameter for the Minkowski metric from
sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
n_jobs : int, default=1
The number of parallel jobs to run for neighbors search.
If ``-1``, then the number of jobs is set to the number of CPU cores.
Examples
--------
>>> from sklearn.cluster import DBSCAN
>>> from sklearn.neighbors import RadiusNeighborsTransformer
>>> from sklearn.pipeline import make_pipeline
>>> estimator = make_pipeline(
... RadiusNeighborsTransformer(radius=42.0, mode='distance'),
... DBSCAN(min_samples=30, metric='precomputed'))
"""
@_deprecate_positional_args
def __init__(self, *, mode='distance', radius=1., algorithm='auto',
leaf_size=30, metric='minkowski', p=2, metric_params=None,
n_jobs=1):
super().__init__(
n_neighbors=None, radius=radius, algorithm=algorithm,
leaf_size=leaf_size, metric=metric, p=p,
metric_params=metric_params, n_jobs=n_jobs)
self.mode = mode
def transform(self, X):
"""Computes the (weighted) graph of Neighbors for points in X
Parameters
----------
X : array-like of shape (n_samples_transform, n_features)
Sample data.
Returns
-------
Xt : sparse matrix of shape (n_samples_transform, n_samples_fit)
Xt[i, j] is assigned the weight of edge that connects i to j.
Only the neighbors have an explicit value.
The diagonal is always explicit.
The matrix is of CSR format.
"""
check_is_fitted(self)
return self.radius_neighbors_graph(X, mode=self.mode,
sort_results=True)
def fit_transform(self, X, y=None):
"""Fit to data, then transform it.
Fits transformer to X and y with optional parameters fit_params
and returns a transformed version of X.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Training set.
y : ignored
Returns
-------
Xt : sparse matrix of shape (n_samples, n_samples)
Xt[i, j] is assigned the weight of edge that connects i to j.
Only the neighbors have an explicit value.
The diagonal is always explicit.
The matrix is of CSR format.
"""
return self.fit(X).transform(X)
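
A minimal sketch of what the transformers above produce: a CSR matrix of
shape (n_queries, n_samples_fit), with one extra neighbor per row in
'distance' mode as noted in the ``n_neighbors`` docstring (toy data):

from sklearn.neighbors import KNeighborsTransformer

X = [[0.0], [1.0], [2.0], [3.0]]
t = KNeighborsTransformer(n_neighbors=2, mode='distance').fit(X)
Xt = t.transform(X)
Xt.shape  # (4, 4), sparse CSR
Xt.nnz    # 12 == n_samples * (n_neighbors + 1); the diagonal is explicit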


@@ -0,0 +1,276 @@
"""
Kernel Density Estimation
-------------------------
"""
# Author: Jake Vanderplas <jakevdp@cs.washington.edu>
import numpy as np
from scipy.special import gammainc
from ..base import BaseEstimator
from ..utils import check_array, check_random_state
from ..utils.validation import _check_sample_weight, check_is_fitted
from ..utils.validation import _deprecate_positional_args
from ..utils.extmath import row_norms
from ._ball_tree import BallTree, DTYPE
from ._kd_tree import KDTree
VALID_KERNELS = ['gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear',
'cosine']
TREE_DICT = {'ball_tree': BallTree, 'kd_tree': KDTree}
# TODO: implement a brute force version for testing purposes
# TODO: bandwidth estimation
# TODO: create a density estimation base class?
class KernelDensity(BaseEstimator):
"""Kernel Density Estimation.
Read more in the :ref:`User Guide <kernel_density>`.
Parameters
----------
bandwidth : float
The bandwidth of the kernel. Default is 1.0.
algorithm : str
The tree algorithm to use. Valid options are
['kd_tree'|'ball_tree'|'auto']. Default is 'auto'.
kernel : str
The kernel to use. Valid kernels are
['gaussian'|'tophat'|'epanechnikov'|'exponential'|'linear'|'cosine']
Default is 'gaussian'.
metric : str
The distance metric to use. Note that not all metrics are
valid with all algorithms. Refer to the documentation of
:class:`BallTree` and :class:`KDTree` for a description of
available algorithms. Note that the normalization of the density
output is correct only for the Euclidean distance metric. Default
is 'euclidean'.
atol : float
The desired absolute tolerance of the result. A larger tolerance will
generally lead to faster execution. Default is 0.
rtol : float
The desired relative tolerance of the result. A larger tolerance will
generally lead to faster execution. Default is 0.
breadth_first : bool
If true (default), use a breadth-first approach to the problem.
Otherwise use a depth-first approach.
leaf_size : int
Specify the leaf size of the underlying tree. See :class:`BallTree`
or :class:`KDTree` for details. Default is 40.
metric_params : dict
Additional parameters to be passed to the tree for use with the
metric. For more information, see the documentation of
:class:`BallTree` or :class:`KDTree`.
See Also
--------
sklearn.neighbors.KDTree : K-dimensional tree for fast generalized N-point
problems.
sklearn.neighbors.BallTree : Ball tree for fast generalized N-point
problems.
Examples
--------
Compute a gaussian kernel density estimate with a fixed bandwidth.
>>> import numpy as np
>>> rng = np.random.RandomState(42)
>>> X = rng.random_sample((100, 3))
>>> kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)
>>> log_density = kde.score_samples(X[:3])
>>> log_density
array([-1.52955942, -1.51462041, -1.60244657])
"""
@_deprecate_positional_args
def __init__(self, *, bandwidth=1.0, algorithm='auto',
kernel='gaussian', metric="euclidean", atol=0, rtol=0,
breadth_first=True, leaf_size=40, metric_params=None):
self.algorithm = algorithm
self.bandwidth = bandwidth
self.kernel = kernel
self.metric = metric
self.atol = atol
self.rtol = rtol
self.breadth_first = breadth_first
self.leaf_size = leaf_size
self.metric_params = metric_params
# run the choose algorithm code so that exceptions will happen here
# we're using clone() in the GenerativeBayes classifier,
# so we can't do this kind of logic in __init__
self._choose_algorithm(self.algorithm, self.metric)
if bandwidth <= 0:
raise ValueError("bandwidth must be positive")
if kernel not in VALID_KERNELS:
raise ValueError("invalid kernel: '{0}'".format(kernel))
def _choose_algorithm(self, algorithm, metric):
# given the algorithm string + metric string, choose the optimal
# algorithm to compute the result.
if algorithm == 'auto':
# use KD Tree if possible
if metric in KDTree.valid_metrics:
return 'kd_tree'
elif metric in BallTree.valid_metrics:
return 'ball_tree'
else:
raise ValueError("invalid metric: '{0}'".format(metric))
elif algorithm in TREE_DICT:
if metric not in TREE_DICT[algorithm].valid_metrics:
raise ValueError("invalid metric for {0}: "
"'{1}'".format(TREE_DICT[algorithm],
metric))
return algorithm
else:
raise ValueError("invalid algorithm: '{0}'".format(algorithm))
def fit(self, X, y=None, sample_weight=None):
"""Fit the Kernel Density model on the data.
Parameters
----------
X : array_like, shape (n_samples, n_features)
List of n_features-dimensional data points. Each row
corresponds to a single data point.
y : None
Ignored. This parameter exists only for compatibility with
:class:`sklearn.pipeline.Pipeline`.
sample_weight : array_like, shape (n_samples,), optional
List of sample weights attached to the data X.
.. versionadded:: 0.20
Returns
-------
self : object
Returns the instance itself.
"""
algorithm = self._choose_algorithm(self.algorithm, self.metric)
X = self._validate_data(X, order='C', dtype=DTYPE)
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X, DTYPE)
if sample_weight.min() <= 0:
raise ValueError("sample_weight must have positive values")
kwargs = self.metric_params
if kwargs is None:
kwargs = {}
self.tree_ = TREE_DICT[algorithm](X, metric=self.metric,
leaf_size=self.leaf_size,
sample_weight=sample_weight,
**kwargs)
return self
def score_samples(self, X):
"""Evaluate the log density model on the data.
Parameters
----------
X : array_like, shape (n_samples, n_features)
An array of points to query. Last dimension should match dimension
of training data (n_features).
Returns
-------
density : ndarray, shape (n_samples,)
The array of log(density) evaluations. These are normalized to be
probability densities, so values will be low for high-dimensional
data.
"""
check_is_fitted(self)
# The returned density is normalized to the number of points.
# For it to be a probability, we must scale it. For this reason
# we'll also scale atol.
X = check_array(X, order='C', dtype=DTYPE)
if self.tree_.sample_weight is None:
N = self.tree_.data.shape[0]
else:
N = self.tree_.sum_weight
atol_N = self.atol * N
log_density = self.tree_.kernel_density(
X, h=self.bandwidth, kernel=self.kernel, atol=atol_N,
rtol=self.rtol, breadth_first=self.breadth_first, return_log=True)
log_density -= np.log(N)
return log_density
def score(self, X, y=None):
"""Compute the total log probability density under the model.
Parameters
----------
X : array_like, shape (n_samples, n_features)
List of n_features-dimensional data points. Each row
corresponds to a single data point.
y : None
Ignored. This parameter exists only for compatibility with
:class:`sklearn.pipeline.Pipeline`.
Returns
-------
logprob : float
Total log-likelihood of the data in X. This is normalized to be a
probability density, so the value will be low for high-dimensional
data.
"""
return np.sum(self.score_samples(X))
def sample(self, n_samples=1, random_state=None):
"""Generate random samples from the model.
Currently, this is implemented only for gaussian and tophat kernels.
Parameters
----------
n_samples : int, optional
Number of samples to generate. Defaults to 1.
random_state : int, RandomState instance, default=None
Determines random number generation used to generate
random samples. Pass an int for reproducible results
across multiple function calls.
See :term:`Glossary <random_state>`.
Returns
-------
X : array_like, shape (n_samples, n_features)
List of samples.
"""
check_is_fitted(self)
# TODO: implement sampling for other valid kernel shapes
if self.kernel not in ['gaussian', 'tophat']:
raise NotImplementedError()
data = np.asarray(self.tree_.data)
rng = check_random_state(random_state)
u = rng.uniform(0, 1, size=n_samples)
if self.tree_.sample_weight is None:
i = (u * data.shape[0]).astype(np.int64)
else:
cumsum_weight = np.cumsum(np.asarray(self.tree_.sample_weight))
sum_weight = cumsum_weight[-1]
i = np.searchsorted(cumsum_weight, u * sum_weight)
if self.kernel == 'gaussian':
return np.atleast_2d(rng.normal(data[i], self.bandwidth))
elif self.kernel == 'tophat':
# we first draw points from a d-dimensional normal distribution,
# then use an incomplete gamma function to map them to a uniform
# d-dimensional tophat distribution.
dim = data.shape[1]
X = rng.normal(size=(n_samples, dim))
s_sq = row_norms(X, squared=True)
correction = (gammainc(0.5 * dim, 0.5 * s_sq) ** (1. / dim)
* self.bandwidth / np.sqrt(s_sq))
return data[i] + X * correction[:, np.newaxis]
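
A minimal sketch of the ``sample`` path implemented above for the
Gaussian kernel: draw training rows (weight-proportionally when sample
weights were given) and perturb each with kernel-shaped noise (toy data):

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
X = rng.normal(size=(100, 2))
kde = KernelDensity(kernel='gaussian', bandwidth=0.4).fit(X)
new = kde.sample(n_samples=5, random_state=0)
new.shape  # (5, 2): each row is a training point plus N(0, bandwidth**2) noise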


@@ -0,0 +1,517 @@
# Authors: Nicolas Goix <nicolas.goix@telecom-paristech.fr>
# Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
# License: BSD 3 clause
import numpy as np
import warnings
from ._base import NeighborsBase
from ._base import KNeighborsMixin
from ._base import UnsupervisedMixin
from ..base import OutlierMixin
from ..utils.validation import check_is_fitted
from ..utils.validation import _deprecate_positional_args
from ..utils import check_array
__all__ = ["LocalOutlierFactor"]
class LocalOutlierFactor(KNeighborsMixin, UnsupervisedMixin,
OutlierMixin, NeighborsBase):
"""Unsupervised Outlier Detection using Local Outlier Factor (LOF)
The anomaly score of each sample is called Local Outlier Factor.
It measures the local deviation of density of a given sample with
respect to its neighbors.
It is local in that the anomaly score depends on how isolated the object
is with respect to the surrounding neighborhood.
More precisely, locality is given by k-nearest neighbors, whose distance
is used to estimate the local density.
By comparing the local density of a sample to the local densities of
its neighbors, one can identify samples that have a substantially lower
density than their neighbors. These are considered outliers.
.. versionadded:: 0.19
Parameters
----------
n_neighbors : int, default=20
Number of neighbors to use by default for :meth:`kneighbors` queries.
If n_neighbors is larger than the number of samples provided,
all samples will be used.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method.
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
leaf_size : int, default=30
Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can
affect the speed of the construction and query, as well as the memory
required to store the tree. The optimal value depends on the
nature of the problem.
metric : str or callable, default='minkowski'
Metric used for the distance computation. Any metric from scikit-learn
or scipy.spatial.distance can be used.
If metric is "precomputed", X is assumed to be a distance matrix and
must be square. X may be a sparse matrix, in which case only "nonzero"
elements may be considered neighbors.
If metric is a callable function, it is called on each
pair of instances (rows) and the resulting value recorded. The callable
should take two arrays as input and return one value indicating the
distance between them. This works for Scipy's metrics, but is less
efficient than passing the metric name as a string.
Valid values for metric are:
- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',
'manhattan']
- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',
'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',
'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',
'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
'yule']
See the documentation for scipy.spatial.distance for details on these
metrics:
https://docs.scipy.org/doc/scipy/reference/spatial.distance.html
p : int, default=2
Parameter for the Minkowski metric from
:func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this
is equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
contamination : 'auto' or float, default='auto'
The amount of contamination of the data set, i.e. the proportion
of outliers in the data set. When fitting this is used to define the
threshold on the scores of the samples.
- if 'auto', the threshold is determined as in the
original paper,
- if a float, the contamination should be in the range [0, 0.5].
.. versionchanged:: 0.22
The default value of ``contamination`` changed from 0.1
to ``'auto'``.
novelty : bool, default=False
By default, LocalOutlierFactor is only meant to be used for outlier
detection (novelty=False). Set novelty to True if you want to use
LocalOutlierFactor for novelty detection. In this case be aware that
you should only use predict, decision_function and score_samples
on new unseen data and not on the training set.
.. versionadded:: 0.20
n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Attributes
----------
negative_outlier_factor_ : ndarray of shape (n_samples,)
The opposite LOF of the training samples. The higher, the more normal.
Inliers tend to have a LOF score close to 1
(``negative_outlier_factor_`` close to -1), while outliers tend to have
a larger LOF score.
The local outlier factor (LOF) of a sample captures its
supposed 'degree of abnormality'.
It is the average of the ratio of the local reachability density of
a sample and those of its k-nearest neighbors.
n_neighbors_ : int
The actual number of neighbors used for :meth:`kneighbors` queries.
offset_ : float
Offset used to obtain binary labels from the raw scores.
Observations having a negative_outlier_factor smaller than `offset_`
are detected as abnormal.
The offset is set to -1.5 (inliers score around -1), except when a
contamination parameter different from "auto" is provided. In that
case, the offset is defined in such a way that we obtain the expected
number of outliers in training.
.. versionadded:: 0.20
Examples
--------
>>> import numpy as np
>>> from sklearn.neighbors import LocalOutlierFactor
>>> X = [[-1.1], [0.2], [101.1], [0.3]]
>>> clf = LocalOutlierFactor(n_neighbors=2)
>>> clf.fit_predict(X)
array([ 1, 1, -1, 1])
>>> clf.negative_outlier_factor_
array([ -0.9821..., -1.0370..., -73.3697..., -0.9821...])
References
----------
.. [1] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander, J. (2000, May).
LOF: identifying density-based local outliers. In ACM SIGMOD Record.
"""
@_deprecate_positional_args
def __init__(self, n_neighbors=20, *, algorithm='auto', leaf_size=30,
metric='minkowski', p=2, metric_params=None,
contamination="auto", novelty=False, n_jobs=None):
super().__init__(
n_neighbors=n_neighbors,
algorithm=algorithm,
leaf_size=leaf_size, metric=metric, p=p,
metric_params=metric_params, n_jobs=n_jobs)
self.contamination = contamination
self.novelty = novelty
@property
def fit_predict(self):
"""Fits the model to the training set X and returns the labels.
**Only available for outlier detection (when novelty is set to False).**
Label is 1 for an inlier and -1 for an outlier according to the LOF
score and the contamination parameter.
Parameters
----------
X : array-like of shape (n_samples, n_features), default=None
The query sample or samples to compute the Local Outlier Factor
w.r.t. the training samples.
y : Ignored
Not used, present for API consistency by convention.
Returns
-------
is_inlier : ndarray of shape (n_samples,)
Returns -1 for anomalies/outliers and 1 for inliers.
"""
# As fit_predict would be different from fit.predict, fit_predict is
# only available for outlier detection (novelty=False)
if self.novelty:
msg = ('fit_predict is not available when novelty=True. Use '
'novelty=False if you want to predict on the training set.')
raise AttributeError(msg)
return self._fit_predict
def _fit_predict(self, X, y=None):
"""Fits the model to the training set X and returns the labels.
Label is 1 for an inlier and -1 for an outlier according to the LOF
score and the contamination parameter.
Parameters
----------
X : array-like of shape (n_samples, n_features), default=None
The query sample or samples to compute the Local Outlier Factor
w.r.t. the training samples.
Returns
-------
is_inlier : ndarray of shape (n_samples,)
Returns -1 for anomalies/outliers and 1 for inliers.
"""
# As fit_predict would be different from fit.predict, fit_predict is
# only available for outlier detection (novelty=False)
return self.fit(X)._predict()
def fit(self, X, y=None):
"""Fit the model using X as training data.
Parameters
----------
X : BallTree, KDTree or {array-like, sparse matrix} of shape \
(n_samples, n_features) or (n_samples, n_samples)
Training data. If array or matrix, the shape is (n_samples,
n_features), or (n_samples, n_samples) if metric='precomputed'.
y : Ignored
Not used, present for API consistency by convention.
Returns
-------
self : object
"""
if self.contamination != 'auto':
if not (0. < self.contamination <= .5):
raise ValueError("contamination must be in (0, 0.5], "
"got: %f" % self.contamination)
super().fit(X)
n_samples = self.n_samples_fit_
if self.n_neighbors > n_samples:
warnings.warn("n_neighbors (%s) is greater than the "
"total number of samples (%s). n_neighbors "
"will be set to (n_samples - 1) for estimation."
% (self.n_neighbors, n_samples))
self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))
self._distances_fit_X_, _neighbors_indices_fit_X_ = self.kneighbors(
n_neighbors=self.n_neighbors_)
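# Local reachability density (lrd): inverse of the average reachability
# distance of a sample from its k nearest neighbors (Breunig et al., 2000).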
self._lrd = self._local_reachability_density(
self._distances_fit_X_, _neighbors_indices_fit_X_)
# Compute lof score over training samples to define offset_:
lrd_ratios_array = (self._lrd[_neighbors_indices_fit_X_] /
self._lrd[:, np.newaxis])
self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)
if self.contamination == "auto":
# inliers score around -1 (the higher, the less abnormal).
self.offset_ = -1.5
else:
self.offset_ = np.percentile(self.negative_outlier_factor_,
100. * self.contamination)
return self
@property
def predict(self):
"""Predict the labels (1 inlier, -1 outlier) of X according to LOF.
**Only available for novelty detection (when novelty is set to True).**
This method allows generalizing prediction to *new observations* (not
in the training set).
Parameters
----------
X : array-like of shape (n_samples, n_features)
The query sample or samples to compute the Local Outlier Factor
w.r.t. the training samples.
Returns
-------
is_inlier : ndarray of shape (n_samples,)
Returns -1 for anomalies/outliers and +1 for inliers.
"""
if not self.novelty:
msg = ('predict is not available when novelty=False, use '
'fit_predict if you want to predict on training data. Use '
'novelty=True if you want to use LOF for novelty detection '
'and predict on new unseen data.')
raise AttributeError(msg)
return self._predict
def _predict(self, X=None):
"""Predict the labels (1 inlier, -1 outlier) of X according to LOF.
If X is None, returns the same as fit_predict(X_train).
Parameters
----------
X : array-like of shape (n_samples, n_features), default=None
The query sample or samples to compute the Local Outlier Factor
w.r.t. the training samples. If None, makes prediction on the
training data without considering them as their own neighbors.
Returns
-------
is_inlier : ndarray of shape (n_samples,)
Returns -1 for anomalies/outliers and +1 for inliers.
"""
check_is_fitted(self)
if X is not None:
X = check_array(X, accept_sparse='csr')
is_inlier = np.ones(X.shape[0], dtype=int)
is_inlier[self.decision_function(X) < 0] = -1
else:
is_inlier = np.ones(self.n_samples_fit_, dtype=int)
is_inlier[self.negative_outlier_factor_ < self.offset_] = -1
return is_inlier
@property
def decision_function(self):
"""Shifted opposite of the Local Outlier Factor of X.
Bigger is better, i.e. large values correspond to inliers.
**Only available for novelty detection (when novelty is set to True).**
The shift offset allows a zero threshold for being an outlier.
The argument X is supposed to contain *new data*: if X contains a
point from training, it considers the latter in its own neighborhood.
Also, the samples in X are not considered in the neighborhood of any
point.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The query sample or samples to compute the Local Outlier Factor
w.r.t. the training samples.
Returns
-------
shifted_opposite_lof_scores : ndarray of shape (n_samples,)
The shifted opposite of the Local Outlier Factor of each input
sample. The lower, the more abnormal. Negative scores represent
outliers, positive scores represent inliers.
"""
if not self.novelty:
msg = ('decision_function is not available when novelty=False. '
'Use novelty=True if you want to use LOF for novelty '
'detection and compute decision_function for new unseen '
'data. Note that the opposite LOF of the training samples '
'is always available by considering the '
'negative_outlier_factor_ attribute.')
raise AttributeError(msg)
return self._decision_function
def _decision_function(self, X):
"""Shifted opposite of the Local Outlier Factor of X.
Bigger is better, i.e. large values correspond to inliers.
**Only available for novelty detection (when novelty is set to True).**
The shift offset allows a zero threshold for being an outlier.
The argument X is supposed to contain *new data*: if X contains a
point from training, it considers the latter in its own neighborhood.
Also, the samples in X are not considered in the neighborhood of any
point.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The query sample or samples to compute the Local Outlier Factor
w.r.t. the training samples.
Returns
-------
shifted_opposite_lof_scores : ndarray of shape (n_samples,)
The shifted opposite of the Local Outlier Factor of each input
sample. The lower, the more abnormal. Negative scores represent
outliers, positive scores represent inliers.
"""
return self._score_samples(X) - self.offset_
@property
def score_samples(self):
"""Opposite of the Local Outlier Factor of X.
The opposite is returned so that bigger is better, i.e. large values
correspond to inliers.
**Only available for novelty detection (when novelty is set to True).**
The argument X is supposed to contain *new data*: if X contains a
point from training, it considers the latter in its own neighborhood.
Also, the samples in X are not considered in the neighborhood of any
point.
The score_samples on training data is available by considering the
``negative_outlier_factor_`` attribute.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The query sample or samples to compute the Local Outlier Factor
w.r.t. the training samples.
Returns
-------
opposite_lof_scores : ndarray of shape (n_samples,)
The opposite of the Local Outlier Factor of each input sample.
The lower, the more abnormal.
"""
if not self.novelty:
msg = ('score_samples is not available when novelty=False. The '
'scores of the training samples are always available '
'through the negative_outlier_factor_ attribute. Use '
'novelty=True if you want to use LOF for novelty detection '
'and compute score_samples for new unseen data.')
raise AttributeError(msg)
return self._score_samples
def _score_samples(self, X):
"""Opposite of the Local Outlier Factor of X.
The opposite is returned so that bigger is better, i.e. large values
correspond to inliers.
**Only available for novelty detection (when novelty is set to True).**
The argument X is supposed to contain *new data*: if X contains a
point from training, it considers the latter in its own neighborhood.
Also, the samples in X are not considered in the neighborhood of any
point.
The score_samples on training data is available by considering the
``negative_outlier_factor_`` attribute.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The query sample or samples to compute the Local Outlier Factor
w.r.t. the training samples.
Returns
-------
opposite_lof_scores : ndarray of shape (n_samples,)
The opposite of the Local Outlier Factor of each input sample.
The lower, the more abnormal.
"""
check_is_fitted(self)
X = check_array(X, accept_sparse='csr')
distances_X, neighbors_indices_X = (
self.kneighbors(X, n_neighbors=self.n_neighbors_))
X_lrd = self._local_reachability_density(distances_X,
neighbors_indices_X)
lrd_ratios_array = (self._lrd[neighbors_indices_X] /
X_lrd[:, np.newaxis])
# as bigger is better:
return -np.mean(lrd_ratios_array, axis=1)
def _local_reachability_density(self, distances_X, neighbors_indices):
"""The local reachability density (LRD)
The LRD of a sample is the inverse of the average reachability
distance of its k-nearest neighbors.
Parameters
----------
distances_X : ndarray of shape (n_queries, self.n_neighbors)
Distances to the neighbors (in the training samples `self._fit_X`)
of each query point to compute the LRD.
neighbors_indices : ndarray of shape (n_queries, self.n_neighbors)
Neighbors indices (of each query point) among training samples
self._fit_X.
Returns
-------
local_reachability_density : ndarray of shape (n_queries,)
The local reachability density of each sample.
"""
dist_k = self._distances_fit_X_[neighbors_indices,
self.n_neighbors_ - 1]
reach_dist_array = np.maximum(distances_X, dist_k)
# 1e-10 to avoid 'nan' when the number of duplicates exceeds n_neighbors_:
return 1. / (np.mean(reach_dist_array, axis=1) + 1e-10)
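# ---------------------------------------------------------------------------
# Hedged usage sketch (illustrative only, not part of this module): the two
# LOF modes on synthetic data. Outlier detection labels the training set via
# fit_predict; novelty detection (novelty=True) exposes predict,
# decision_function and score_samples for new, unseen samples.
if __name__ == '__main__':
    import numpy as np
    from sklearn.neighbors import LocalOutlierFactor

    rng = np.random.RandomState(42)
    X_train = rng.normal(size=(100, 2))
    X_new = np.array([[0.1, 0.2], [8.0, 8.0]])  # likely inlier, clear outlier

    # Outlier detection: labels for the training data themselves.
    lof = LocalOutlierFactor(n_neighbors=20)
    print(lof.fit_predict(X_train))               # -1 outlier, +1 inlier

    # Novelty detection: score previously unseen samples.
    lof_nov = LocalOutlierFactor(n_neighbors=20, novelty=True).fit(X_train)
    print(lof_nov.predict(X_new))                  # e.g. [ 1 -1]
    print(lof_nov.decision_function(X_new))        # > 0 inlier, < 0 outlier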

View file

@ -0,0 +1,527 @@
# coding: utf-8
"""
Neighborhood Component Analysis
"""
# Authors: William de Vazelhes <wdevazelhes@gmail.com>
# John Chiotellis <ioannis.chiotellis@in.tum.de>
# License: BSD 3 clause
from __future__ import print_function
from warnings import warn
import numpy as np
import sys
import time
import numbers
from scipy.optimize import minimize
from ..utils.extmath import softmax
from ..metrics import pairwise_distances
from ..base import BaseEstimator, TransformerMixin
from ..preprocessing import LabelEncoder
from ..decomposition import PCA
from ..utils.multiclass import check_classification_targets
from ..utils.random import check_random_state
from ..utils.validation import check_is_fitted, check_array, check_scalar
from ..utils.validation import _deprecate_positional_args
from ..exceptions import ConvergenceWarning
class NeighborhoodComponentsAnalysis(TransformerMixin, BaseEstimator):
"""Neighborhood Components Analysis
Neighborhood Component Analysis (NCA) is a machine learning algorithm for
metric learning. It learns a linear transformation in a supervised fashion
to improve the classification accuracy of a stochastic nearest neighbors
rule in the transformed space.
Read more in the :ref:`User Guide <nca>`.
Parameters
----------
n_components : int, default=None
Preferred dimensionality of the projected space.
If None it will be set to ``n_features``.
init : {'auto', 'pca', 'lda', 'identity', 'random'} or ndarray of shape \
(n_features_a, n_features_b), default='auto'
Initialization of the linear transformation. Possible options are
'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape
(n_features_a, n_features_b).
'auto'
Depending on ``n_components``, the most reasonable initialization
will be chosen. If ``n_components <= min(n_features, n_classes - 1)``
we use 'lda', as it uses labels information. If not, but
``n_components < min(n_features, n_samples)``, we use 'pca', as
it projects data in meaningful directions (those of higher
variance). Otherwise, we just use 'identity'.
'pca'
``n_components`` principal components of the inputs passed
to :meth:`fit` will be used to initialize the transformation.
(See :class:`~sklearn.decomposition.PCA`)
'lda'
``min(n_components, n_classes)`` most discriminative
components of the inputs passed to :meth:`fit` will be used to
initialize the transformation. (If ``n_components > n_classes``,
the rest of the components will be zero.) (See
:class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)
'identity'
If ``n_components`` is strictly smaller than the
dimensionality of the inputs passed to :meth:`fit`, the identity
matrix will be truncated to the first ``n_components`` rows.
'random'
The initial transformation will be a random array of shape
`(n_components, n_features)`. Each value is sampled from the
standard normal distribution.
numpy array
n_features_b must match the dimensionality of the inputs passed to
:meth:`fit` and n_features_a must be less than or equal to that.
If ``n_components`` is not None, n_features_a must match it.
warm_start : bool, default=False
If True and :meth:`fit` has been called before, the solution of the
previous call to :meth:`fit` is used as the initial linear
transformation (``n_components`` and ``init`` will be ignored).
max_iter : int, default=50
Maximum number of iterations in the optimization.
tol : float, default=1e-5
Convergence tolerance for the optimization.
callback : callable, default=None
If not None, this function is called after every iteration of the
optimizer, taking as arguments the current solution (flattened
transformation matrix) and the number of iterations. This might be
useful in case one wants to examine or store the transformation
found after each iteration.
verbose : int, default=0
If 0, no progress messages will be printed.
If 1, progress messages will be printed to stdout.
If > 1, progress messages will be printed and the ``disp``
parameter of :func:`scipy.optimize.minimize` will be set to
``verbose - 2``.
random_state : int or numpy.RandomState, default=None
A pseudo random number generator object or a seed for it if int. If
``init='random'``, ``random_state`` is used to initialize the random
transformation. If ``init='pca'``, ``random_state`` is passed as an
argument to PCA when initializing the transformation. Pass an int
for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
Attributes
----------
components_ : ndarray of shape (n_components, n_features)
The linear transformation learned during fitting.
n_iter_ : int
Counts the number of iterations performed by the optimizer.
random_state_ : numpy.RandomState
Pseudo random number generator object used during initialization.
Examples
--------
>>> from sklearn.neighbors import NeighborhoodComponentsAnalysis
>>> from sklearn.neighbors import KNeighborsClassifier
>>> from sklearn.datasets import load_iris
>>> from sklearn.model_selection import train_test_split
>>> X, y = load_iris(return_X_y=True)
>>> X_train, X_test, y_train, y_test = train_test_split(X, y,
... stratify=y, test_size=0.7, random_state=42)
>>> nca = NeighborhoodComponentsAnalysis(random_state=42)
>>> nca.fit(X_train, y_train)
NeighborhoodComponentsAnalysis(...)
>>> knn = KNeighborsClassifier(n_neighbors=3)
>>> knn.fit(X_train, y_train)
KNeighborsClassifier(...)
>>> print(knn.score(X_test, y_test))
0.933333...
>>> knn.fit(nca.transform(X_train), y_train)
KNeighborsClassifier(...)
>>> print(knn.score(nca.transform(X_test), y_test))
0.961904...
References
----------
.. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov.
"Neighbourhood Components Analysis". Advances in Neural Information
Processing Systems. 17, 513-520, 2005.
http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf
.. [2] Wikipedia entry on Neighborhood Components Analysis
https://en.wikipedia.org/wiki/Neighbourhood_components_analysis
"""
@_deprecate_positional_args
def __init__(self, n_components=None, *, init='auto', warm_start=False,
max_iter=50, tol=1e-5, callback=None, verbose=0,
random_state=None):
self.n_components = n_components
self.init = init
self.warm_start = warm_start
self.max_iter = max_iter
self.tol = tol
self.callback = callback
self.verbose = verbose
self.random_state = random_state
def fit(self, X, y):
"""Fit the model according to the given training data.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The training samples.
y : array-like of shape (n_samples,)
The corresponding training labels.
Returns
-------
self : object
Returns a trained NeighborhoodComponentsAnalysis model.
"""
# Verify inputs X and y and NCA parameters, and transform a copy if
# needed
X, y, init = self._validate_params(X, y)
# Initialize the random generator
self.random_state_ = check_random_state(self.random_state)
# Measure the total training time
t_train = time.time()
# Compute a mask that stays fixed during optimization:
same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]
# (n_samples, n_samples)
# Initialize the transformation
transformation = self._initialize(X, y, init)
# Create a dictionary of parameters to be passed to the optimizer
disp = self.verbose - 2 if self.verbose > 1 else -1
optimizer_params = {'method': 'L-BFGS-B',
'fun': self._loss_grad_lbfgs,
'args': (X, same_class_mask, -1.0),
'jac': True,
'x0': transformation,
'tol': self.tol,
'options': dict(maxiter=self.max_iter, disp=disp),
'callback': self._callback
}
# Call the optimizer
self.n_iter_ = 0
opt_result = minimize(**optimizer_params)
# Reshape the solution found by the optimizer
self.components_ = opt_result.x.reshape(-1, X.shape[1])
# Stop timer
t_train = time.time() - t_train
if self.verbose:
cls_name = self.__class__.__name__
# Warn the user if the algorithm did not converge
if not opt_result.success:
warn('[{}] NCA did not converge: {}'.format(
cls_name, opt_result.message),
ConvergenceWarning)
print('[{}] Training took {:8.2f}s.'.format(cls_name, t_train))
return self
def transform(self, X):
"""Applies the learned transformation to the given data.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Data samples.
Returns
-------
X_embedded : ndarray of shape (n_samples, n_components)
The transformed data samples.
Raises
------
NotFittedError
If :meth:`fit` has not been called before.
"""
check_is_fitted(self)
X = check_array(X)
return np.dot(X, self.components_.T)
def _validate_params(self, X, y):
"""Validate parameters as soon as :meth:`fit` is called.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The training samples.
y : array-like of shape (n_samples,)
The corresponding training labels.
Returns
-------
X : ndarray of shape (n_samples, n_features)
The validated training samples.
y : ndarray of shape (n_samples,)
The validated training labels, encoded to be integers in
the range [0, n_classes).
init : str or ndarray of shape (n_features_a, n_features_b)
The validated initialization of the linear transformation.
Raises
------
TypeError
If a parameter is not an instance of the desired type.
ValueError
If a parameter's value violates its legal value range or if the
combination of two or more given parameters is incompatible.
"""
# Validate the inputs X and y, and convert y to numerical classes.
X, y = self._validate_data(X, y, ensure_min_samples=2)
check_classification_targets(y)
y = LabelEncoder().fit_transform(y)
# Check the preferred dimensionality of the projected space
if self.n_components is not None:
check_scalar(
self.n_components, 'n_components', numbers.Integral, min_val=1)
if self.n_components > X.shape[1]:
raise ValueError('The preferred dimensionality of the '
'projected space `n_components` ({}) cannot '
'be greater than the given data '
'dimensionality ({})!'
.format(self.n_components, X.shape[1]))
# If warm_start is enabled, check that the inputs are consistent
check_scalar(self.warm_start, 'warm_start', bool)
if self.warm_start and hasattr(self, 'components_'):
if self.components_.shape[1] != X.shape[1]:
raise ValueError('The new inputs dimensionality ({}) does not '
'match the input dimensionality of the '
'previously learned transformation ({}).'
.format(X.shape[1],
self.components_.shape[1]))
check_scalar(self.max_iter, 'max_iter', numbers.Integral, min_val=1)
check_scalar(self.tol, 'tol', numbers.Real, min_val=0.)
check_scalar(self.verbose, 'verbose', numbers.Integral, min_val=0)
if self.callback is not None:
if not callable(self.callback):
raise ValueError('`callback` is not callable.')
# Check how the linear transformation should be initialized
init = self.init
if isinstance(init, np.ndarray):
init = check_array(init)
# Assert that init.shape[1] = X.shape[1]
if init.shape[1] != X.shape[1]:
raise ValueError(
'The input dimensionality ({}) of the given '
'linear transformation `init` must match the '
'dimensionality of the given inputs `X` ({}).'
.format(init.shape[1], X.shape[1]))
# Assert that init.shape[0] <= init.shape[1]
if init.shape[0] > init.shape[1]:
raise ValueError(
'The output dimensionality ({}) of the given '
'linear transformation `init` cannot be '
'greater than its input dimensionality ({}).'
.format(init.shape[0], init.shape[1]))
if self.n_components is not None:
# Assert that self.n_components = init.shape[0]
if self.n_components != init.shape[0]:
raise ValueError('The preferred dimensionality of the '
'projected space `n_components` ({}) does'
' not match the output dimensionality of '
'the given linear transformation '
'`init` ({})!'
.format(self.n_components,
init.shape[0]))
elif init in ['auto', 'pca', 'lda', 'identity', 'random']:
pass
else:
raise ValueError(
"`init` must be 'auto', 'pca', 'lda', 'identity', 'random' "
"or a numpy array of shape (n_components, n_features).")
return X, y, init
def _initialize(self, X, y, init):
"""Initialize the transformation.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The training samples.
y : array-like of shape (n_samples,)
The training labels.
init : str or ndarray of shape (n_features_a, n_features_b)
The validated initialization of the linear transformation.
Returns
-------
transformation : ndarray of shape (n_components, n_features)
The initialized linear transformation.
"""
transformation = init
if self.warm_start and hasattr(self, 'components_'):
transformation = self.components_
elif isinstance(init, np.ndarray):
pass
else:
n_samples, n_features = X.shape
n_components = self.n_components or n_features
if init == 'auto':
n_classes = len(np.unique(y))
if n_components <= min(n_features, n_classes - 1):
init = 'lda'
elif n_components < min(n_features, n_samples):
init = 'pca'
else:
init = 'identity'
if init == 'identity':
transformation = np.eye(n_components, X.shape[1])
elif init == 'random':
transformation = self.random_state_.randn(n_components,
X.shape[1])
elif init in {'pca', 'lda'}:
init_time = time.time()
if init == 'pca':
pca = PCA(n_components=n_components,
random_state=self.random_state_)
if self.verbose:
print('Finding principal components... ', end='')
sys.stdout.flush()
pca.fit(X)
transformation = pca.components_
elif init == 'lda':
from ..discriminant_analysis import (
LinearDiscriminantAnalysis)
lda = LinearDiscriminantAnalysis(n_components=n_components)
if self.verbose:
print('Finding most discriminative components... ',
end='')
sys.stdout.flush()
lda.fit(X, y)
transformation = lda.scalings_.T[:n_components]
if self.verbose:
print('done in {:5.2f}s'.format(time.time() - init_time))
return transformation
def _callback(self, transformation):
"""Called after each iteration of the optimizer.
Parameters
----------
transformation : ndarray of shape (n_components * n_features,)
The solution computed by the optimizer in this iteration.
"""
if self.callback is not None:
self.callback(transformation, self.n_iter_)
self.n_iter_ += 1
def _loss_grad_lbfgs(self, transformation, X, same_class_mask, sign=1.0):
"""Compute the loss and the loss gradient w.r.t. ``transformation``.
Parameters
----------
transformation : ndarray of shape (n_components * n_features,)
The raveled linear transformation on which to compute loss and
evaluate gradient.
X : ndarray of shape (n_samples, n_features)
The training samples.
same_class_mask : ndarray of shape (n_samples, n_samples)
A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong
to the same class, and ``0`` otherwise.
Returns
-------
loss : float
The loss computed for the given transformation.
gradient : ndarray of shape (n_components * n_features,)
The new (flattened) gradient of the loss.
"""
if self.n_iter_ == 0:
self.n_iter_ += 1
if self.verbose:
header_fields = ['Iteration', 'Objective Value', 'Time(s)']
header_fmt = '{:>10} {:>20} {:>10}'
header = header_fmt.format(*header_fields)
cls_name = self.__class__.__name__
print('[{}]'.format(cls_name))
print('[{}] {}\n[{}] {}'.format(cls_name, header,
cls_name, '-' * len(header)))
t_funcall = time.time()
transformation = transformation.reshape(-1, X.shape[1])
X_embedded = np.dot(X, transformation.T) # (n_samples, n_components)
# Compute softmax distances
p_ij = pairwise_distances(X_embedded, squared=True)
np.fill_diagonal(p_ij, np.inf)
p_ij = softmax(-p_ij) # (n_samples, n_samples)
# Compute loss
masked_p_ij = p_ij * same_class_mask
p = np.sum(masked_p_ij, axis=1, keepdims=True) # (n_samples, 1)
loss = np.sum(p)
# Compute gradient of loss w.r.t. `transform`
weighted_p_ij = masked_p_ij - p_ij * p
weighted_p_ij_sym = weighted_p_ij + weighted_p_ij.T
np.fill_diagonal(weighted_p_ij_sym, -weighted_p_ij.sum(axis=0))
gradient = 2 * X_embedded.T.dot(weighted_p_ij_sym).dot(X)
# time complexity of the gradient: O(n_components x n_samples x (
# n_samples + n_features))
if self.verbose:
t_funcall = time.time() - t_funcall
values_fmt = '[{}] {:>10} {:>20.6e} {:>10.2f}'
print(values_fmt.format(self.__class__.__name__, self.n_iter_,
loss, t_funcall))
sys.stdout.flush()
return sign * loss, sign * gradient.ravel()
def _more_tags(self):
return {'requires_y': True}
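# ---------------------------------------------------------------------------
# Hedged illustration (not part of this module): the stochastic-neighbor
# probabilities p_ij that _loss_grad_lbfgs maximizes, computed directly with
# NumPy. p_ij is the row-wise softmax over negative squared distances in the
# embedded space, with the diagonal excluded; the objective is the expected
# number of correctly classified points, sum_i sum_{j in class(i)} p_ij.
# scipy.special.softmax is an assumed stand-in for the module's own softmax.
if __name__ == '__main__':
    import numpy as np
    from scipy.special import softmax

    rng = np.random.RandomState(0)
    X = rng.normal(size=(5, 3))
    y = np.array([0, 0, 1, 1, 1])
    A = np.eye(2, 3)                       # an (n_components, n_features) map

    X_emb = X @ A.T
    d2 = ((X_emb[:, None, :] - X_emb[None, :, :]) ** 2).sum(-1)
    np.fill_diagonal(d2, np.inf)           # a point is never its own neighbor
    p_ij = softmax(-d2, axis=1)
    same_class = y[:, None] == y[None, :]
    score = (p_ij * same_class).sum()      # higher is better; NCA maximizes it
    print(score)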

View file

@ -0,0 +1,203 @@
# -*- coding: utf-8 -*-
"""
Nearest Centroid Classification
"""
# Author: Robert Layton <robertlayton@gmail.com>
# Olivier Grisel <olivier.grisel@ensta.org>
#
# License: BSD 3 clause
import warnings
import numpy as np
from scipy import sparse as sp
from ..base import BaseEstimator, ClassifierMixin
from ..metrics.pairwise import pairwise_distances
from ..preprocessing import LabelEncoder
from ..utils.validation import check_array, check_is_fitted
from ..utils.validation import _deprecate_positional_args
from ..utils.sparsefuncs import csc_median_axis_0
from ..utils.multiclass import check_classification_targets
class NearestCentroid(ClassifierMixin, BaseEstimator):
"""Nearest centroid classifier.
Each class is represented by its centroid, with test samples classified to
the class with the nearest centroid.
Read more in the :ref:`User Guide <nearest_centroid_classifier>`.
Parameters
----------
metric : str or callable
The metric to use when calculating distance between instances in a
feature array. If metric is a string or callable, it must be one of
the options allowed by metrics.pairwise.pairwise_distances for its
metric parameter.
The centroid for the samples corresponding to each class is the point
from which the sum of the distances (according to the metric) of all
samples that belong to that particular class is minimized.
If the "manhattan" metric is provided, this centroid is the median;
for all other metrics, the centroid is set to be the mean.
.. versionchanged:: 0.19
``metric='precomputed'`` was deprecated and now raises an error
shrink_threshold : float, default=None
Threshold for shrinking centroids to remove features.
Attributes
----------
centroids_ : array-like of shape (n_classes, n_features)
Centroid of each class.
classes_ : array of shape (n_classes,)
The unique class labels.
Examples
--------
>>> from sklearn.neighbors import NearestCentroid
>>> import numpy as np
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
>>> y = np.array([1, 1, 1, 2, 2, 2])
>>> clf = NearestCentroid()
>>> clf.fit(X, y)
NearestCentroid()
>>> print(clf.predict([[-0.8, -1]]))
[1]
See also
--------
sklearn.neighbors.KNeighborsClassifier: nearest neighbors classifier
Notes
-----
When used for text classification with tf-idf vectors, this classifier is
also known as the Rocchio classifier.
References
----------
Tibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of
multiple cancer types by shrunken centroids of gene expression. Proceedings
of the National Academy of Sciences of the United States of America,
99(10), 6567-6572. The National Academy of Sciences.
"""
@_deprecate_positional_args
def __init__(self, metric='euclidean', *, shrink_threshold=None):
self.metric = metric
self.shrink_threshold = shrink_threshold
def fit(self, X, y):
"""
Fit the NearestCentroid model according to the given training data.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Training vector, where n_samples is the number of samples and
n_features is the number of features.
Note that centroid shrinking cannot be used with sparse matrices.
y : array-like of shape (n_samples,)
Target values (integers)
"""
if self.metric == 'precomputed':
raise ValueError("Precomputed is not supported.")
# If X is sparse and the metric is "manhattan", store it in CSC
# format to make it easier to calculate the median.
if self.metric == 'manhattan':
X, y = self._validate_data(X, y, accept_sparse=['csc'])
else:
X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'])
is_X_sparse = sp.issparse(X)
if is_X_sparse and self.shrink_threshold:
raise ValueError("threshold shrinking not supported"
" for sparse input")
check_classification_targets(y)
n_samples, n_features = X.shape
le = LabelEncoder()
y_ind = le.fit_transform(y)
self.classes_ = classes = le.classes_
n_classes = classes.size
if n_classes < 2:
raise ValueError('The number of classes has to be greater than'
' one; got %d class' % (n_classes))
# Centroid of each class.
self.centroids_ = np.empty((n_classes, n_features), dtype=np.float64)
# Number of samples in each class.
nk = np.zeros(n_classes)
for cur_class in range(n_classes):
center_mask = y_ind == cur_class
nk[cur_class] = np.sum(center_mask)
if is_X_sparse:
center_mask = np.where(center_mask)[0]
# XXX: Update other averaging methods according to the metrics.
if self.metric == "manhattan":
# NumPy does not calculate median of sparse matrices.
if not is_X_sparse:
self.centroids_[cur_class] = np.median(X[center_mask], axis=0)
else:
self.centroids_[cur_class] = csc_median_axis_0(X[center_mask])
else:
if self.metric != 'euclidean':
warnings.warn("Averaging for metrics other than "
"euclidean and manhattan not supported. "
"The average is set to be the mean."
)
self.centroids_[cur_class] = X[center_mask].mean(axis=0)
if self.shrink_threshold:
dataset_centroid_ = np.mean(X, axis=0)
# m parameter for determining deviation
m = np.sqrt((1. / nk) - (1. / n_samples))
# Calculate deviation using the standard deviation of centroids.
variance = (X - self.centroids_[y_ind]) ** 2
variance = variance.sum(axis=0)
s = np.sqrt(variance / (n_samples - n_classes))
s += np.median(s) # To deter outliers from affecting the results.
mm = m.reshape(len(m), 1) # Reshape to allow broadcasting.
ms = mm * s
deviation = ((self.centroids_ - dataset_centroid_) / ms)
# Soft thresholding: if the deviation crosses 0 during shrinking,
# it becomes zero.
signs = np.sign(deviation)
deviation = (np.abs(deviation) - self.shrink_threshold)
np.clip(deviation, 0, None, out=deviation)
deviation *= signs
# Now adjust the centroids using the deviation
msd = ms * deviation
self.centroids_ = dataset_centroid_[np.newaxis, :] + msd
return self
def predict(self, X):
"""Perform classification on an array of test vectors X.
The predicted class C for each sample in X is returned.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Returns
-------
C : ndarray of shape (n_samples,)
Notes
-----
The ``'precomputed'`` metric is not supported by this estimator:
:meth:`fit` raises a ``ValueError`` for it, so X is always interpreted
as a feature array of shape (n_samples, n_features).
"""
check_is_fitted(self)
X = check_array(X, accept_sparse='csr')
return self.classes_[pairwise_distances(
X, self.centroids_, metric=self.metric).argmin(axis=1)]
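# ---------------------------------------------------------------------------
# Hedged sketch (illustrative only, not part of this module) of the soft
# thresholding applied in fit when shrink_threshold is set: each centroid
# deviation is shrunk toward zero by the threshold and clipped at zero, so
# small, noisy features drop out of the classification rule entirely.
if __name__ == '__main__':
    import numpy as np

    deviation = np.array([[2.0, -0.3, 0.7], [-1.5, 0.2, -0.1]])
    shrink_threshold = 0.5
    signs = np.sign(deviation)
    shrunk = np.clip(np.abs(deviation) - shrink_threshold, 0, None) * signs
    print(shrunk)   # e.g. [[ 1.5 -0.   0.2] [-1.   0.  -0. ]]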

View file

@ -0,0 +1,101 @@
# cython: boundscheck=False
# cython: wraparound=False
# cython: cdivision=True
#
# Author: Thomas Moreau <thomas.moreau.2010@gmail.com>
# Author: Olivier Grisel <olivier.grisel@ensta.fr>
# See quad_tree.pyx for details.
import numpy as np
cimport numpy as np
ctypedef np.npy_float32 DTYPE_t # Type of X
ctypedef np.npy_intp SIZE_t # Type for indices and counters
ctypedef np.npy_int32 INT32_t # Signed 32 bit integer
ctypedef np.npy_uint32 UINT32_t # Unsigned 32 bit integer
# This is effectively an ifdef statement in Cython
# It allows us to write printf debugging lines
# and remove them at compile time
cdef enum:
DEBUGFLAG = 0
cdef float EPSILON = 1e-6
# XXX: Careful to not change the order of the arguments. It is important to
# have is_leaf and max_width consecutive as it permits to avoid padding by
# the compiler and keep the size coherent for both C and numpy data structures.
cdef struct Cell:
# Base storage structure for cells in a QuadTree object
# Tree structure
SIZE_t parent # Parent cell of this cell
SIZE_t[8] children # Array pointing to the children of this cell
# Cell description
SIZE_t cell_id # Id of the cell in the cells array in the Tree
SIZE_t point_index # Index of the point at this cell (only defined
# in a non-empty leaf)
bint is_leaf # Does this cell have children?
DTYPE_t squared_max_width # Squared value of the maximum width w
SIZE_t depth # Depth of the cell in the tree
SIZE_t cumulative_size # Number of points included in the subtree with
# this cell as a root.
# Internal constants
DTYPE_t[3] center # Store the center for quick split of cells
DTYPE_t[3] barycenter # Keep track of the center of mass of the cell
# Cell boundaries
DTYPE_t[3] min_bounds # Inferior boundaries of this cell (inclusive)
DTYPE_t[3] max_bounds # Superior boundaries of this cell (exclusive)
cdef class _QuadTree:
# The QuadTree object is a quad tree structure constructed by inserting
# recursively points in the tree and splitting cells in 4 so that each
# leaf cell contains at most one point.
# This structure also handles 3D data, inserted in trees with 8 children
# for each node.
# Parameters of the tree
cdef public int n_dimensions # Number of dimensions in X
cdef public int verbose # Verbosity of the output
cdef SIZE_t n_cells_per_cell # Number of children per node. (2 ** n_dimensions)
# Tree inner structure
cdef public SIZE_t max_depth # Max depth of the tree
cdef public SIZE_t cell_count # Counter for node IDs
cdef public SIZE_t capacity # Capacity of tree, in terms of nodes
cdef public SIZE_t n_points # Total number of points
cdef Cell* cells # Array of nodes
# Point insertion methods
cdef int insert_point(self, DTYPE_t[3] point, SIZE_t point_index,
SIZE_t cell_id=*) nogil except -1
cdef SIZE_t _insert_point_in_new_child(self, DTYPE_t[3] point, Cell* cell,
SIZE_t point_index, SIZE_t size=*
) nogil
cdef SIZE_t _select_child(self, DTYPE_t[3] point, Cell* cell) nogil
cdef bint _is_duplicate(self, DTYPE_t[3] point1, DTYPE_t[3] point2) nogil
# Create a summary of the Tree compare to a query point
cdef long summarize(self, DTYPE_t[3] point, DTYPE_t* results,
float squared_theta=*, int cell_id=*, long idx=*
) nogil
# Internal cell initialization methods
cdef void _init_cell(self, Cell* cell, SIZE_t parent, SIZE_t depth) nogil
cdef void _init_root(self, DTYPE_t[3] min_bounds, DTYPE_t[3] max_bounds
) nogil
# Private methods
cdef int _check_point_in_cell(self, DTYPE_t[3] point, Cell* cell
) nogil except -1
# Private array manipulation to manage the ``cells`` array
cdef int _resize(self, SIZE_t capacity) nogil except -1
cdef int _resize_c(self, SIZE_t capacity=*) nogil except -1
cdef int _get_cell(self, DTYPE_t[3] point, SIZE_t cell_id=*) nogil except -1
cdef np.ndarray _get_cell_ndarray(self)

View file

@ -0,0 +1,371 @@
"""Nearest Neighbor Regression"""
# Authors: Jake Vanderplas <vanderplas@astro.washington.edu>
# Fabian Pedregosa <fabian.pedregosa@inria.fr>
# Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Sparseness support by Lars Buitinck
# Multi-output support by Arnaud Joly <a.joly@ulg.ac.be>
# Empty radius support by Andreas Bjerre-Nielsen
#
# License: BSD 3 clause (C) INRIA, University of Amsterdam,
# University of Copenhagen
import warnings
import numpy as np
from ._base import _get_weights, _check_weights, NeighborsBase, KNeighborsMixin
from ._base import RadiusNeighborsMixin, SupervisedFloatMixin
from ..base import RegressorMixin
from ..utils import check_array
from ..utils.validation import _deprecate_positional_args
class KNeighborsRegressor(NeighborsBase, KNeighborsMixin,
SupervisedFloatMixin,
RegressorMixin):
"""Regression based on k-nearest neighbors.
The target is predicted by local interpolation of the targets
associated with the nearest neighbors in the training set.
Read more in the :ref:`User Guide <regression>`.
.. versionadded:: 0.9
Parameters
----------
n_neighbors : int, default=5
Number of neighbors to use by default for :meth:`kneighbors` queries.
weights : {'uniform', 'distance'} or callable, default='uniform'
Weight function used in prediction. Possible values:
- 'uniform' : uniform weights. All points in each neighborhood
are weighted equally.
- 'distance' : weight points by the inverse of their distance.
In this case, closer neighbors of a query point will have a
greater influence than neighbors which are further away.
- [callable] : a user-defined function which accepts an
array of distances, and returns an array of the same shape
containing the weights.
Uniform weights are used by default.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method.
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
leaf_size : int, default=30
Leaf size passed to BallTree or KDTree. This can affect the
speed of the construction and query, as well as the memory
required to store the tree. The optimal value depends on the
nature of the problem.
p : int, default=2
Power parameter for the Minkowski metric. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric : str or callable, default='minkowski'
The distance metric to use for the tree. The default metric is
minkowski, and with p=2 is equivalent to the standard Euclidean
metric. See the documentation of :class:`DistanceMetric` for a
list of available metrics.
If metric is "precomputed", X is assumed to be a distance matrix and
must be square during fit. X may be a :term:`sparse graph`,
in which case only "nonzero" elements may be considered neighbors.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Doesn't affect :meth:`fit` method.
Attributes
----------
effective_metric_ : str or callable
The distance metric to use. It will be the same as the `metric`
parameter or a synonym of it, e.g. 'euclidean' if the `metric`
parameter is set to 'minkowski' and the `p` parameter is set to 2.
effective_metric_params_ : dict
Additional keyword arguments for the metric function. For most metrics
it will be the same as the `metric_params` parameter, but it may also
contain the `p` parameter value if the `effective_metric_` attribute is
set to 'minkowski'.
Examples
--------
>>> X = [[0], [1], [2], [3]]
>>> y = [0, 0, 1, 1]
>>> from sklearn.neighbors import KNeighborsRegressor
>>> neigh = KNeighborsRegressor(n_neighbors=2)
>>> neigh.fit(X, y)
KNeighborsRegressor(...)
>>> print(neigh.predict([[1.5]]))
[0.5]
See also
--------
NearestNeighbors
RadiusNeighborsRegressor
KNeighborsClassifier
RadiusNeighborsClassifier
Notes
-----
See :ref:`Nearest Neighbors <neighbors>` in the online documentation
for a discussion of the choice of ``algorithm`` and ``leaf_size``.
.. warning::
Regarding the Nearest Neighbors algorithms, if it is found that two
neighbors, neighbor `k+1` and `k`, have identical distances but
different labels, the results will depend on the ordering of the
training data.
https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm
"""
@_deprecate_positional_args
def __init__(self, n_neighbors=5, *, weights='uniform',
algorithm='auto', leaf_size=30,
p=2, metric='minkowski', metric_params=None, n_jobs=None,
**kwargs):
super().__init__(
n_neighbors=n_neighbors,
algorithm=algorithm,
leaf_size=leaf_size, metric=metric, p=p,
metric_params=metric_params, n_jobs=n_jobs, **kwargs)
self.weights = _check_weights(weights)
@property
def _pairwise(self):
# For cross-validation routines to split data correctly
return self.metric == 'precomputed'
def predict(self, X):
"""Predict the target for the provided data
Parameters
----------
X : array-like of shape (n_queries, n_features), \
or (n_queries, n_indexed) if metric == 'precomputed'
Test samples.
Returns
-------
y : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=double
Target values.
"""
X = check_array(X, accept_sparse='csr')
neigh_dist, neigh_ind = self.kneighbors(X)
weights = _get_weights(neigh_dist, self.weights)
_y = self._y
if _y.ndim == 1:
_y = _y.reshape((-1, 1))
if weights is None:
y_pred = np.mean(_y[neigh_ind], axis=1)
else:
y_pred = np.empty((X.shape[0], _y.shape[1]), dtype=np.float64)
denom = np.sum(weights, axis=1)
for j in range(_y.shape[1]):
num = np.sum(_y[neigh_ind, j] * weights, axis=1)
y_pred[:, j] = num / denom
if self._y.ndim == 1:
y_pred = y_pred.ravel()
return y_pred
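# ---------------------------------------------------------------------------
# Hedged worked example (illustrative comment, not part of the module): with
# weights='distance', predict computes for each query the weighted mean
#     y_pred = sum_i(w_i * y_i) / sum_i(w_i),  with  w_i = 1 / d_i,
# over the k nearest neighbors. For one query with neighbor targets
# y = [0, 1] at distances d = [0.5, 1.5]:
#     w = [2.0, 0.666...]  ->  y_pred = (2.0*0 + 0.666...*1) / 2.666... = 0.25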
class RadiusNeighborsRegressor(NeighborsBase, RadiusNeighborsMixin,
SupervisedFloatMixin,
RegressorMixin):
"""Regression based on neighbors within a fixed radius.
The target is predicted by local interpolation of the targets
associated with the nearest neighbors in the training set.
Read more in the :ref:`User Guide <regression>`.
.. versionadded:: 0.9
Parameters
----------
radius : float, default=1.0
Range of parameter space to use by default for :meth:`radius_neighbors`
queries.
weights : {'uniform', 'distance'} or callable, default='uniform'
Weight function used in prediction. Possible values:
- 'uniform' : uniform weights. All points in each neighborhood
are weighted equally.
- 'distance' : weight points by the inverse of their distance.
In this case, closer neighbors of a query point will have a
greater influence than neighbors which are further away.
- [callable] : a user-defined function which accepts an
array of distances, and returns an array of the same shape
containing the weights.
Uniform weights are used by default.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method.
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
leaf_size : int, default=30
Leaf size passed to BallTree or KDTree. This can affect the
speed of the construction and query, as well as the memory
required to store the tree. The optimal value depends on the
nature of the problem.
p : int, default=2
Power parameter for the Minkowski metric. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric : str or callable, default='minkowski'
The distance metric to use for the tree. The default metric is
minkowski, and with p=2 is equivalent to the standard Euclidean
metric. See the documentation of :class:`DistanceMetric` for a
list of available metrics.
If metric is "precomputed", X is assumed to be a distance matrix and
must be square during fit. X may be a :term:`sparse graph`,
in which case only "nonzero" elements may be considered neighbors.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Attributes
----------
effective_metric_ : str or callable
The distance metric to use. It will be the same as the `metric`
parameter or a synonym of it, e.g. 'euclidean' if the `metric`
parameter is set to 'minkowski' and the `p` parameter is set to 2.
effective_metric_params_ : dict
Additional keyword arguments for the metric function. For most metrics
it will be the same as the `metric_params` parameter, but it may also
contain the `p` parameter value if the `effective_metric_` attribute is
set to 'minkowski'.
Examples
--------
>>> X = [[0], [1], [2], [3]]
>>> y = [0, 0, 1, 1]
>>> from sklearn.neighbors import RadiusNeighborsRegressor
>>> neigh = RadiusNeighborsRegressor(radius=1.0)
>>> neigh.fit(X, y)
RadiusNeighborsRegressor(...)
>>> print(neigh.predict([[1.5]]))
[0.5]
See also
--------
NearestNeighbors
KNeighborsRegressor
KNeighborsClassifier
RadiusNeighborsClassifier
Notes
-----
See :ref:`Nearest Neighbors <neighbors>` in the online documentation
for a discussion of the choice of ``algorithm`` and ``leaf_size``.
https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm
"""
@_deprecate_positional_args
def __init__(self, radius=1.0, *, weights='uniform',
algorithm='auto', leaf_size=30,
p=2, metric='minkowski', metric_params=None, n_jobs=None,
**kwargs):
super().__init__(
radius=radius,
algorithm=algorithm,
leaf_size=leaf_size,
p=p, metric=metric, metric_params=metric_params,
n_jobs=n_jobs, **kwargs)
self.weights = _check_weights(weights)
def predict(self, X):
"""Predict the target for the provided data
Parameters
----------
X : array-like of shape (n_queries, n_features), \
or (n_queries, n_indexed) if metric == 'precomputed'
Test samples.
Returns
-------
y : ndarray of shape (n_queries,) or (n_queries, n_outputs), \
dtype=double
Target values.
"""
X = check_array(X, accept_sparse='csr')
neigh_dist, neigh_ind = self.radius_neighbors(X)
weights = _get_weights(neigh_dist, self.weights)
_y = self._y
if _y.ndim == 1:
_y = _y.reshape((-1, 1))
empty_obs = np.full_like(_y[0], np.nan)
if weights is None:
y_pred = np.array([np.mean(_y[ind, :], axis=0)
if len(ind) else empty_obs
for (i, ind) in enumerate(neigh_ind)])
else:
y_pred = np.array([np.average(_y[ind, :], axis=0,
weights=weights[i])
if len(ind) else empty_obs
for (i, ind) in enumerate(neigh_ind)])
if np.any(np.isnan(y_pred)):
empty_warning_msg = ("One or more samples have no neighbors "
"within specified radius; predicting NaN.")
warnings.warn(empty_warning_msg)
if self._y.ndim == 1:
y_pred = y_pred.ravel()
return y_pred
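# ---------------------------------------------------------------------------
# Hedged usage sketch (illustrative only, not part of this module): a query
# point with no training neighbors inside `radius` gets a NaN prediction and
# triggers the warning above, rather than raising an error.
if __name__ == '__main__':
    from sklearn.neighbors import RadiusNeighborsRegressor

    X, y = [[0], [1], [2], [3]], [0, 0, 1, 1]
    reg = RadiusNeighborsRegressor(radius=1.0).fit(X, y)
    print(reg.predict([[1.5], [100.0]]))   # e.g. [0.5 nan] plus a UserWarning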

View file

@ -0,0 +1,18 @@
#!python
cimport numpy as np
# Floating point/data type
ctypedef np.float64_t DTYPE_t # WARNING: should match DTYPE in typedefs.pyx
cdef enum:
DTYPECODE = np.NPY_FLOAT64
ITYPECODE = np.NPY_INTP
# Index/integer type.
# WARNING: ITYPE_t must be a signed integer type or you will have a bad time!
ctypedef np.intp_t ITYPE_t # WARNING: should match ITYPE in typedefs.pyx
# Fused type for certain operations
ctypedef fused DITYPE_t:
ITYPE_t
DTYPE_t

View file

@ -0,0 +1,118 @@
"""Unsupervised nearest neighbors learner"""
from ._base import NeighborsBase
from ._base import KNeighborsMixin
from ._base import RadiusNeighborsMixin
from ._base import UnsupervisedMixin
from ..utils.validation import _deprecate_positional_args
class NearestNeighbors(KNeighborsMixin, RadiusNeighborsMixin,
UnsupervisedMixin, NeighborsBase):
"""Unsupervised learner for implementing neighbor searches.
Read more in the :ref:`User Guide <unsupervised_neighbors>`.
.. versionadded:: 0.9
Parameters
----------
n_neighbors : int, default=5
Number of neighbors to use by default for :meth:`kneighbors` queries.
radius : float, default=1.0
Range of parameter space to use by default for :meth:`radius_neighbors`
queries.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method.
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
leaf_size : int, default=30
Leaf size passed to BallTree or KDTree. This can affect the
speed of the construction and query, as well as the memory
required to store the tree. The optimal value depends on the
nature of the problem.
metric : str or callable, default='minkowski'
The distance metric to use for the tree. The default metric is
minkowski, and with p=2 is equivalent to the standard Euclidean
metric. See the documentation of :class:`DistanceMetric` for a
list of available metrics.
If metric is "precomputed", X is assumed to be a distance matrix and
must be square during fit. X may be a :term:`sparse graph`,
in which case only "nonzero" elements may be considered neighbors.
p : int, default=2
Parameter for the Minkowski metric from
sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Attributes
----------
effective_metric_ : str
Metric used to compute distances to neighbors.
effective_metric_params_ : dict
Parameters for the metric used to compute distances to neighbors.
Examples
--------
>>> import numpy as np
>>> from sklearn.neighbors import NearestNeighbors
>>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]
>>> neigh = NearestNeighbors(n_neighbors=2, radius=0.4)
>>> neigh.fit(samples)
NearestNeighbors(...)
>>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)
array([[2, 0]]...)
>>> nbrs = neigh.radius_neighbors([[0, 0, 1.3]], 0.4, return_distance=False)
>>> np.asarray(nbrs[0][0])
array(2)
See also
--------
KNeighborsClassifier
RadiusNeighborsClassifier
KNeighborsRegressor
RadiusNeighborsRegressor
BallTree
Notes
-----
See :ref:`Nearest Neighbors <neighbors>` in the online documentation
for a discussion of the choice of ``algorithm`` and ``leaf_size``.
https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm
"""
@_deprecate_positional_args
def __init__(self, *, n_neighbors=5, radius=1.0,
algorithm='auto', leaf_size=30, metric='minkowski',
p=2, metric_params=None, n_jobs=None):
super().__init__(
n_neighbors=n_neighbors,
radius=radius,
algorithm=algorithm,
leaf_size=leaf_size, metric=metric, p=p,
metric_params=metric_params, n_jobs=n_jobs)
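# ---------------------------------------------------------------------------
# Hedged sketch (illustrative only, not part of this module): besides the
# kneighbors and radius_neighbors queries shown in the class docstring, the
# fitted estimator can return a sparse connectivity graph of a query set via
# kneighbors_graph (provided by KNeighborsMixin).
if __name__ == '__main__':
    from sklearn.neighbors import NearestNeighbors

    samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]
    neigh = NearestNeighbors(n_neighbors=2).fit(samples)
    A = neigh.kneighbors_graph(samples)    # CSR matrix of shape (3, 3)
    print(A.toarray())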

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _ball_tree # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.ball_tree'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_ball_tree, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)
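# ---------------------------------------------------------------------------
# Hedged illustration (not part of the module): importing through the
# deprecated path still resolves, but _raise_dep_warning_if_not_pytest emits
# a deprecation warning (a FutureWarning in this version) pointing to the
# supported location. Roughly:
#
#     >>> from sklearn.neighbors.ball_tree import BallTree  # doctest: +SKIP
#     FutureWarning: sklearn.neighbors.ball_tree is deprecated ...
#     >>> from sklearn.neighbors import BallTree            # preferred path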

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _base # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.base'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_base, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _classification # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.classification'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_classification, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _dist_metrics # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.dist_metrics'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_dist_metrics, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _graph # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.graph'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_graph, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _kd_tree # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.kd_tree'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_kd_tree, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _kde # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.kde'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_kde, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _lof # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.lof'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_lof, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _nca # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.nca'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_nca, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _nearest_centroid # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.nearest_centroid'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_nearest_centroid, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _quad_tree # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.quad_tree'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_quad_tree, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _regression # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.regression'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_regression, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,41 @@
import os
def configuration(parent_package='', top_path=None):
import numpy
from numpy.distutils.misc_util import Configuration
config = Configuration('neighbors', parent_package, top_path)
libraries = []
if os.name == 'posix':
libraries.append('m')
config.add_extension('_ball_tree',
sources=['_ball_tree.pyx'],
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_extension('_kd_tree',
sources=['_kd_tree.pyx'],
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_extension('_dist_metrics',
sources=['_dist_metrics.pyx'],
include_dirs=[numpy.get_include(),
os.path.join(numpy.get_include(),
'numpy')],
libraries=libraries)
config.add_extension('_typedefs',
sources=['_typedefs.pyx'],
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_extension("_quad_tree",
sources=["_quad_tree.pyx"],
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_subpackage('tests')
return config
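# ---------------------------------------------------------------------------
# Hedged sketch (assumption: the standard numpy.distutils entry point used
# elsewhere in scikit-learn's build): when this subpackage is built
# stand-alone, the configuration would be consumed roughly like so.
if __name__ == '__main__':
    from numpy.distutils.core import setup
    setup(**configuration(top_path='').todict())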

View file

@ -0,0 +1,67 @@
import itertools
import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal
from sklearn.neighbors._ball_tree import BallTree
from sklearn.neighbors import DistanceMetric
from sklearn.utils import check_random_state
rng = np.random.RandomState(10)
V_mahalanobis = rng.rand(3, 3)
V_mahalanobis = np.dot(V_mahalanobis, V_mahalanobis.T)
DIMENSION = 3
METRICS = {'euclidean': {},
'manhattan': {},
'minkowski': dict(p=3),
'chebyshev': {},
'seuclidean': dict(V=rng.random_sample(DIMENSION)),
'wminkowski': dict(p=3, w=rng.random_sample(DIMENSION)),
'mahalanobis': dict(V=V_mahalanobis)}
DISCRETE_METRICS = ['hamming',
'canberra',
'braycurtis']
BOOLEAN_METRICS = ['matching', 'jaccard', 'dice', 'kulsinski',
'rogerstanimoto', 'russellrao', 'sokalmichener',
'sokalsneath']
def brute_force_neighbors(X, Y, k, metric, **kwargs):
D = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X)
ind = np.argsort(D, axis=1)[:, :k]
dist = D[np.arange(Y.shape[0])[:, None], ind]
return dist, ind
@pytest.mark.parametrize('metric',
itertools.chain(BOOLEAN_METRICS, DISCRETE_METRICS))
def test_ball_tree_query_metrics(metric):
rng = check_random_state(0)
if metric in BOOLEAN_METRICS:
X = rng.random_sample((40, 10)).round(0)
Y = rng.random_sample((10, 10)).round(0)
elif metric in DISCRETE_METRICS:
X = (4 * rng.random_sample((40, 10))).round(0)
Y = (4 * rng.random_sample((10, 10))).round(0)
k = 5
bt = BallTree(X, leaf_size=1, metric=metric)
dist1, ind1 = bt.query(Y, k)
dist2, ind2 = brute_force_neighbors(X, Y, k, metric)
assert_array_almost_equal(dist1, dist2)
def test_query_haversine():
rng = check_random_state(0)
X = 2 * np.pi * rng.random_sample((40, 2))
bt = BallTree(X, leaf_size=1, metric='haversine')
dist1, ind1 = bt.query(X, k=5)
dist2, ind2 = brute_force_neighbors(X, X, k=5, metric='haversine')
assert_array_almost_equal(dist1, dist2)
assert_array_almost_equal(ind1, ind2)
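
The two tests above validate BallTree.query against the exact brute-force helper. As a quick illustration of the API under test (a sketch with made-up data):

import numpy as np
from sklearn.neighbors import BallTree

rng = np.random.RandomState(0)
X = rng.random_sample((40, 3))
tree = BallTree(X, leaf_size=2)
dist, ind = tree.query(X[:2], k=5)  # 5 nearest neighbors of the first 2 rows
print(dist.shape, ind.shape)        # (2, 5) (2, 5)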

View file

@ -0,0 +1,203 @@
import itertools
import pickle
import numpy as np
from numpy.testing import assert_array_almost_equal
import pytest
from scipy.spatial.distance import cdist
from sklearn.neighbors import DistanceMetric
from sklearn.neighbors import BallTree
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_raises_regex
from sklearn.utils.fixes import sp_version, parse_version
def dist_func(x1, x2, p):
return np.sum((x1 - x2) ** p) ** (1. / p)
rng = check_random_state(0)
d = 4
n1 = 20
n2 = 25
X1 = rng.random_sample((n1, d)).astype('float64', copy=False)
X2 = rng.random_sample((n2, d)).astype('float64', copy=False)
# make boolean arrays: ones and zeros
X1_bool = X1.round(0)
X2_bool = X2.round(0)
V = rng.random_sample((d, d))
VI = np.dot(V, V.T)
BOOL_METRICS = ['matching', 'jaccard', 'dice',
'kulsinski', 'rogerstanimoto', 'russellrao',
'sokalmichener', 'sokalsneath']
METRICS_DEFAULT_PARAMS = {'euclidean': {},
'cityblock': {},
'minkowski': dict(p=(1, 1.5, 2, 3)),
'chebyshev': {},
'seuclidean': dict(V=(rng.random_sample(d),)),
'wminkowski': dict(p=(1, 1.5, 3),
w=(rng.random_sample(d),)),
'mahalanobis': dict(VI=(VI,)),
'hamming': {},
'canberra': {},
'braycurtis': {}}
@pytest.mark.parametrize('metric', METRICS_DEFAULT_PARAMS)
def test_cdist(metric):
argdict = METRICS_DEFAULT_PARAMS[metric]
keys = argdict.keys()
for vals in itertools.product(*argdict.values()):
kwargs = dict(zip(keys, vals))
D_true = cdist(X1, X2, metric, **kwargs)
check_cdist(metric, kwargs, D_true)
@pytest.mark.parametrize('metric', BOOL_METRICS)
def test_cdist_bool_metric(metric):
D_true = cdist(X1_bool, X2_bool, metric)
check_cdist_bool(metric, D_true)
def check_cdist(metric, kwargs, D_true):
dm = DistanceMetric.get_metric(metric, **kwargs)
D12 = dm.pairwise(X1, X2)
assert_array_almost_equal(D12, D_true)
def check_cdist_bool(metric, D_true):
dm = DistanceMetric.get_metric(metric)
D12 = dm.pairwise(X1_bool, X2_bool)
assert_array_almost_equal(D12, D_true)
@pytest.mark.parametrize('metric', METRICS_DEFAULT_PARAMS)
def test_pdist(metric):
argdict = METRICS_DEFAULT_PARAMS[metric]
keys = argdict.keys()
for vals in itertools.product(*argdict.values()):
kwargs = dict(zip(keys, vals))
D_true = cdist(X1, X1, metric, **kwargs)
check_pdist(metric, kwargs, D_true)
@pytest.mark.parametrize('metric', BOOL_METRICS)
def test_pdist_bool_metrics(metric):
D_true = cdist(X1_bool, X1_bool, metric)
check_pdist_bool(metric, D_true)
def check_pdist(metric, kwargs, D_true):
dm = DistanceMetric.get_metric(metric, **kwargs)
D12 = dm.pairwise(X1)
assert_array_almost_equal(D12, D_true)
def check_pdist_bool(metric, D_true):
dm = DistanceMetric.get_metric(metric)
D12 = dm.pairwise(X1_bool)
# Based on https://github.com/scipy/scipy/pull/7373
# When comparing two all-zero vectors, scipy>=1.2.0 jaccard metric
# was changed to return 0, instead of nan.
if metric == 'jaccard' and sp_version < parse_version('1.2.0'):
D_true[np.isnan(D_true)] = 0
assert_array_almost_equal(D12, D_true)
@pytest.mark.parametrize('metric', METRICS_DEFAULT_PARAMS)
def test_pickle(metric):
argdict = METRICS_DEFAULT_PARAMS[metric]
keys = argdict.keys()
for vals in itertools.product(*argdict.values()):
kwargs = dict(zip(keys, vals))
check_pickle(metric, kwargs)
@pytest.mark.parametrize('metric', BOOL_METRICS)
def test_pickle_bool_metrics(metric):
dm = DistanceMetric.get_metric(metric)
D1 = dm.pairwise(X1_bool)
dm2 = pickle.loads(pickle.dumps(dm))
D2 = dm2.pairwise(X1_bool)
assert_array_almost_equal(D1, D2)
def check_pickle(metric, kwargs):
dm = DistanceMetric.get_metric(metric, **kwargs)
D1 = dm.pairwise(X1)
dm2 = pickle.loads(pickle.dumps(dm))
D2 = dm2.pairwise(X1)
assert_array_almost_equal(D1, D2)
def test_haversine_metric():
def haversine_slow(x1, x2):
return 2 * np.arcsin(np.sqrt(np.sin(0.5 * (x1[0] - x2[0])) ** 2
+ np.cos(x1[0]) * np.cos(x2[0]) *
np.sin(0.5 * (x1[1] - x2[1])) ** 2))
X = np.random.random((10, 2))
haversine = DistanceMetric.get_metric("haversine")
D1 = haversine.pairwise(X)
D2 = np.zeros_like(D1)
for i, x1 in enumerate(X):
for j, x2 in enumerate(X):
D2[i, j] = haversine_slow(x1, x2)
assert_array_almost_equal(D1, D2)
assert_array_almost_equal(haversine.dist_to_rdist(D1),
np.sin(0.5 * D2) ** 2)
def test_pyfunc_metric():
X = np.random.random((10, 3))
euclidean = DistanceMetric.get_metric("euclidean")
pyfunc = DistanceMetric.get_metric("pyfunc", func=dist_func, p=2)
    # Check that DistanceMetric objects initialized with either a callable
    # or a predefined metric are picklable
euclidean_pkl = pickle.loads(pickle.dumps(euclidean))
pyfunc_pkl = pickle.loads(pickle.dumps(pyfunc))
D1 = euclidean.pairwise(X)
D2 = pyfunc.pairwise(X)
D1_pkl = euclidean_pkl.pairwise(X)
D2_pkl = pyfunc_pkl.pairwise(X)
assert_array_almost_equal(D1, D2)
assert_array_almost_equal(D1_pkl, D2_pkl)
def test_bad_pyfunc_metric():
def wrong_distance(x, y):
return "1"
X = np.ones((5, 2))
assert_raises_regex(TypeError,
"Custom distance function must accept two vectors",
BallTree, X, metric=wrong_distance)
def test_input_data_size():
# Regression test for #6288
# Previously, a metric requiring a particular input dimension would fail
def custom_metric(x, y):
assert x.shape[0] == 3
return np.sum((x - y) ** 2)
rng = check_random_state(0)
X = rng.rand(10, 3)
pyfunc = DistanceMetric.get_metric("pyfunc", func=custom_metric)
eucl = DistanceMetric.get_metric("euclidean")
assert_array_almost_equal(pyfunc.pairwise(X), eucl.pairwise(X) ** 2)
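
All of these tests go through DistanceMetric.get_metric, which maps a metric name (or a callable, via "pyfunc") to a concrete metric object. A minimal sketch of that entry point:

import numpy as np
from sklearn.neighbors import DistanceMetric

X = np.array([[0., 0.], [3., 4.]])
dm = DistanceMetric.get_metric('minkowski', p=2)  # p=2 is plain Euclidean
print(dm.pairwise(X))  # [[0. 5.] [5. 0.]] -- the 3-4-5 triangle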

View file

@ -0,0 +1,79 @@
import numpy as np
from sklearn.metrics import euclidean_distances
from sklearn.neighbors import KNeighborsTransformer, RadiusNeighborsTransformer
from sklearn.neighbors._base import _is_sorted_by_data
def test_transformer_result():
# Test the number of neighbors returned
n_neighbors = 5
n_samples_fit = 20
n_queries = 18
n_features = 10
rng = np.random.RandomState(42)
X = rng.randn(n_samples_fit, n_features)
X2 = rng.randn(n_queries, n_features)
radius = np.percentile(euclidean_distances(X), 10)
# with n_neighbors
for mode in ['distance', 'connectivity']:
add_one = mode == 'distance'
nnt = KNeighborsTransformer(n_neighbors=n_neighbors, mode=mode)
Xt = nnt.fit_transform(X)
assert Xt.shape == (n_samples_fit, n_samples_fit)
assert Xt.data.shape == (n_samples_fit * (n_neighbors + add_one), )
assert Xt.format == 'csr'
assert _is_sorted_by_data(Xt)
X2t = nnt.transform(X2)
assert X2t.shape == (n_queries, n_samples_fit)
assert X2t.data.shape == (n_queries * (n_neighbors + add_one), )
assert X2t.format == 'csr'
assert _is_sorted_by_data(X2t)
# with radius
for mode in ['distance', 'connectivity']:
add_one = mode == 'distance'
nnt = RadiusNeighborsTransformer(radius=radius, mode=mode)
Xt = nnt.fit_transform(X)
assert Xt.shape == (n_samples_fit, n_samples_fit)
assert not Xt.data.shape == (n_samples_fit * (n_neighbors + add_one), )
assert Xt.format == 'csr'
assert _is_sorted_by_data(Xt)
X2t = nnt.transform(X2)
assert X2t.shape == (n_queries, n_samples_fit)
assert not X2t.data.shape == (n_queries * (n_neighbors + add_one), )
assert X2t.format == 'csr'
assert _is_sorted_by_data(X2t)
def _has_explicit_diagonal(X):
"""Return True if the diagonal is explicitly stored"""
X = X.tocoo()
explicit = X.row[X.row == X.col]
return len(explicit) == X.shape[0]
def test_explicit_diagonal():
# Test that the diagonal is explicitly stored in the sparse graph
n_neighbors = 5
n_samples_fit, n_samples_transform, n_features = 20, 18, 10
rng = np.random.RandomState(42)
X = rng.randn(n_samples_fit, n_features)
X2 = rng.randn(n_samples_transform, n_features)
nnt = KNeighborsTransformer(n_neighbors=n_neighbors)
Xt = nnt.fit_transform(X)
assert _has_explicit_diagonal(Xt)
assert np.all(Xt.data.reshape(n_samples_fit, n_neighbors + 1)[:, 0] == 0)
Xt = nnt.transform(X)
assert _has_explicit_diagonal(Xt)
assert np.all(Xt.data.reshape(n_samples_fit, n_neighbors + 1)[:, 0] == 0)
    # Transforming new data should not produce an explicit zero diagonal
X2t = nnt.transform(X2)
assert not _has_explicit_diagonal(X2t)
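
The add_one bookkeeping above reflects that mode='distance' stores each fitted sample as its own first neighbor with an explicit zero distance, so every row carries n_neighbors + 1 entries. A sketch of what that looks like on toy data:

import numpy as np
from sklearn.neighbors import KNeighborsTransformer

X = np.random.RandomState(0).randn(6, 2)
Xt = KNeighborsTransformer(n_neighbors=2, mode='distance').fit_transform(X)
print(Xt.shape)               # (6, 6) CSR graph
print(Xt.data.reshape(6, 3))  # first column: the explicit zero self-distance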

View file

@ -0,0 +1,6 @@
DIMENSION = 3
METRICS = {'euclidean': {},
'manhattan': {},
'chebyshev': {},
'minkowski': dict(p=3)}

View file

@ -0,0 +1,250 @@
import numpy as np
import pytest
from sklearn.utils._testing import assert_allclose, assert_raises
from sklearn.neighbors import KernelDensity, KDTree, NearestNeighbors
from sklearn.neighbors._ball_tree import kernel_norm
from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_blobs
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.exceptions import NotFittedError
import joblib
# XXX Duplicated in test_neighbors_tree, test_kde
def compute_kernel_slow(Y, X, kernel, h):
d = np.sqrt(((Y[:, None, :] - X) ** 2).sum(-1))
norm = kernel_norm(h, X.shape[1], kernel) / X.shape[0]
if kernel == 'gaussian':
return norm * np.exp(-0.5 * (d * d) / (h * h)).sum(-1)
elif kernel == 'tophat':
return norm * (d < h).sum(-1)
elif kernel == 'epanechnikov':
return norm * ((1.0 - (d * d) / (h * h)) * (d < h)).sum(-1)
elif kernel == 'exponential':
return norm * (np.exp(-d / h)).sum(-1)
elif kernel == 'linear':
return norm * ((1 - d / h) * (d < h)).sum(-1)
elif kernel == 'cosine':
return norm * (np.cos(0.5 * np.pi * d / h) * (d < h)).sum(-1)
else:
raise ValueError('kernel not recognized')
def check_results(kernel, bandwidth, atol, rtol, X, Y, dens_true):
kde = KernelDensity(kernel=kernel, bandwidth=bandwidth,
atol=atol, rtol=rtol)
log_dens = kde.fit(X).score_samples(Y)
assert_allclose(np.exp(log_dens), dens_true,
atol=atol, rtol=max(1E-7, rtol))
assert_allclose(np.exp(kde.score(Y)),
np.prod(dens_true),
atol=atol, rtol=max(1E-7, rtol))
@pytest.mark.parametrize(
'kernel',
['gaussian', 'tophat', 'epanechnikov',
'exponential', 'linear', 'cosine'])
@pytest.mark.parametrize('bandwidth', [0.01, 0.1, 1])
def test_kernel_density(kernel, bandwidth):
n_samples, n_features = (100, 3)
rng = np.random.RandomState(0)
X = rng.randn(n_samples, n_features)
Y = rng.randn(n_samples, n_features)
dens_true = compute_kernel_slow(Y, X, kernel, bandwidth)
for rtol in [0, 1E-5]:
for atol in [1E-6, 1E-2]:
for breadth_first in (True, False):
check_results(kernel, bandwidth, atol, rtol,
X, Y, dens_true)
def test_kernel_density_sampling(n_samples=100, n_features=3):
rng = np.random.RandomState(0)
X = rng.randn(n_samples, n_features)
bandwidth = 0.2
for kernel in ['gaussian', 'tophat']:
# draw a tophat sample
kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
samp = kde.sample(100)
assert X.shape == samp.shape
# check that samples are in the right range
nbrs = NearestNeighbors(n_neighbors=1).fit(X)
dist, ind = nbrs.kneighbors(X, return_distance=True)
if kernel == 'tophat':
assert np.all(dist < bandwidth)
elif kernel == 'gaussian':
# 5 standard deviations is safe for 100 samples, but there's a
# very small chance this test could fail.
assert np.all(dist < 5 * bandwidth)
# check unsupported kernels
for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
assert_raises(NotImplementedError, kde.sample, 100)
# non-regression test: used to return a scalar
X = rng.randn(4, 1)
kde = KernelDensity(kernel="gaussian").fit(X)
assert kde.sample().shape == (1, 1)
@pytest.mark.parametrize('algorithm', ['auto', 'ball_tree', 'kd_tree'])
@pytest.mark.parametrize('metric',
['euclidean', 'minkowski', 'manhattan',
'chebyshev', 'haversine'])
def test_kde_algorithm_metric_choice(algorithm, metric):
# Smoke test for various metrics and algorithms
rng = np.random.RandomState(0)
X = rng.randn(10, 2) # 2 features required for haversine dist.
Y = rng.randn(10, 2)
if algorithm == 'kd_tree' and metric not in KDTree.valid_metrics:
assert_raises(ValueError, KernelDensity,
algorithm=algorithm, metric=metric)
else:
kde = KernelDensity(algorithm=algorithm, metric=metric)
kde.fit(X)
y_dens = kde.score_samples(Y)
assert y_dens.shape == Y.shape[:1]
def test_kde_score(n_samples=100, n_features=3):
pass
# FIXME
# rng = np.random.RandomState(0)
# X = rng.random_sample((n_samples, n_features))
# Y = rng.random_sample((n_samples, n_features))
def test_kde_badargs():
assert_raises(ValueError, KernelDensity,
algorithm='blah')
assert_raises(ValueError, KernelDensity,
bandwidth=0)
assert_raises(ValueError, KernelDensity,
kernel='blah')
assert_raises(ValueError, KernelDensity,
metric='blah')
assert_raises(ValueError, KernelDensity,
algorithm='kd_tree', metric='blah')
kde = KernelDensity()
assert_raises(ValueError, kde.fit, np.random.random((200, 10)),
sample_weight=np.random.random((200, 10)))
assert_raises(ValueError, kde.fit, np.random.random((200, 10)),
sample_weight=-np.random.random(200))
def test_kde_pipeline_gridsearch():
# test that kde plays nice in pipelines and grid-searches
X, _ = make_blobs(cluster_std=.1, random_state=1,
centers=[[0, 1], [1, 0], [0, 0]])
pipe1 = make_pipeline(StandardScaler(with_mean=False, with_std=False),
KernelDensity(kernel="gaussian"))
params = dict(kerneldensity__bandwidth=[0.001, 0.01, 0.1, 1, 10])
search = GridSearchCV(pipe1, param_grid=params)
search.fit(X)
assert search.best_params_['kerneldensity__bandwidth'] == .1
def test_kde_sample_weights():
n_samples = 400
size_test = 20
weights_neutral = np.full(n_samples, 3.)
for d in [1, 2, 10]:
rng = np.random.RandomState(0)
X = rng.rand(n_samples, d)
weights = 1 + (10 * X.sum(axis=1)).astype(np.int8)
X_repetitions = np.repeat(X, weights, axis=0)
n_samples_test = size_test // d
test_points = rng.rand(n_samples_test, d)
for algorithm in ['auto', 'ball_tree', 'kd_tree']:
for metric in ['euclidean', 'minkowski', 'manhattan',
'chebyshev']:
if algorithm != 'kd_tree' or metric in KDTree.valid_metrics:
kde = KernelDensity(algorithm=algorithm, metric=metric)
# Test that adding a constant sample weight has no effect
kde.fit(X, sample_weight=weights_neutral)
scores_const_weight = kde.score_samples(test_points)
sample_const_weight = kde.sample(random_state=1234)
kde.fit(X)
scores_no_weight = kde.score_samples(test_points)
sample_no_weight = kde.sample(random_state=1234)
assert_allclose(scores_const_weight, scores_no_weight)
assert_allclose(sample_const_weight, sample_no_weight)
# Test equivalence between sampling and (integer) weights
kde.fit(X, sample_weight=weights)
scores_weight = kde.score_samples(test_points)
sample_weight = kde.sample(random_state=1234)
kde.fit(X_repetitions)
scores_ref_sampling = kde.score_samples(test_points)
sample_ref_sampling = kde.sample(random_state=1234)
assert_allclose(scores_weight, scores_ref_sampling)
assert_allclose(sample_weight, sample_ref_sampling)
# Test that sample weights has a non-trivial effect
diff = np.max(np.abs(scores_no_weight - scores_weight))
assert diff > 0.001
# Test invariance with respect to arbitrary scaling
scale_factor = rng.rand()
kde.fit(X, sample_weight=(scale_factor * weights))
scores_scaled_weight = kde.score_samples(test_points)
assert_allclose(scores_scaled_weight, scores_weight)
def test_sample_weight_invalid():
# Check sample weighting raises errors.
kde = KernelDensity()
data = np.reshape([1., 2., 3.], (-1, 1))
sample_weight = [0.1, -0.2, 0.3]
expected_err = "sample_weight must have positive values"
with pytest.raises(ValueError, match=expected_err):
kde.fit(data, sample_weight=sample_weight)
@pytest.mark.parametrize('sample_weight', [None, [0.1, 0.2, 0.3]])
def test_pickling(tmpdir, sample_weight):
# Make sure that predictions are the same before and after pickling. Used
# to be a bug because sample_weights wasn't pickled and the resulting tree
# would miss some info.
kde = KernelDensity()
data = np.reshape([1., 2., 3.], (-1, 1))
kde.fit(data, sample_weight=sample_weight)
X = np.reshape([1.1, 2.1], (-1, 1))
scores = kde.score_samples(X)
file_path = str(tmpdir.join('dump.pkl'))
joblib.dump(kde, file_path)
kde = joblib.load(file_path)
scores_pickled = kde.score_samples(X)
assert_allclose(scores, scores_pickled)
@pytest.mark.parametrize('method', ['score_samples', 'sample'])
def test_check_is_fitted(method):
    # Check that score_samples and sample raise an exception when called
    # on an unfitted estimator, namely a NotFittedError.
rng = np.random.RandomState(0)
X = rng.randn(10, 2)
kde = KernelDensity()
with pytest.raises(NotFittedError):
getattr(kde, method)(X)
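
compute_kernel_slow above is the dense reference implementation these tests compare against. The estimator-level equivalent, as a short sketch:

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
X = rng.randn(100, 3)
kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)
print(np.exp(kde.score_samples(X[:3])))  # density estimates at 3 points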

View file

@ -0,0 +1,232 @@
# Authors: Nicolas Goix <nicolas.goix@telecom-paristech.fr>
# Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
# License: BSD 3 clause
from math import sqrt
import numpy as np
from sklearn import neighbors
import pytest
from numpy.testing import assert_array_equal
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_warns_message
from sklearn.utils._testing import assert_raises
from sklearn.utils._testing import assert_raises_regex
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.estimator_checks import check_outlier_corruption
from sklearn.datasets import load_iris
# load the iris dataset
# and randomly permute it
rng = check_random_state(0)
iris = load_iris()
perm = rng.permutation(iris.target.size)
iris.data = iris.data[perm]
iris.target = iris.target[perm]
def test_lof():
# Toy sample (the last two samples are outliers):
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [5, 3], [-4, 2]]
# Test LocalOutlierFactor:
clf = neighbors.LocalOutlierFactor(n_neighbors=5)
score = clf.fit(X).negative_outlier_factor_
assert_array_equal(clf._fit_X, X)
# Assert largest outlier score is smaller than smallest inlier score:
assert np.min(score[:-2]) > np.max(score[-2:])
# Assert predict() works:
clf = neighbors.LocalOutlierFactor(contamination=0.25,
n_neighbors=5).fit(X)
assert_array_equal(clf._predict(), 6 * [1] + 2 * [-1])
assert_array_equal(clf.fit_predict(X), 6 * [1] + 2 * [-1])
def test_lof_performance():
# Generate train/test data
rng = check_random_state(2)
X = 0.3 * rng.randn(120, 2)
X_train = X[:100]
# Generate some abnormal novel observations
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
X_test = np.r_[X[100:], X_outliers]
y_test = np.array([0] * 20 + [1] * 20)
# fit the model for novelty detection
clf = neighbors.LocalOutlierFactor(novelty=True).fit(X_train)
# predict scores (the lower, the more normal)
y_pred = -clf.decision_function(X_test)
# check that roc_auc is good
assert roc_auc_score(y_test, y_pred) > .99
def test_lof_values():
# toy samples:
X_train = [[1, 1], [1, 2], [2, 1]]
clf1 = neighbors.LocalOutlierFactor(n_neighbors=2,
contamination=0.1,
novelty=True).fit(X_train)
clf2 = neighbors.LocalOutlierFactor(n_neighbors=2,
novelty=True).fit(X_train)
s_0 = 2. * sqrt(2.) / (1. + sqrt(2.))
s_1 = (1. + sqrt(2)) * (1. / (4. * sqrt(2.)) + 1. / (2. + 2. * sqrt(2)))
# check predict()
assert_array_almost_equal(-clf1.negative_outlier_factor_, [s_0, s_1, s_1])
assert_array_almost_equal(-clf2.negative_outlier_factor_, [s_0, s_1, s_1])
# check predict(one sample not in train)
assert_array_almost_equal(-clf1.score_samples([[2., 2.]]), [s_0])
assert_array_almost_equal(-clf2.score_samples([[2., 2.]]), [s_0])
# check predict(one sample already in train)
assert_array_almost_equal(-clf1.score_samples([[1., 1.]]), [s_1])
assert_array_almost_equal(-clf2.score_samples([[1., 1.]]), [s_1])
def test_lof_precomputed(random_state=42):
"""Tests LOF with a distance matrix."""
# Note: smaller samples may result in spurious test success
rng = np.random.RandomState(random_state)
X = rng.random_sample((10, 4))
Y = rng.random_sample((3, 4))
DXX = metrics.pairwise_distances(X, metric='euclidean')
DYX = metrics.pairwise_distances(Y, X, metric='euclidean')
# As a feature matrix (n_samples by n_features)
lof_X = neighbors.LocalOutlierFactor(n_neighbors=3, novelty=True)
lof_X.fit(X)
pred_X_X = lof_X._predict()
pred_X_Y = lof_X.predict(Y)
# As a dense distance matrix (n_samples by n_samples)
lof_D = neighbors.LocalOutlierFactor(n_neighbors=3, algorithm='brute',
metric='precomputed', novelty=True)
lof_D.fit(DXX)
pred_D_X = lof_D._predict()
pred_D_Y = lof_D.predict(DYX)
assert_array_almost_equal(pred_X_X, pred_D_X)
assert_array_almost_equal(pred_X_Y, pred_D_Y)
def test_n_neighbors_attribute():
X = iris.data
clf = neighbors.LocalOutlierFactor(n_neighbors=500).fit(X)
assert clf.n_neighbors_ == X.shape[0] - 1
clf = neighbors.LocalOutlierFactor(n_neighbors=500)
assert_warns_message(UserWarning,
"n_neighbors will be set to (n_samples - 1)",
clf.fit, X)
assert clf.n_neighbors_ == X.shape[0] - 1
def test_score_samples():
X_train = [[1, 1], [1, 2], [2, 1]]
clf1 = neighbors.LocalOutlierFactor(n_neighbors=2,
contamination=0.1,
novelty=True).fit(X_train)
clf2 = neighbors.LocalOutlierFactor(n_neighbors=2,
novelty=True).fit(X_train)
assert_array_equal(clf1.score_samples([[2., 2.]]),
clf1.decision_function([[2., 2.]]) + clf1.offset_)
assert_array_equal(clf2.score_samples([[2., 2.]]),
clf2.decision_function([[2., 2.]]) + clf2.offset_)
assert_array_equal(clf1.score_samples([[2., 2.]]),
clf2.score_samples([[2., 2.]]))
def test_contamination():
X = [[1, 1], [1, 0]]
clf = neighbors.LocalOutlierFactor(contamination=0.6)
assert_raises(ValueError, clf.fit, X)
def test_novelty_errors():
X = iris.data
# check errors for novelty=False
clf = neighbors.LocalOutlierFactor()
clf.fit(X)
    # predict, decision_function and score_samples raise AttributeError
for method in ['predict', 'decision_function', 'score_samples']:
msg = ('{} is not available when novelty=False'.format(method))
assert_raises_regex(AttributeError, msg, getattr, clf, method)
# check errors for novelty=True
clf = neighbors.LocalOutlierFactor(novelty=True)
msg = 'fit_predict is not available when novelty=True'
assert_raises_regex(AttributeError, msg, getattr, clf, 'fit_predict')
def test_novelty_training_scores():
# check that the scores of the training samples are still accessible
# when novelty=True through the negative_outlier_factor_ attribute
X = iris.data
# fit with novelty=False
clf_1 = neighbors.LocalOutlierFactor()
clf_1.fit(X)
scores_1 = clf_1.negative_outlier_factor_
# fit with novelty=True
clf_2 = neighbors.LocalOutlierFactor(novelty=True)
clf_2.fit(X)
scores_2 = clf_2.negative_outlier_factor_
assert_array_almost_equal(scores_1, scores_2)
def test_hasattr_prediction():
# check availability of prediction methods depending on novelty value.
X = [[1, 1], [1, 2], [2, 1]]
# when novelty=True
clf = neighbors.LocalOutlierFactor(novelty=True)
clf.fit(X)
assert hasattr(clf, 'predict')
assert hasattr(clf, 'decision_function')
assert hasattr(clf, 'score_samples')
assert not hasattr(clf, 'fit_predict')
# when novelty=False
clf = neighbors.LocalOutlierFactor(novelty=False)
clf.fit(X)
assert hasattr(clf, 'fit_predict')
assert not hasattr(clf, 'predict')
assert not hasattr(clf, 'decision_function')
assert not hasattr(clf, 'score_samples')
def test_novelty_true_common_tests():
# the common tests are run for the default LOF (novelty=False).
# here we run these common tests for LOF when novelty=True
check_estimator(neighbors.LocalOutlierFactor(novelty=True))
@pytest.mark.parametrize('expected_outliers', [30, 53])
def test_predicted_outlier_number(expected_outliers):
# the number of predicted outliers should be equal to the number of
# expected outliers unless there are ties in the abnormality scores.
X = iris.data
n_samples = X.shape[0]
contamination = float(expected_outliers)/n_samples
clf = neighbors.LocalOutlierFactor(contamination=contamination)
y_pred = clf.fit_predict(X)
num_outliers = np.sum(y_pred != 1)
if num_outliers != expected_outliers:
y_dec = clf.negative_outlier_factor_
check_outlier_corruption(num_outliers, expected_outliers, y_dec)
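
As a compact illustration of the outlier-detection behaviour exercised above, the toy data from test_lof run through fit_predict (a sketch; expected output per that test):

import numpy as np
from sklearn.neighbors import LocalOutlierFactor

X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],
              [5, 3], [-4, 2]])           # last two rows are outliers
clf = LocalOutlierFactor(n_neighbors=5, contamination=0.25)
print(clf.fit_predict(X))                 # [ 1  1  1  1  1  1 -1 -1]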

View file

@ -0,0 +1,534 @@
# coding: utf-8
"""
Testing for Neighborhood Component Analysis module (sklearn.neighbors.nca)
"""
# Authors: William de Vazelhes <wdevazelhes@gmail.com>
# John Chiotellis <ioannis.chiotellis@in.tum.de>
# License: BSD 3 clause
import pytest
import re
import numpy as np
from numpy.testing import assert_array_equal, assert_array_almost_equal
from scipy.optimize import check_grad
from sklearn import clone
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils import check_random_state
from sklearn.utils._testing import (assert_raises,
assert_raise_message, assert_warns_message)
from sklearn.datasets import load_iris, make_classification, make_blobs
from sklearn.neighbors import NeighborhoodComponentsAnalysis
from sklearn.metrics import pairwise_distances
rng = check_random_state(0)
# load and shuffle iris dataset
iris = load_iris()
perm = rng.permutation(iris.target.size)
iris_data = iris.data[perm]
iris_target = iris.target[perm]
EPS = np.finfo(float).eps
def test_simple_example():
"""Test on a simple example.
Puts four points in the input space where the opposite labels points are
next to each other. After transform the samples from the same class
should be next to each other.
"""
X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
y = np.array([1, 0, 1, 0])
nca = NeighborhoodComponentsAnalysis(n_components=2, init='identity',
random_state=42)
nca.fit(X, y)
X_t = nca.transform(X)
assert_array_equal(pairwise_distances(X_t).argsort()[:, 1],
np.array([2, 3, 0, 1]))
def test_toy_example_collapse_points():
"""Test on a toy example of three points that should collapse
We build a simple example: two points from the same class and a point from
a different class in the middle of them. On this simple example, the new
(transformed) points should all collapse into one single point. Indeed, the
objective is 2/(1 + exp(d/2)), with d the euclidean distance between the
two samples from the same class. This is maximized for d=0 (because d>=0),
with an objective equal to 1 (loss=-1.).
"""
rng = np.random.RandomState(42)
input_dim = 5
two_points = rng.randn(2, input_dim)
X = np.vstack([two_points, two_points.mean(axis=0)[np.newaxis, :]])
y = [0, 0, 1]
class LossStorer:
def __init__(self, X, y):
self.loss = np.inf # initialize the loss to very high
# Initialize a fake NCA and variables needed to compute the loss:
self.fake_nca = NeighborhoodComponentsAnalysis()
self.fake_nca.n_iter_ = np.inf
self.X, y, _ = self.fake_nca._validate_params(X, y)
self.same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]
def callback(self, transformation, n_iter):
"""Stores the last value of the loss function"""
self.loss, _ = self.fake_nca._loss_grad_lbfgs(transformation,
self.X,
self.same_class_mask,
-1.0)
loss_storer = LossStorer(X, y)
nca = NeighborhoodComponentsAnalysis(random_state=42,
callback=loss_storer.callback)
X_t = nca.fit_transform(X, y)
print(X_t)
# test that points are collapsed into one point
assert_array_almost_equal(X_t - X_t[0], 0.)
assert abs(loss_storer.loss + 1) < 1e-10
def test_finite_differences():
"""Test gradient of loss function
Assert that the gradient is almost equal to its finite differences
approximation.
"""
# Initialize the transformation `M`, as well as `X` and `y` and `NCA`
rng = np.random.RandomState(42)
X, y = make_classification()
M = rng.randn(rng.randint(1, X.shape[1] + 1),
X.shape[1])
nca = NeighborhoodComponentsAnalysis()
nca.n_iter_ = 0
mask = y[:, np.newaxis] == y[np.newaxis, :]
def fun(M):
return nca._loss_grad_lbfgs(M, X, mask)[0]
def grad(M):
return nca._loss_grad_lbfgs(M, X, mask)[1]
# compute relative error
rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M))
np.testing.assert_almost_equal(rel_diff, 0., decimal=5)
def test_params_validation():
# Test that invalid parameters raise value error
X = np.arange(12).reshape(4, 3)
y = [1, 1, 2, 2]
NCA = NeighborhoodComponentsAnalysis
rng = np.random.RandomState(42)
# TypeError
assert_raises(TypeError, NCA(max_iter='21').fit, X, y)
assert_raises(TypeError, NCA(verbose='true').fit, X, y)
assert_raises(TypeError, NCA(tol='1').fit, X, y)
assert_raises(TypeError, NCA(n_components='invalid').fit, X, y)
assert_raises(TypeError, NCA(warm_start=1).fit, X, y)
# ValueError
assert_raise_message(ValueError,
"`init` must be 'auto', 'pca', 'lda', 'identity', "
"'random' or a numpy array of shape "
"(n_components, n_features).",
NCA(init=1).fit, X, y)
assert_raise_message(ValueError,
'`max_iter`= -1, must be >= 1.',
NCA(max_iter=-1).fit, X, y)
init = rng.rand(5, 3)
assert_raise_message(ValueError,
'The output dimensionality ({}) of the given linear '
'transformation `init` cannot be greater than its '
'input dimensionality ({}).'
.format(init.shape[0], init.shape[1]),
NCA(init=init).fit, X, y)
n_components = 10
assert_raise_message(ValueError,
'The preferred dimensionality of the '
'projected space `n_components` ({}) cannot '
'be greater than the given data '
'dimensionality ({})!'
.format(n_components, X.shape[1]),
NCA(n_components=n_components).fit, X, y)
def test_transformation_dimensions():
X = np.arange(12).reshape(4, 3)
y = [1, 1, 2, 2]
# Fail if transformation input dimension does not match inputs dimensions
transformation = np.array([[1, 2], [3, 4]])
assert_raises(ValueError,
NeighborhoodComponentsAnalysis(init=transformation).fit,
X, y)
# Fail if transformation output dimension is larger than
# transformation input dimension
transformation = np.array([[1, 2], [3, 4], [5, 6]])
# len(transformation) > len(transformation[0])
assert_raises(ValueError,
NeighborhoodComponentsAnalysis(init=transformation).fit,
X, y)
# Pass otherwise
transformation = np.arange(9).reshape(3, 3)
NeighborhoodComponentsAnalysis(init=transformation).fit(X, y)
def test_n_components():
rng = np.random.RandomState(42)
X = np.arange(12).reshape(4, 3)
y = [1, 1, 2, 2]
init = rng.rand(X.shape[1] - 1, 3)
# n_components = X.shape[1] != transformation.shape[0]
n_components = X.shape[1]
nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
assert_raise_message(ValueError,
'The preferred dimensionality of the '
'projected space `n_components` ({}) does not match '
'the output dimensionality of the given '
'linear transformation `init` ({})!'
.format(n_components, init.shape[0]),
nca.fit, X, y)
# n_components > X.shape[1]
n_components = X.shape[1] + 2
nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
assert_raise_message(ValueError,
'The preferred dimensionality of the '
'projected space `n_components` ({}) cannot '
'be greater than the given data '
'dimensionality ({})!'
.format(n_components, X.shape[1]),
nca.fit, X, y)
# n_components < X.shape[1]
nca = NeighborhoodComponentsAnalysis(n_components=2, init='identity')
nca.fit(X, y)
def test_init_transformation():
rng = np.random.RandomState(42)
X, y = make_blobs(n_samples=30, centers=6, n_features=5, random_state=0)
# Start learning from scratch
nca = NeighborhoodComponentsAnalysis(init='identity')
nca.fit(X, y)
# Initialize with random
nca_random = NeighborhoodComponentsAnalysis(init='random')
nca_random.fit(X, y)
# Initialize with auto
nca_auto = NeighborhoodComponentsAnalysis(init='auto')
nca_auto.fit(X, y)
# Initialize with PCA
nca_pca = NeighborhoodComponentsAnalysis(init='pca')
nca_pca.fit(X, y)
# Initialize with LDA
nca_lda = NeighborhoodComponentsAnalysis(init='lda')
nca_lda.fit(X, y)
init = rng.rand(X.shape[1], X.shape[1])
nca = NeighborhoodComponentsAnalysis(init=init)
nca.fit(X, y)
# init.shape[1] must match X.shape[1]
init = rng.rand(X.shape[1], X.shape[1] + 1)
nca = NeighborhoodComponentsAnalysis(init=init)
assert_raise_message(ValueError,
'The input dimensionality ({}) of the given '
'linear transformation `init` must match the '
'dimensionality of the given inputs `X` ({}).'
.format(init.shape[1], X.shape[1]),
nca.fit, X, y)
# init.shape[0] must be <= init.shape[1]
init = rng.rand(X.shape[1] + 1, X.shape[1])
nca = NeighborhoodComponentsAnalysis(init=init)
assert_raise_message(ValueError,
'The output dimensionality ({}) of the given '
'linear transformation `init` cannot be '
'greater than its input dimensionality ({}).'
.format(init.shape[0], init.shape[1]),
nca.fit, X, y)
# init.shape[0] must match n_components
init = rng.rand(X.shape[1], X.shape[1])
n_components = X.shape[1] - 2
nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
assert_raise_message(ValueError,
'The preferred dimensionality of the '
'projected space `n_components` ({}) does not match '
'the output dimensionality of the given '
'linear transformation `init` ({})!'
.format(n_components, init.shape[0]),
nca.fit, X, y)
@pytest.mark.parametrize('n_samples', [3, 5, 7, 11])
@pytest.mark.parametrize('n_features', [3, 5, 7, 11])
@pytest.mark.parametrize('n_classes', [5, 7, 11])
@pytest.mark.parametrize('n_components', [3, 5, 7, 11])
def test_auto_init(n_samples, n_features, n_classes, n_components):
# Test that auto choose the init as expected with every configuration
# of order of n_samples, n_features, n_classes and n_components.
rng = np.random.RandomState(42)
nca_base = NeighborhoodComponentsAnalysis(init='auto',
n_components=n_components,
max_iter=1,
random_state=rng)
if n_classes >= n_samples:
pass
# n_classes > n_samples is impossible, and n_classes == n_samples
# throws an error from lda but is an absurd case
else:
X = rng.randn(n_samples, n_features)
y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
if n_components > n_features:
# this would return a ValueError, which is already tested in
# test_params_validation
pass
else:
nca = clone(nca_base)
nca.fit(X, y)
if n_components <= min(n_classes - 1, n_features):
nca_other = clone(nca_base).set_params(init='lda')
elif n_components < min(n_features, n_samples):
nca_other = clone(nca_base).set_params(init='pca')
else:
nca_other = clone(nca_base).set_params(init='identity')
nca_other.fit(X, y)
assert_array_almost_equal(nca.components_, nca_other.components_)
def test_warm_start_validation():
X, y = make_classification(n_samples=30, n_features=5, n_classes=4,
n_redundant=0, n_informative=5, random_state=0)
nca = NeighborhoodComponentsAnalysis(warm_start=True, max_iter=5)
nca.fit(X, y)
X_less_features, y = make_classification(n_samples=30, n_features=4,
n_classes=4, n_redundant=0,
n_informative=4, random_state=0)
assert_raise_message(ValueError,
'The new inputs dimensionality ({}) does not '
'match the input dimensionality of the '
'previously learned transformation ({}).'
.format(X_less_features.shape[1],
nca.components_.shape[1]),
nca.fit, X_less_features, y)
def test_warm_start_effectiveness():
# A 1-iteration second fit on same data should give almost same result
# with warm starting, and quite different result without warm starting.
nca_warm = NeighborhoodComponentsAnalysis(warm_start=True, random_state=0)
nca_warm.fit(iris_data, iris_target)
transformation_warm = nca_warm.components_
nca_warm.max_iter = 1
nca_warm.fit(iris_data, iris_target)
transformation_warm_plus_one = nca_warm.components_
nca_cold = NeighborhoodComponentsAnalysis(warm_start=False, random_state=0)
nca_cold.fit(iris_data, iris_target)
transformation_cold = nca_cold.components_
nca_cold.max_iter = 1
nca_cold.fit(iris_data, iris_target)
transformation_cold_plus_one = nca_cold.components_
diff_warm = np.sum(np.abs(transformation_warm_plus_one -
transformation_warm))
diff_cold = np.sum(np.abs(transformation_cold_plus_one -
transformation_cold))
assert diff_warm < 3.0, ("Transformer changed significantly after one "
"iteration even though it was warm-started.")
assert diff_cold > diff_warm, ("Cold-started transformer changed less "
"significantly than warm-started "
"transformer after one iteration.")
@pytest.mark.parametrize('init_name', ['pca', 'lda', 'identity', 'random',
'precomputed'])
def test_verbose(init_name, capsys):
# assert there is proper output when verbose = 1, for every initialization
# except auto because auto will call one of the others
rng = np.random.RandomState(42)
X, y = make_blobs(n_samples=30, centers=6, n_features=5, random_state=0)
regexp_init = r'... done in \ *\d+\.\d{2}s'
msgs = {'pca': "Finding principal components" + regexp_init,
'lda': "Finding most discriminative components" + regexp_init}
if init_name == 'precomputed':
init = rng.randn(X.shape[1], X.shape[1])
else:
init = init_name
nca = NeighborhoodComponentsAnalysis(verbose=1, init=init)
nca.fit(X, y)
out, _ = capsys.readouterr()
# check output
lines = re.split('\n+', out)
# if pca or lda init, an additional line is printed, so we test
# it and remove it to test the rest equally among initializations
if init_name in ['pca', 'lda']:
assert re.match(msgs[init_name], lines[0])
lines = lines[1:]
assert lines[0] == '[NeighborhoodComponentsAnalysis]'
header = '{:>10} {:>20} {:>10}'.format('Iteration', 'Objective Value',
'Time(s)')
assert lines[1] == '[NeighborhoodComponentsAnalysis] {}'.format(header)
assert lines[2] == ('[NeighborhoodComponentsAnalysis] {}'
.format('-' * len(header)))
for line in lines[3:-2]:
# The following regex will match for instance:
# '[NeighborhoodComponentsAnalysis] 0 6.988936e+01 0.01'
assert re.match(r'\[NeighborhoodComponentsAnalysis\] *\d+ *\d\.\d{6}e'
r'[+|-]\d+\ *\d+\.\d{2}', line)
assert re.match(r'\[NeighborhoodComponentsAnalysis\] Training took\ *'
r'\d+\.\d{2}s\.', lines[-2])
assert lines[-1] == ''
def test_no_verbose(capsys):
# assert by default there is no output (verbose=0)
nca = NeighborhoodComponentsAnalysis()
nca.fit(iris_data, iris_target)
out, _ = capsys.readouterr()
# check output
assert(out == '')
def test_singleton_class():
X = iris_data
y = iris_target
# one singleton class
singleton_class = 1
ind_singleton, = np.where(y == singleton_class)
y[ind_singleton] = 2
y[ind_singleton[0]] = singleton_class
nca = NeighborhoodComponentsAnalysis(max_iter=30)
nca.fit(X, y)
# One non-singleton class
ind_1, = np.where(y == 1)
ind_2, = np.where(y == 2)
y[ind_1] = 0
y[ind_1[0]] = 1
y[ind_2] = 0
y[ind_2[0]] = 2
nca = NeighborhoodComponentsAnalysis(max_iter=30)
nca.fit(X, y)
# Only singleton classes
ind_0, = np.where(y == 0)
ind_1, = np.where(y == 1)
ind_2, = np.where(y == 2)
X = X[[ind_0[0], ind_1[0], ind_2[0]]]
y = y[[ind_0[0], ind_1[0], ind_2[0]]]
nca = NeighborhoodComponentsAnalysis(init='identity', max_iter=30)
nca.fit(X, y)
assert_array_equal(X, nca.transform(X))
def test_one_class():
X = iris_data[iris_target == 0]
y = iris_target[iris_target == 0]
nca = NeighborhoodComponentsAnalysis(max_iter=30,
n_components=X.shape[1],
init='identity')
nca.fit(X, y)
assert_array_equal(X, nca.transform(X))
def test_callback(capsys):
X = iris_data
y = iris_target
nca = NeighborhoodComponentsAnalysis(callback='my_cb')
assert_raises(ValueError, nca.fit, X, y)
max_iter = 10
def my_cb(transformation, n_iter):
assert transformation.shape == (iris_data.shape[1]**2,)
rem_iter = max_iter - n_iter
print('{} iterations remaining...'.format(rem_iter))
# assert that my_cb is called
nca = NeighborhoodComponentsAnalysis(max_iter=max_iter,
callback=my_cb, verbose=1)
nca.fit(iris_data, iris_target)
out, _ = capsys.readouterr()
# check output
assert('{} iterations remaining...'.format(max_iter - 1) in out)
def test_expected_transformation_shape():
"""Test that the transformation has the expected shape."""
X = iris_data
y = iris_target
class TransformationStorer:
def __init__(self, X, y):
# Initialize a fake NCA and variables needed to call the loss
# function:
self.fake_nca = NeighborhoodComponentsAnalysis()
self.fake_nca.n_iter_ = np.inf
self.X, y, _ = self.fake_nca._validate_params(X, y)
self.same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]
def callback(self, transformation, n_iter):
"""Stores the last value of the transformation taken as input by
the optimizer"""
self.transformation = transformation
transformation_storer = TransformationStorer(X, y)
cb = transformation_storer.callback
nca = NeighborhoodComponentsAnalysis(max_iter=5, callback=cb)
nca.fit(X, y)
assert transformation_storer.transformation.size == X.shape[1]**2
def test_convergence_warning():
nca = NeighborhoodComponentsAnalysis(max_iter=2, verbose=1)
cls_name = nca.__class__.__name__
assert_warns_message(ConvergenceWarning,
'[{}] NCA did not converge'.format(cls_name),
nca.fit, iris_data, iris_target)
@pytest.mark.parametrize('param, value', [('n_components', np.int32(3)),
('max_iter', np.int32(100)),
('tol', np.float32(0.0001))])
def test_parameters_valid_types(param, value):
# check that no error is raised when parameters have numpy integer or
# floating types.
nca = NeighborhoodComponentsAnalysis(**{param: value})
X = iris_data
y = iris_target
nca.fit(X, y)
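
A typical end-to-end use of the estimator under test, sketched as the usual NCA + k-NN pipeline on the same iris data these tests rely on:

from sklearn.datasets import load_iris
from sklearn.neighbors import (KNeighborsClassifier,
                               NeighborhoodComponentsAnalysis)
from sklearn.pipeline import make_pipeline

X, y = load_iris(return_X_y=True)
clf = make_pipeline(
    NeighborhoodComponentsAnalysis(n_components=2, random_state=42),
    KNeighborsClassifier(n_neighbors=3))
clf.fit(X, y)
print(clf.score(X, y))  # training accuracy after the learned projection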

View file

@ -0,0 +1,148 @@
"""
Testing for the nearest centroid module.
"""
import numpy as np
from scipy import sparse as sp
from numpy.testing import assert_array_equal
from sklearn.neighbors import NearestCentroid
from sklearn import datasets
from sklearn.utils._testing import assert_raises
# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
X_csr = sp.csr_matrix(X) # Sparse matrix
y = [-1, -1, -1, 1, 1, 1]
T = [[-1, -1], [2, 2], [3, 2]]
T_csr = sp.csr_matrix(T)
true_result = [-1, 1, 1]
# also load the iris dataset
# and randomly permute it
iris = datasets.load_iris()
rng = np.random.RandomState(1)
perm = rng.permutation(iris.target.size)
iris.data = iris.data[perm]
iris.target = iris.target[perm]
def test_classification_toy():
# Check classification on a toy dataset, including sparse versions.
clf = NearestCentroid()
clf.fit(X, y)
assert_array_equal(clf.predict(T), true_result)
# Same test, but with a sparse matrix to fit and test.
clf = NearestCentroid()
clf.fit(X_csr, y)
assert_array_equal(clf.predict(T_csr), true_result)
# Fit with sparse, test with non-sparse
clf = NearestCentroid()
clf.fit(X_csr, y)
assert_array_equal(clf.predict(T), true_result)
# Fit with non-sparse, test with sparse
clf = NearestCentroid()
clf.fit(X, y)
assert_array_equal(clf.predict(T_csr), true_result)
# Fit and predict with non-CSR sparse matrices
clf = NearestCentroid()
clf.fit(X_csr.tocoo(), y)
assert_array_equal(clf.predict(T_csr.tolil()), true_result)
def test_precomputed():
clf = NearestCentroid(metric='precomputed')
with assert_raises(ValueError):
clf.fit(X, y)
def test_iris():
# Check consistency on dataset iris.
for metric in ('euclidean', 'cosine'):
clf = NearestCentroid(metric=metric).fit(iris.data, iris.target)
score = np.mean(clf.predict(iris.data) == iris.target)
assert score > 0.9, "Failed with score = " + str(score)
def test_iris_shrinkage():
# Check consistency on dataset iris, when using shrinkage.
for metric in ('euclidean', 'cosine'):
for shrink_threshold in [None, 0.1, 0.5]:
clf = NearestCentroid(metric=metric,
shrink_threshold=shrink_threshold)
clf = clf.fit(iris.data, iris.target)
score = np.mean(clf.predict(iris.data) == iris.target)
assert score > 0.8, "Failed with score = " + str(score)
def test_pickle():
import pickle
# classification
obj = NearestCentroid()
obj.fit(iris.data, iris.target)
score = obj.score(iris.data, iris.target)
s = pickle.dumps(obj)
obj2 = pickle.loads(s)
assert type(obj2) == obj.__class__
score2 = obj2.score(iris.data, iris.target)
assert_array_equal(score, score2,
"Failed to generate same score"
" after pickling (classification).")
def test_shrinkage_correct():
# Ensure that the shrinking is correct.
# The expected result is calculated by R (pamr),
# which is implemented by the author of the original paper.
    # (One needs to modify the code to output the new centroid in pamr.predict)
X = np.array([[0, 1], [1, 0], [1, 1], [2, 0], [6, 8]])
y = np.array([1, 1, 2, 2, 2])
clf = NearestCentroid(shrink_threshold=0.1)
clf.fit(X, y)
expected_result = np.array([[0.7787310, 0.8545292], [2.814179, 2.763647]])
np.testing.assert_array_almost_equal(clf.centroids_, expected_result)
def test_shrinkage_threshold_decoded_y():
clf = NearestCentroid(shrink_threshold=0.01)
y_ind = np.asarray(y)
y_ind[y_ind == -1] = 0
clf.fit(X, y_ind)
centroid_encoded = clf.centroids_
clf.fit(X, y)
assert_array_equal(centroid_encoded, clf.centroids_)
def test_predict_translated_data():
# Test that NearestCentroid gives same results on translated data
rng = np.random.RandomState(0)
X = rng.rand(50, 50)
y = rng.randint(0, 3, 50)
noise = rng.rand(50)
clf = NearestCentroid(shrink_threshold=0.1)
clf.fit(X, y)
y_init = clf.predict(X)
clf = NearestCentroid(shrink_threshold=0.1)
X_noise = X + noise
clf.fit(X_noise, y)
y_translate = clf.predict(X_noise)
assert_array_equal(y_init, y_translate)
def test_manhattan_metric():
# Test the manhattan metric.
clf = NearestCentroid(metric='manhattan')
clf.fit(X, y)
dense_centroid = clf.centroids_
clf.fit(X_csr, y)
assert_array_equal(clf.centroids_, dense_centroid)
assert_array_equal(dense_centroid, [[-1, -1], [1, 1]])
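
For reference, the shrinkage behaviour tested above in miniature (a sketch on the module's toy sample):

import numpy as np
from sklearn.neighbors import NearestCentroid

X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
y = np.array([-1, -1, -1, 1, 1, 1])
clf = NearestCentroid(shrink_threshold=0.1).fit(X, y)
print(clf.centroids_)                   # per-class centroids after shrinkage
print(clf.predict([[-1, -1], [2, 2]]))  # [-1  1]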

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,221 @@
"""
This is testing the equivalence between some estimators with internal nearest
neighbors computations, and the corresponding pipeline versions with
KNeighborsTransformer or RadiusNeighborsTransformer to precompute the
neighbors.
"""
import numpy as np
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.cluster.tests.common import generate_clustered_data
from sklearn.datasets import make_blobs
from sklearn.pipeline import make_pipeline
from sklearn.base import clone
from sklearn.neighbors import KNeighborsTransformer
from sklearn.neighbors import RadiusNeighborsTransformer
from sklearn.cluster import DBSCAN
from sklearn.cluster import SpectralClustering
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn.neighbors import LocalOutlierFactor
from sklearn.manifold import SpectralEmbedding
from sklearn.manifold import Isomap
from sklearn.manifold import TSNE
def test_spectral_clustering():
# Test chaining KNeighborsTransformer and SpectralClustering
n_neighbors = 5
X, _ = make_blobs(random_state=0)
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(n_neighbors=n_neighbors, mode='connectivity'),
SpectralClustering(n_neighbors=n_neighbors, affinity='precomputed',
random_state=42))
est_compact = SpectralClustering(
n_neighbors=n_neighbors, affinity='nearest_neighbors', random_state=42)
labels_compact = est_compact.fit_predict(X)
labels_chain = est_chain.fit_predict(X)
assert_array_almost_equal(labels_chain, labels_compact)
def test_spectral_embedding():
# Test chaining KNeighborsTransformer and SpectralEmbedding
n_neighbors = 5
n_samples = 1000
centers = np.array([
[0.0, 5.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 4.0, 0.0, 0.0],
[1.0, 0.0, 0.0, 5.0, 1.0],
])
S, true_labels = make_blobs(n_samples=n_samples, centers=centers,
cluster_std=1., random_state=42)
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(n_neighbors=n_neighbors, mode='connectivity'),
SpectralEmbedding(n_neighbors=n_neighbors, affinity='precomputed',
random_state=42))
est_compact = SpectralEmbedding(
n_neighbors=n_neighbors, affinity='nearest_neighbors', random_state=42)
St_compact = est_compact.fit_transform(S)
St_chain = est_chain.fit_transform(S)
assert_array_almost_equal(St_chain, St_compact)
def test_dbscan():
# Test chaining RadiusNeighborsTransformer and DBSCAN
radius = 0.3
n_clusters = 3
X = generate_clustered_data(n_clusters=n_clusters)
# compare the chained version and the compact version
est_chain = make_pipeline(
RadiusNeighborsTransformer(radius=radius, mode='distance'),
DBSCAN(metric='precomputed', eps=radius))
est_compact = DBSCAN(eps=radius)
labels_chain = est_chain.fit_predict(X)
labels_compact = est_compact.fit_predict(X)
assert_array_almost_equal(labels_chain, labels_compact)
def test_isomap():
# Test chaining KNeighborsTransformer and Isomap with
# neighbors_algorithm='precomputed'
algorithm = 'auto'
n_neighbors = 10
X, _ = make_blobs(random_state=0)
X2, _ = make_blobs(random_state=1)
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(n_neighbors=n_neighbors, algorithm=algorithm,
mode='distance'),
Isomap(n_neighbors=n_neighbors, metric='precomputed'))
est_compact = Isomap(n_neighbors=n_neighbors,
neighbors_algorithm=algorithm)
Xt_chain = est_chain.fit_transform(X)
Xt_compact = est_compact.fit_transform(X)
assert_array_almost_equal(Xt_chain, Xt_compact)
Xt_chain = est_chain.transform(X2)
Xt_compact = est_compact.transform(X2)
assert_array_almost_equal(Xt_chain, Xt_compact)
def test_tsne():
# Test chaining KNeighborsTransformer and TSNE
n_iter = 250
perplexity = 5
n_neighbors = int(3. * perplexity + 1)
rng = np.random.RandomState(0)
X = rng.randn(20, 2)
for metric in ['minkowski', 'sqeuclidean']:
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(n_neighbors=n_neighbors, mode='distance',
metric=metric),
TSNE(metric='precomputed', perplexity=perplexity,
method="barnes_hut", random_state=42, n_iter=n_iter))
est_compact = TSNE(metric=metric, perplexity=perplexity, n_iter=n_iter,
method="barnes_hut", random_state=42)
Xt_chain = est_chain.fit_transform(X)
Xt_compact = est_compact.fit_transform(X)
assert_array_almost_equal(Xt_chain, Xt_compact)
def test_lof_novelty_false():
# Test chaining KNeighborsTransformer and LocalOutlierFactor
n_neighbors = 4
rng = np.random.RandomState(0)
X = rng.randn(40, 2)
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(n_neighbors=n_neighbors, mode='distance'),
LocalOutlierFactor(metric='precomputed', n_neighbors=n_neighbors,
novelty=False, contamination="auto"))
est_compact = LocalOutlierFactor(n_neighbors=n_neighbors, novelty=False,
contamination="auto")
pred_chain = est_chain.fit_predict(X)
pred_compact = est_compact.fit_predict(X)
assert_array_almost_equal(pred_chain, pred_compact)
def test_lof_novelty_true():
# Test chaining KNeighborsTransformer and LocalOutlierFactor
n_neighbors = 4
rng = np.random.RandomState(0)
X1 = rng.randn(40, 2)
X2 = rng.randn(40, 2)
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(n_neighbors=n_neighbors, mode='distance'),
LocalOutlierFactor(metric='precomputed', n_neighbors=n_neighbors,
novelty=True, contamination="auto"))
est_compact = LocalOutlierFactor(n_neighbors=n_neighbors, novelty=True,
contamination="auto")
pred_chain = est_chain.fit(X1).predict(X2)
pred_compact = est_compact.fit(X1).predict(X2)
assert_array_almost_equal(pred_chain, pred_compact)
def test_kneighbors_regressor():
# Test chaining KNeighborsTransformer and classifiers/regressors
rng = np.random.RandomState(0)
X = 2 * rng.rand(40, 5) - 1
X2 = 2 * rng.rand(40, 5) - 1
y = rng.rand(40, 1)
n_neighbors = 12
radius = 1.5
    # We precompute more neighbors than necessary, so that a k-neighbors
    # estimator chained after a radius-neighbors transformer (and vice-versa)
    # remains equivalent to the compact estimator.
factor = 2
k_trans = KNeighborsTransformer(n_neighbors=n_neighbors, mode='distance')
k_trans_factor = KNeighborsTransformer(n_neighbors=int(
n_neighbors * factor), mode='distance')
r_trans = RadiusNeighborsTransformer(radius=radius, mode='distance')
r_trans_factor = RadiusNeighborsTransformer(radius=int(
radius * factor), mode='distance')
k_reg = KNeighborsRegressor(n_neighbors=n_neighbors)
r_reg = RadiusNeighborsRegressor(radius=radius)
test_list = [
(k_trans, k_reg),
(k_trans_factor, r_reg),
(r_trans, r_reg),
(r_trans_factor, k_reg),
]
for trans, reg in test_list:
# compare the chained version and the compact version
reg_compact = clone(reg)
reg_precomp = clone(reg)
reg_precomp.set_params(metric='precomputed')
reg_chain = make_pipeline(clone(trans), reg_precomp)
y_pred_chain = reg_chain.fit(X, y).predict(X2)
y_pred_compact = reg_compact.fit(X, y).predict(X2)
assert_array_almost_equal(y_pred_chain, y_pred_compact)
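
The pattern all of these tests verify, reduced to a standalone sketch (radius chosen arbitrarily; equivalence requires it to match the downstream eps):

from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs
from sklearn.neighbors import RadiusNeighborsTransformer
from sklearn.pipeline import make_pipeline

X, _ = make_blobs(random_state=0)
radius = 1.0
chained = make_pipeline(
    RadiusNeighborsTransformer(radius=radius, mode='distance'),
    DBSCAN(metric='precomputed', eps=radius))
print(chained.fit_predict(X)[:10])  # matches DBSCAN(eps=radius).fit_predict(X)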

View file

@ -0,0 +1,279 @@
# License: BSD 3 clause
import pickle
import itertools
import numpy as np
import pytest
from sklearn.neighbors import DistanceMetric
from sklearn.neighbors._ball_tree import (
BallTree, kernel_norm, DTYPE, ITYPE,
NeighborsHeap as NeighborsHeapBT,
simultaneous_sort as simultaneous_sort_bt,
nodeheap_sort as nodeheap_sort_bt)
from sklearn.neighbors._kd_tree import (
KDTree, NeighborsHeap as NeighborsHeapKDT,
simultaneous_sort as simultaneous_sort_kdt,
nodeheap_sort as nodeheap_sort_kdt)
from sklearn.utils import check_random_state
from numpy.testing import assert_array_almost_equal, assert_allclose
rng = np.random.RandomState(42)
V_mahalanobis = rng.rand(3, 3)
V_mahalanobis = np.dot(V_mahalanobis, V_mahalanobis.T)
DIMENSION = 3
METRICS = {'euclidean': {},
'manhattan': {},
'minkowski': dict(p=3),
'chebyshev': {},
'seuclidean': dict(V=rng.random_sample(DIMENSION)),
'wminkowski': dict(p=3, w=rng.random_sample(DIMENSION)),
'mahalanobis': dict(V=V_mahalanobis)}
KD_TREE_METRICS = ['euclidean', 'manhattan', 'chebyshev', 'minkowski']
BALL_TREE_METRICS = list(METRICS)
def dist_func(x1, x2, p):
return np.sum((x1 - x2) ** p) ** (1. / p)
def compute_kernel_slow(Y, X, kernel, h):
d = np.sqrt(((Y[:, None, :] - X) ** 2).sum(-1))
norm = kernel_norm(h, X.shape[1], kernel)
if kernel == 'gaussian':
return norm * np.exp(-0.5 * (d * d) / (h * h)).sum(-1)
elif kernel == 'tophat':
return norm * (d < h).sum(-1)
elif kernel == 'epanechnikov':
return norm * ((1.0 - (d * d) / (h * h)) * (d < h)).sum(-1)
elif kernel == 'exponential':
return norm * (np.exp(-d / h)).sum(-1)
elif kernel == 'linear':
return norm * ((1 - d / h) * (d < h)).sum(-1)
elif kernel == 'cosine':
return norm * (np.cos(0.5 * np.pi * d / h) * (d < h)).sum(-1)
else:
raise ValueError('kernel not recognized')
def brute_force_neighbors(X, Y, k, metric, **kwargs):
D = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X)
ind = np.argsort(D, axis=1)[:, :k]
dist = D[np.arange(Y.shape[0])[:, None], ind]
return dist, ind
@pytest.mark.parametrize('Cls', [KDTree, BallTree])
@pytest.mark.parametrize("kernel", ['gaussian', 'tophat', 'epanechnikov',
'exponential', 'linear', 'cosine'])
@pytest.mark.parametrize("h", [0.01, 0.1, 1])
@pytest.mark.parametrize("rtol", [0, 1E-5])
@pytest.mark.parametrize("atol", [1E-6, 1E-2])
@pytest.mark.parametrize("breadth_first", [True, False])
def test_kernel_density(Cls, kernel, h, rtol, atol, breadth_first,
n_samples=100, n_features=3):
rng = check_random_state(1)
X = rng.random_sample((n_samples, n_features))
Y = rng.random_sample((n_samples, n_features))
dens_true = compute_kernel_slow(Y, X, kernel, h)
tree = Cls(X, leaf_size=10)
dens = tree.kernel_density(Y, h, atol=atol, rtol=rtol,
kernel=kernel,
breadth_first=breadth_first)
assert_allclose(dens, dens_true,
atol=atol, rtol=max(rtol, 1e-7))
@pytest.mark.parametrize('Cls', [KDTree, BallTree])
def test_neighbor_tree_query_radius(Cls, n_samples=100, n_features=10):
rng = check_random_state(0)
X = 2 * rng.random_sample(size=(n_samples, n_features)) - 1
query_pt = np.zeros(n_features, dtype=float)
eps = 1E-15 # roundoff error can cause test to fail
tree = Cls(X, leaf_size=5)
rad = np.sqrt(((X - query_pt) ** 2).sum(1))
for r in np.linspace(rad[0], rad[-1], 100):
ind = tree.query_radius([query_pt], r + eps)[0]
i = np.where(rad <= r + eps)[0]
ind.sort()
i.sort()
assert_array_almost_equal(i, ind)
@pytest.mark.parametrize('Cls', [KDTree, BallTree])
def test_neighbor_tree_query_radius_distance(Cls, n_samples=100,
n_features=10):
rng = check_random_state(0)
X = 2 * rng.random_sample(size=(n_samples, n_features)) - 1
query_pt = np.zeros(n_features, dtype=float)
eps = 1E-15 # roundoff error can cause test to fail
tree = Cls(X, leaf_size=5)
rad = np.sqrt(((X - query_pt) ** 2).sum(1))
for r in np.linspace(rad[0], rad[-1], 100):
ind, dist = tree.query_radius([query_pt], r + eps,
return_distance=True)
ind = ind[0]
dist = dist[0]
d = np.sqrt(((query_pt - X[ind]) ** 2).sum(1))
assert_array_almost_equal(d, dist)
@pytest.mark.parametrize('Cls', [KDTree, BallTree])
@pytest.mark.parametrize('dualtree', (True, False))
def test_neighbor_tree_two_point(Cls, dualtree, n_samples=100, n_features=3):
rng = check_random_state(0)
X = rng.random_sample((n_samples, n_features))
Y = rng.random_sample((n_samples, n_features))
r = np.linspace(0, 1, 10)
tree = Cls(X, leaf_size=10)
D = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
counts_true = [(D <= ri).sum() for ri in r]
counts = tree.two_point_correlation(Y, r=r, dualtree=dualtree)
assert_array_almost_equal(counts, counts_true)
@pytest.mark.parametrize('NeighborsHeap', [NeighborsHeapBT, NeighborsHeapKDT])
def test_neighbors_heap(NeighborsHeap, n_pts=5, n_nbrs=10):
heap = NeighborsHeap(n_pts, n_nbrs)
rng = check_random_state(0)
for row in range(n_pts):
d_in = rng.random_sample(2 * n_nbrs).astype(DTYPE, copy=False)
i_in = np.arange(2 * n_nbrs, dtype=ITYPE)
for d, i in zip(d_in, i_in):
heap.push(row, d, i)
ind = np.argsort(d_in)
d_in = d_in[ind]
i_in = i_in[ind]
d_heap, i_heap = heap.get_arrays(sort=True)
assert_array_almost_equal(d_in[:n_nbrs], d_heap[row])
assert_array_almost_equal(i_in[:n_nbrs], i_heap[row])
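

# Sketch: NeighborsHeap keeps, per row, the k smallest distances pushed so
# far. A pure-Python analogue (illustrative, using heapq) maintains a
# max-heap of size k by negating the keys, so the current worst of the k
# best sits at the root and is replaced whenever a smaller value arrives.
def _example_bounded_k_smallest(values, k):
    import heapq
    heap = []  # holds (-value, index); root is the largest of the k smallest
    for i, v in enumerate(values):
        if len(heap) < k:
            heapq.heappush(heap, (-v, i))
        elif v < -heap[0][0]:
            heapq.heapreplace(heap, (-v, i))
    return sorted((-neg, i) for neg, i in heap)  # (value, index), ascending
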
@pytest.mark.parametrize('nodeheap_sort', [nodeheap_sort_bt,
nodeheap_sort_kdt])
def test_node_heap(nodeheap_sort, n_nodes=50):
rng = check_random_state(0)
vals = rng.random_sample(n_nodes).astype(DTYPE, copy=False)
i1 = np.argsort(vals)
vals2, i2 = nodeheap_sort(vals)
assert_array_almost_equal(i1, i2)
assert_array_almost_equal(vals[i1], vals2)


@pytest.mark.parametrize('simultaneous_sort', [simultaneous_sort_bt,
simultaneous_sort_kdt])
def test_simultaneous_sort(simultaneous_sort, n_rows=10, n_pts=201):
rng = check_random_state(0)
dist = rng.random_sample((n_rows, n_pts)).astype(DTYPE, copy=False)
ind = (np.arange(n_pts) + np.zeros((n_rows, 1))).astype(ITYPE, copy=False)
dist2 = dist.copy()
ind2 = ind.copy()
# simultaneous sort rows using function
simultaneous_sort(dist, ind)
# simultaneous sort rows using numpy
i = np.argsort(dist2, axis=1)
row_ind = np.arange(n_rows)[:, None]
dist2 = dist2[row_ind, i]
ind2 = ind2[row_ind, i]
assert_array_almost_equal(dist, dist2)
assert_array_almost_equal(ind, ind2)
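

# Sketch (illustrative helper): the numpy reference above can be written more
# compactly with np.take_along_axis, which applies the per-row argsort to
# both arrays at once.
def _example_simultaneous_sort(dist, ind):
    order = np.argsort(dist, axis=1)
    return (np.take_along_axis(dist, order, axis=1),
            np.take_along_axis(ind, order, axis=1))
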
@pytest.mark.parametrize('Cls', [KDTree, BallTree])
def test_gaussian_kde(Cls, n_samples=1000):
# Compare gaussian KDE results to scipy.stats.gaussian_kde
from scipy.stats import gaussian_kde
rng = check_random_state(0)
x_in = rng.normal(0, 1, n_samples)
x_out = np.linspace(-5, 5, 30)
for h in [0.01, 0.1, 1]:
tree = Cls(x_in[:, None])
        # scipy multiplies a scalar bw_method by the sample std, so this
        # choice makes gaussian_kde use the same bandwidth h as the tree
        gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
dens_tree = tree.kernel_density(x_out[:, None], h) / n_samples
dens_gkde = gkde.evaluate(x_out)
assert_array_almost_equal(dens_tree, dens_gkde, decimal=3)


@pytest.mark.parametrize(
'Cls, metric',
itertools.chain(
[(KDTree, metric) for metric in KD_TREE_METRICS],
[(BallTree, metric) for metric in BALL_TREE_METRICS]))
@pytest.mark.parametrize('k', (1, 3, 5))
@pytest.mark.parametrize('dualtree', (True, False))
@pytest.mark.parametrize('breadth_first', (True, False))
def test_nn_tree_query(Cls, metric, k, dualtree, breadth_first):
rng = check_random_state(0)
X = rng.random_sample((40, DIMENSION))
Y = rng.random_sample((10, DIMENSION))
kwargs = METRICS[metric]
kdt = Cls(X, leaf_size=1, metric=metric, **kwargs)
dist1, ind1 = kdt.query(Y, k, dualtree=dualtree,
breadth_first=breadth_first)
dist2, ind2 = brute_force_neighbors(X, Y, k, metric, **kwargs)
# don't check indices here: if there are any duplicate distances,
# the indices may not match. Distances should not have this problem.
assert_array_almost_equal(dist1, dist2)


@pytest.mark.parametrize(
"Cls, metric",
[(KDTree, 'euclidean'), (BallTree, 'euclidean'),
(BallTree, dist_func)])
@pytest.mark.parametrize('protocol', (0, 1, 2))
def test_pickle(Cls, metric, protocol):
rng = check_random_state(0)
X = rng.random_sample((10, 3))
    if callable(metric):
        kwargs = {'p': 2}
    else:
        kwargs = {}
    tree1 = Cls(X, leaf_size=1, metric=metric, **kwargs)
    dist1, ind1 = tree1.query(X)  # query returns (distances, indices)
    s = pickle.dumps(tree1, protocol=protocol)
    tree2 = pickle.loads(s)
    dist2, ind2 = tree2.query(X)
    assert_array_almost_equal(ind1, ind2)
    assert_array_almost_equal(dist1, dist2)
assert isinstance(tree2, Cls)
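

# Sketch (illustrative, not part of the original tests): the same round-trip
# also works through joblib, the persistence helper commonly used for
# scikit-learn objects.
def _example_joblib_roundtrip(tree):
    import io
    import joblib
    buf = io.BytesIO()
    joblib.dump(tree, buf)
    buf.seek(0)
    return joblib.load(buf)  # behaves like the pickled copy above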

View file

@ -0,0 +1,104 @@
import pickle
import numpy as np
import pytest
from sklearn.neighbors._quad_tree import _QuadTree
from sklearn.utils import check_random_state


def test_quadtree_boundary_computation():
# Introduce a point into a quad tree with boundaries not easy to compute.
Xs = []
# check a random case
Xs.append(np.array([[-1, 1], [-4, -1]], dtype=np.float32))
# check the case where only 0 are inserted
Xs.append(np.array([[0, 0], [0, 0]], dtype=np.float32))
# check the case where only negative are inserted
Xs.append(np.array([[-1, -2], [-4, 0]], dtype=np.float32))
# check the case where only small numbers are inserted
Xs.append(np.array([[-1e-6, 1e-6], [-4e-6, -1e-6]], dtype=np.float32))
for X in Xs:
tree = _QuadTree(n_dimensions=2, verbose=0)
tree.build_tree(X)
tree._check_coherence()


def test_quadtree_similar_point():
# Introduce a point into a quad tree where a similar point already exists.
# Test will hang if it doesn't complete.
Xs = []
# check the case where points are actually different
Xs.append(np.array([[1, 2], [3, 4]], dtype=np.float32))
# check the case where points are the same on X axis
Xs.append(np.array([[1.0, 2.0], [1.0, 3.0]], dtype=np.float32))
# check the case where points are arbitrarily close on X axis
Xs.append(np.array([[1.00001, 2.0], [1.00002, 3.0]], dtype=np.float32))
# check the case where points are the same on Y axis
Xs.append(np.array([[1.0, 2.0], [3.0, 2.0]], dtype=np.float32))
# check the case where points are arbitrarily close on Y axis
Xs.append(np.array([[1.0, 2.00001], [3.0, 2.00002]], dtype=np.float32))
# check the case where points are arbitrarily close on both axes
Xs.append(np.array([[1.00001, 2.00001], [1.00002, 2.00002]],
dtype=np.float32))
# check the case where points are arbitrarily close on both axes
# close to machine epsilon - x axis
Xs.append(np.array([[1, 0.0003817754041], [2, 0.0003817753750]],
dtype=np.float32))
# check the case where points are arbitrarily close on both axes
# close to machine epsilon - y axis
Xs.append(np.array([[0.0003817754041, 1.0], [0.0003817753750, 2.0]],
dtype=np.float32))
for X in Xs:
tree = _QuadTree(n_dimensions=2, verbose=0)
tree.build_tree(X)
tree._check_coherence()
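

# Sketch (illustrative helper): _QuadTree is a private structure (used by the
# Barnes-Hut t-SNE implementation); after build_tree, get_cell maps each
# point to the id of its leaf cell. X is expected to be float32, as in the
# tests above.
def _example_quadtree_cells(X):
    tree = _QuadTree(n_dimensions=X.shape[1], verbose=0)
    tree.build_tree(X)
    return [tree.get_cell(x) for x in X]  # leaf cell id per point
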
@pytest.mark.parametrize('n_dimensions', (2, 3))
@pytest.mark.parametrize('protocol', (0, 1, 2))
def test_quad_tree_pickle(n_dimensions, protocol):
rng = check_random_state(0)
X = rng.random_sample((10, n_dimensions))
tree = _QuadTree(n_dimensions=n_dimensions, verbose=0)
tree.build_tree(X)
s = pickle.dumps(tree, protocol=protocol)
bt2 = pickle.loads(s)
for x in X:
cell_x_tree = tree.get_cell(x)
cell_x_bt2 = bt2.get_cell(x)
assert cell_x_tree == cell_x_bt2


@pytest.mark.parametrize('n_dimensions', (2, 3))
def test_qt_insert_duplicate(n_dimensions):
rng = check_random_state(0)
X = rng.random_sample((10, n_dimensions))
Xd = np.r_[X, X[:5]]
tree = _QuadTree(n_dimensions=n_dimensions, verbose=0)
tree.build_tree(Xd)
cumulative_size = tree.cumulative_size
leafs = tree.leafs
    # Assert that the first 5 points are indeed duplicated and that the
    # remaining ones are single-point leaves
for i, x in enumerate(X):
cell_id = tree.get_cell(x)
assert leafs[cell_id]
assert cumulative_size[cell_id] == 1 + (i < 5)


def test_summarize():
_QuadTree.test_summarize()

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _typedefs # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.typedefs'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
return getattr(_typedefs, name)


if sys.version_info < (3, 7):
Pep562(__name__)
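

# Sketch (commentary on the shim above): with it in place, imports from the
# deprecated path are rerouted through the module-level __getattr__ (PEP 562),
# e.g.
#
#     from sklearn.neighbors.typedefs import DTYPE
#
# emits the deprecation warning and then resolves DTYPE via _typedefs. On
# Python < 3.7, where PEP 562 is unavailable, the Pep562 wrapper emulates
# the same behavior.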

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _unsupervised # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.neighbors.unsupervised'
correct_import_path = 'sklearn.neighbors'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
return getattr(_unsupervised, name)


if sys.version_info < (3, 7):
Pep562(__name__)