Uploaded Test files
This commit is contained in:
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
1239  venv/Lib/site-packages/sklearn/utils/__init__.py  (new file)
File diff suppressed because it is too large
27 binary files changed (contents not shown)
BIN  venv/Lib/site-packages/sklearn/utils/_cython_blas.cp36-win32.pyd  (new binary file, not shown)
41  venv/Lib/site-packages/sklearn/utils/_cython_blas.pxd  (new file)
@@ -0,0 +1,41 @@
from cython cimport floating


cpdef enum BLAS_Order:
    RowMajor  # C contiguous
    ColMajor  # Fortran contiguous


cpdef enum BLAS_Trans:
    NoTrans = 110  # correspond to 'n'
    Trans = 116    # correspond to 't'


# BLAS Level 1 ################################################################
cdef floating _dot(int, floating*, int, floating*, int) nogil

cdef floating _asum(int, floating*, int) nogil

cdef void _axpy(int, floating, floating*, int, floating*, int) nogil

cdef floating _nrm2(int, floating*, int) nogil

cdef void _copy(int, floating*, int, floating*, int) nogil

cdef void _scal(int, floating, floating*, int) nogil

cdef void _rotg(floating*, floating*, floating*, floating*) nogil

cdef void _rot(int, floating*, int, floating*, int, floating, floating) nogil

# BLAS Level 2 ################################################################
cdef void _gemv(BLAS_Order, BLAS_Trans, int, int, floating, floating*, int,
                floating*, int, floating, floating*, int) nogil

cdef void _ger(BLAS_Order, int, int, floating, floating*, int, floating*, int,
               floating*, int) nogil

# BLAS Level 3 ################################################################
cdef void _gemm(BLAS_Order, BLAS_Trans, BLAS_Trans, int, int, int, floating,
                floating*, int, floating*, int, floating, floating*,
                int) nogil
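The declarations above are fused-type (float32/float64) wrappers around standard BLAS routines, used internally by the Cython code in this package. For context, a minimal sketch of the corresponding level-1/2/3 operations through SciPy's BLAS bindings; scipy.linalg.blas is an illustrative stand-in here, not part of this diff:

import numpy as np
from scipy.linalg import blas

x = np.array([1.0, 2.0, 3.0])
y = np.array([4.0, 5.0, 6.0])
A = np.arange(9.0).reshape(3, 3)

# Level 1: dot product, the operation wrapped by _dot above
print(blas.ddot(x, y))        # 32.0
# Level 2: y := alpha*A*x + beta*y, the operation wrapped by _gemv
print(blas.dgemv(1.0, A, x))
# Level 3: C := alpha*A*B + beta*C, the operation wrapped by _gemm
print(blas.dgemm(1.0, A, A))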
311  venv/Lib/site-packages/sklearn/utils/_estimator_html_repr.py  (new file)
@@ -0,0 +1,311 @@
from contextlib import closing
from contextlib import suppress
from io import StringIO
import uuid
import html

from sklearn import config_context


class _VisualBlock:
    """HTML Representation of Estimator

    Parameters
    ----------
    kind : {'serial', 'parallel', 'single'}
        kind of HTML block

    estimators : list of estimators or `_VisualBlock`s or a single estimator
        If kind != 'single', then `estimators` is a list of
        estimators.
        If kind == 'single', then `estimators` is a single estimator.

    names : list of str
        If kind != 'single', then `names` corresponds to estimators.
        If kind == 'single', then `names` is a single string corresponding to
        the single estimator.

    name_details : list of str, str, or None, default=None
        If kind != 'single', then `name_details` corresponds to `names`.
        If kind == 'single', then `name_details` is a single string
        corresponding to the single estimator.

    dash_wrapped : bool, default=True
        If true, wrapped HTML element will be wrapped with a dashed border.
        Only active when kind != 'single'.
    """
    def __init__(self, kind, estimators, *, names=None, name_details=None,
                 dash_wrapped=True):
        self.kind = kind
        self.estimators = estimators
        self.dash_wrapped = dash_wrapped

        if self.kind in ('parallel', 'serial'):
            if names is None:
                names = (None, ) * len(estimators)
            if name_details is None:
                name_details = (None, ) * len(estimators)

        self.names = names
        self.name_details = name_details

    def _sk_visual_block_(self):
        return self


def _write_label_html(out, name, name_details,
                      outer_class="sk-label-container",
                      inner_class="sk-label",
                      checked=False):
    """Write labeled html with or without a dropdown with named details"""
    out.write(f'<div class="{outer_class}">'
              f'<div class="{inner_class} sk-toggleable">')
    name = html.escape(name)

    if name_details is not None:
        checked_str = 'checked' if checked else ''
        est_id = uuid.uuid4()
        out.write(f'<input class="sk-toggleable__control sk-hidden--visually" '
                  f'id="{est_id}" type="checkbox" {checked_str}>'
                  f'<label class="sk-toggleable__label" for="{est_id}">'
                  f'{name}</label>'
                  f'<div class="sk-toggleable__content"><pre>{name_details}'
                  f'</pre></div>')
    else:
        out.write(f'<label>{name}</label>')
    out.write('</div></div>')  # outer_class inner_class


def _get_visual_block(estimator):
    """Generate information about how to display an estimator.
    """
    with suppress(AttributeError):
        return estimator._sk_visual_block_()

    if isinstance(estimator, str):
        return _VisualBlock('single', estimator,
                            names=estimator, name_details=estimator)
    elif estimator is None:
        return _VisualBlock('single', estimator,
                            names='None', name_details='None')

    # check if estimator looks like a meta estimator wraps estimators
    if hasattr(estimator, 'get_params'):
        estimators = []
        for key, value in estimator.get_params().items():
            # Only look at the estimators in the first layer
            if '__' not in key and hasattr(value, 'get_params'):
                estimators.append(value)
        if len(estimators):
            return _VisualBlock('parallel', estimators, names=None)

    return _VisualBlock('single', estimator,
                        names=estimator.__class__.__name__,
                        name_details=str(estimator))


def _write_estimator_html(out, estimator, estimator_label,
                          estimator_label_details, first_call=False):
    """Write estimator to html in serial, parallel, or by itself (single).
    """
    if first_call:
        est_block = _get_visual_block(estimator)
    else:
        with config_context(print_changed_only=True):
            est_block = _get_visual_block(estimator)

    if est_block.kind in ('serial', 'parallel'):
        dashed_wrapped = first_call or est_block.dash_wrapped
        dash_cls = " sk-dashed-wrapped" if dashed_wrapped else ""
        out.write(f'<div class="sk-item{dash_cls}">')

        if estimator_label:
            _write_label_html(out, estimator_label, estimator_label_details)

        kind = est_block.kind
        out.write(f'<div class="sk-{kind}">')
        est_infos = zip(est_block.estimators, est_block.names,
                        est_block.name_details)

        for est, name, name_details in est_infos:
            if kind == 'serial':
                _write_estimator_html(out, est, name, name_details)
            else:  # parallel
                out.write('<div class="sk-parallel-item">')
                # wrap element in a serial visualblock
                serial_block = _VisualBlock('serial', [est],
                                            dash_wrapped=False)
                _write_estimator_html(out, serial_block, name, name_details)
                out.write('</div>')  # sk-parallel-item

        out.write('</div></div>')
    elif est_block.kind == 'single':
        _write_label_html(out, est_block.names, est_block.name_details,
                          outer_class="sk-item", inner_class="sk-estimator",
                          checked=first_call)


_STYLE = """
div.sk-top-container {
  color: black;
  background-color: white;
}
div.sk-toggleable {
  background-color: white;
}
label.sk-toggleable__label {
  cursor: pointer;
  display: block;
  width: 100%;
  margin-bottom: 0;
  padding: 0.2em 0.3em;
  box-sizing: border-box;
  text-align: center;
}
div.sk-toggleable__content {
  max-height: 0;
  max-width: 0;
  overflow: hidden;
  text-align: left;
  background-color: #f0f8ff;
}
div.sk-toggleable__content pre {
  margin: 0.2em;
  color: black;
  border-radius: 0.25em;
  background-color: #f0f8ff;
}
input.sk-toggleable__control:checked~div.sk-toggleable__content {
  max-height: 200px;
  max-width: 100%;
  overflow: auto;
}
div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {
  background-color: #d4ebff;
}
div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {
  background-color: #d4ebff;
}
input.sk-hidden--visually {
  border: 0;
  clip: rect(1px 1px 1px 1px);
  clip: rect(1px, 1px, 1px, 1px);
  height: 1px;
  margin: -1px;
  overflow: hidden;
  padding: 0;
  position: absolute;
  width: 1px;
}
div.sk-estimator {
  font-family: monospace;
  background-color: #f0f8ff;
  margin: 0.25em 0.25em;
  border: 1px dotted black;
  border-radius: 0.25em;
  box-sizing: border-box;
}
div.sk-estimator:hover {
  background-color: #d4ebff;
}
div.sk-parallel-item::after {
  content: "";
  width: 100%;
  border-bottom: 1px solid gray;
  flex-grow: 1;
}
div.sk-label:hover label.sk-toggleable__label {
  background-color: #d4ebff;
}
div.sk-serial::before {
  content: "";
  position: absolute;
  border-left: 1px solid gray;
  box-sizing: border-box;
  top: 2em;
  bottom: 0;
  left: 50%;
}
div.sk-serial {
  display: flex;
  flex-direction: column;
  align-items: center;
  background-color: white;
}
div.sk-item {
  z-index: 1;
}
div.sk-parallel {
  display: flex;
  align-items: stretch;
  justify-content: center;
  background-color: white;
}
div.sk-parallel-item {
  display: flex;
  flex-direction: column;
  position: relative;
  background-color: white;
}
div.sk-parallel-item:first-child::after {
  align-self: flex-end;
  width: 50%;
}
div.sk-parallel-item:last-child::after {
  align-self: flex-start;
  width: 50%;
}
div.sk-parallel-item:only-child::after {
  width: 0;
}
div.sk-dashed-wrapped {
  border: 1px dashed gray;
  margin: 0.2em;
  box-sizing: border-box;
  padding-bottom: 0.1em;
  background-color: white;
  position: relative;
}
div.sk-label label {
  font-family: monospace;
  font-weight: bold;
  background-color: white;
  display: inline-block;
  line-height: 1.2em;
}
div.sk-label-container {
  position: relative;
  z-index: 2;
  text-align: center;
}
div.sk-container {
  display: inline-block;
  position: relative;
}
""".replace('  ', '').replace('\n', '')  # noqa


def estimator_html_repr(estimator):
    """Build a HTML representation of an estimator.

    Read more in the :ref:`User Guide <visualizing_composite_estimators>`.

    Parameters
    ----------
    estimator : estimator object
        The estimator to visualize.

    Returns
    -------
    html: str
        HTML representation of estimator.
    """
    with closing(StringIO()) as out:
        out.write(f'<style>{_STYLE}</style>'
                  f'<div class="sk-top-container"><div class="sk-container">')
        _write_estimator_html(out, estimator, estimator.__class__.__name__,
                              str(estimator), first_call=True)
        out.write('</div></div>')

        html_output = out.getvalue()
        return html_output
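estimator_html_repr() above is the entry point for the HTML "diagram" view of estimators. A minimal usage sketch, assuming this vendored scikit-learn is importable; the Pipeline composition is illustrative and not part of the diff:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.utils._estimator_html_repr import estimator_html_repr

pipe = Pipeline([('scale', StandardScaler()),
                 ('clf', LogisticRegression())])

# Returns a self-contained <style>...</style><div>...</div> snippet that
# notebooks render when the 'diagram' display is configured.
html_snippet = estimator_html_repr(pipe)
print(html_snippet[:80])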
BIN  venv/Lib/site-packages/sklearn/utils/_fast_dict.cp36-win32.pyd  (new binary file, not shown)
22  venv/Lib/site-packages/sklearn/utils/_fast_dict.pxd  (new file)
@@ -0,0 +1,22 @@
# Author: Gael Varoquaux
# License: BSD
"""
Uses C++ map containers for fast dict-like behavior with keys being
integers, and values float.
"""

from libcpp.map cimport map as cpp_map

# Import the C-level symbols of numpy
cimport numpy as np

ctypedef np.float64_t DTYPE_t

ctypedef np.intp_t ITYPE_t

###############################################################################
# An object to be used in Python

cdef class IntFloatDict:
    cdef cpp_map[ITYPE_t, DTYPE_t] my_map
    cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values)
19  venv/Lib/site-packages/sklearn/utils/_joblib.py  (new file)
@@ -0,0 +1,19 @@
import warnings as _warnings

with _warnings.catch_warnings():
    _warnings.simplefilter("ignore")
    # joblib imports may raise DeprecationWarning on certain Python
    # versions
    import joblib
    from joblib import logger
    from joblib import dump, load
    from joblib import __version__
    from joblib import effective_n_jobs
    from joblib import hash
    from joblib import cpu_count, Parallel, Memory, delayed
    from joblib import parallel_backend, register_parallel_backend


__all__ = ["parallel_backend", "register_parallel_backend", "cpu_count",
           "Parallel", "Memory", "delayed", "effective_n_jobs", "hash",
           "logger", "dump", "load", "joblib", "__version__"]
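The module above only re-exports joblib under sklearn.utils._joblib while silencing import-time warnings. A short usage sketch of the re-exported names (the sqrt workload is illustrative):

from math import sqrt
from sklearn.utils._joblib import Parallel, delayed, effective_n_jobs

# Run four independent tasks, at most two at a time.
results = Parallel(n_jobs=2)(delayed(sqrt)(i ** 2) for i in range(4))
print(results)               # [0.0, 1.0, 2.0, 3.0]
print(effective_n_jobs(-1))  # number of usable CPU cores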
19  venv/Lib/site-packages/sklearn/utils/_mask.py  (new file)
@@ -0,0 +1,19 @@
import numpy as np

from . import is_scalar_nan
from .fixes import _object_dtype_isnan


def _get_mask(X, value_to_mask):
    """Compute the boolean mask X == value_to_mask."""
    if is_scalar_nan(value_to_mask):
        if X.dtype.kind == "f":
            return np.isnan(X)
        elif X.dtype.kind in ("i", "u"):
            # can't have NaNs in integer array.
            return np.zeros(X.shape, dtype=bool)
        else:
            # np.isnan does not work on object dtypes.
            return _object_dtype_isnan(X)
    else:
        return X == value_to_mask
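A quick sketch of what _get_mask() returns for the NaN case versus an ordinary value (the array is illustrative):

import numpy as np
from sklearn.utils._mask import _get_mask

X = np.array([[1.0, np.nan], [3.0, 4.0]])
# NaN cannot be found with ==, so the helper switches to np.isnan
print(_get_mask(X, np.nan))   # [[False  True] [False False]]
print(_get_mask(X, 3.0))      # [[False False] [ True False]]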
166  venv/Lib/site-packages/sklearn/utils/_mocking.py  (new file)
@@ -0,0 +1,166 @@
import numpy as np

from ..base import BaseEstimator, ClassifierMixin
from .validation import _num_samples, check_array


class ArraySlicingWrapper:
    """
    Parameters
    ----------
    array
    """
    def __init__(self, array):
        self.array = array

    def __getitem__(self, aslice):
        return MockDataFrame(self.array[aslice])


class MockDataFrame:
    """
    Parameters
    ----------
    array
    """
    # have shape and length but don't support indexing.
    def __init__(self, array):
        self.array = array
        self.values = array
        self.shape = array.shape
        self.ndim = array.ndim
        # ugly hack to make iloc work.
        self.iloc = ArraySlicingWrapper(array)

    def __len__(self):
        return len(self.array)

    def __array__(self, dtype=None):
        # Pandas data frames also are array-like: we want to make sure that
        # input validation in cross-validation does not try to call that
        # method.
        return self.array

    def __eq__(self, other):
        return MockDataFrame(self.array == other.array)

    def __ne__(self, other):
        return not self == other


class CheckingClassifier(ClassifierMixin, BaseEstimator):
    """Dummy classifier to test pipelining and meta-estimators.

    Checks some property of X and y in fit / predict.
    This allows testing whether pipelines / cross-validation or metaestimators
    changed the input.

    Parameters
    ----------
    check_y
    check_X
    foo_param
    expected_fit_params

    Attributes
    ----------
    classes_
    """
    def __init__(self, check_y=None, check_X=None, foo_param=0,
                 expected_fit_params=None):
        self.check_y = check_y
        self.check_X = check_X
        self.foo_param = foo_param
        self.expected_fit_params = expected_fit_params

    def fit(self, X, y, **fit_params):
        """
        Fit classifier

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like of shape (n_samples, n_output) or (n_samples,), optional
            Target relative to X for classification or regression;
            None for unsupervised learning.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator
        """
        assert len(X) == len(y)
        if self.check_X is not None:
            assert self.check_X(X)
        if self.check_y is not None:
            assert self.check_y(y)
        self.n_features_in_ = len(X)
        self.classes_ = np.unique(check_array(y, ensure_2d=False,
                                              allow_nd=True))
        if self.expected_fit_params:
            missing = set(self.expected_fit_params) - set(fit_params)
            assert len(missing) == 0, 'Expected fit parameter(s) %s not ' \
                                      'seen.' % list(missing)
            for key, value in fit_params.items():
                assert len(value) == len(X), (
                    'Fit parameter %s has length %d; '
                    'expected %d.'
                    % (key, len(value), len(X)))

        return self

    def predict(self, T):
        """
        Parameters
        ----------
        T : indexable, length n_samples
        """
        if self.check_X is not None:
            assert self.check_X(T)
        return self.classes_[np.zeros(_num_samples(T), dtype=np.int)]

    def score(self, X=None, Y=None):
        """
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data, where n_samples is the number of samples and
            n_features is the number of features.

        Y : array-like of shape (n_samples, n_output) or (n_samples,), optional
            Target relative to X for classification or regression;
            None for unsupervised learning.
        """
        if self.foo_param > 1:
            score = 1.
        else:
            score = 0.
        return score

    def _more_tags(self):
        return {'_skip_test': True, 'X_types': ['1dlabel']}


class NoSampleWeightWrapper(BaseEstimator):
    """Wrap estimator which will not expose `sample_weight`.

    Parameters
    ----------
    est : estimator, default=None
        The estimator to wrap.
    """
    def __init__(self, est=None):
        self.est = est

    def fit(self, X, y):
        return self.est.fit(X, y)

    def predict(self, X):
        return self.est.predict(X)

    def predict_proba(self, X):
        return self.est.predict_proba(X)

    def _more_tags(self):
        return {'_skip_test': True}
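CheckingClassifier above is meant for scikit-learn's own tests: it lets a test assert properties of the data that actually reaches fit()/predict() inside a pipeline or meta-estimator. A hedged sketch of that pattern (the data and the check are illustrative):

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.utils._mocking import CheckingClassifier

X = np.ones((10, 2))
y = np.array([0, 1] * 5)

# The classifier asserts, inside fit(), that it still receives all 10 rows,
# i.e. that the pipeline did not silently drop or reshape the data.
clf = CheckingClassifier(check_X=lambda X_in: len(X_in) == 10)
make_pipeline(clf).fit(X, y)  # raises AssertionError if the check fails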
438  venv/Lib/site-packages/sklearn/utils/_pprint.py  (new file)
@@ -0,0 +1,438 @@
"""This module contains the _EstimatorPrettyPrinter class used in
BaseEstimator.__repr__ for pretty-printing estimators"""

# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 Python Software Foundation;
# All Rights Reserved

# Authors: Fred L. Drake, Jr. <fdrake@acm.org> (built-in CPython pprint module)
#          Nicolas Hug (scikit-learn specific changes)

# License: PSF License version 2 (see below)

# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
# --------------------------------------------

# 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"),
# and the Individual or Organization ("Licensee") accessing and otherwise
# using this software ("Python") in source or binary form and its associated
# documentation.

# 2. Subject to the terms and conditions of this License Agreement, PSF hereby
# grants Licensee a nonexclusive, royalty-free, world-wide license to
# reproduce, analyze, test, perform and/or display publicly, prepare
# derivative works, distribute, and otherwise use Python alone or in any
# derivative version, provided, however, that PSF's License Agreement and
# PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004,
# 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
# 2017, 2018 Python Software Foundation; All Rights Reserved" are retained in
# Python alone or in any derivative version prepared by Licensee.

# 3. In the event Licensee prepares a derivative work that is based on or
# incorporates Python or any part thereof, and wants to make the derivative
# work available to others as provided herein, then Licensee hereby agrees to
# include in any such work a brief summary of the changes made to Python.

# 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES
# NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT
# NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF
# MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF
# PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.

# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY
# INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF
# MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE
# THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

# 6. This License Agreement will automatically terminate upon a material
# breach of its terms and conditions.

# 7. Nothing in this License Agreement shall be deemed to create any
# relationship of agency, partnership, or joint venture between PSF and
# Licensee. This License Agreement does not grant permission to use PSF
# trademarks or trade name in a trademark sense to endorse or promote products
# or services of Licensee, or any third party.

# 8. By copying, installing or otherwise using Python, Licensee agrees to be
# bound by the terms and conditions of this License Agreement.


# Brief summary of changes to original code:
# - "compact" parameter is supported for dicts, not just lists or tuples
# - estimators have a custom handler, they're not just treated as objects
# - long sequences (lists, tuples, dict items) with more than N elements are
#   shortened using ellipsis (', ...') at the end.

from inspect import signature
import pprint
from collections import OrderedDict

from ..base import BaseEstimator
from .._config import get_config
from . import is_scalar_nan


class KeyValTuple(tuple):
    """Dummy class for correctly rendering key-value tuples from dicts."""
    def __repr__(self):
        # needed for _dispatch[tuple.__repr__] not to be overridden
        return super().__repr__()


class KeyValTupleParam(KeyValTuple):
    """Dummy class for correctly rendering key-value tuples from parameters."""
    pass


def _changed_params(estimator):
    """Return dict (param_name: value) of parameters that were given to
    estimator with non-default values."""

    params = estimator.get_params(deep=False)
    filtered_params = {}
    init_func = getattr(estimator.__init__, 'deprecated_original',
                        estimator.__init__)
    init_params = signature(init_func).parameters
    init_params = {name: param.default for name, param in init_params.items()}

    for k, v in params.items():
        if (k not in init_params or (  # happens if k is part of a **kwargs
                repr(v) != repr(init_params[k]) and
                not (is_scalar_nan(init_params[k]) and is_scalar_nan(v)))):
            filtered_params[k] = v
    return filtered_params


class _EstimatorPrettyPrinter(pprint.PrettyPrinter):
    """Pretty Printer class for estimator objects.

    This extends the pprint.PrettyPrinter class, because:
    - we need estimators to be printed with their parameters, e.g.
      Estimator(param1=value1, ...) which is not supported by default.
    - the 'compact' parameter of PrettyPrinter is ignored for dicts, which
      may lead to very long representations that we want to avoid.

    Quick overview of pprint.PrettyPrinter (see also
    https://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):

    - the entry point is the _format() method which calls format() (overridden
      here)
    - format() directly calls _safe_repr() for a first try at rendering the
      object
    - _safe_repr formats the whole object recursively, only calling itself,
      not caring about line length or anything
    - back to _format(), if the output string is too long, _format() then calls
      the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on
      the type of the object. This where the line length and the compact
      parameters are taken into account.
    - those _pprint_TYPE() methods will internally use the format() method for
      rendering the nested objects of an object (e.g. the elements of a list)

    In the end, everything has to be implemented twice: in _safe_repr and in
    the custom _pprint_TYPE methods. Unfortunately PrettyPrinter is really not
    straightforward to extend (especially when we want a compact output), so
    the code is a bit convoluted.

    This class overrides:
    - format() to support the changed_only parameter
    - _safe_repr to support printing of estimators (for when they fit on a
      single line)
    - _format_dict_items so that dict are correctly 'compacted'
    - _format_items so that ellipsis is used on long lists and tuples

    When estimators cannot be printed on a single line, the builtin _format()
    will call _pprint_estimator() because it was registered to do so (see
    _dispatch[BaseEstimator.__repr__] = _pprint_estimator).

    both _format_dict_items() and _pprint_estimator() use the
    _format_params_or_dict_items() method that will format parameters and
    key-value pairs respecting the compact parameter. This method needs another
    subroutine _pprint_key_val_tuple() used when a parameter or a key-value
    pair is too long to fit on a single line. This subroutine is called in
    _format() and is registered as well in the _dispatch dict (just like
    _pprint_estimator). We had to create the two classes KeyValTuple and
    KeyValTupleParam for this.
    """

    def __init__(self, indent=1, width=80, depth=None, stream=None, *,
                 compact=False, indent_at_name=True,
                 n_max_elements_to_show=None):
        super().__init__(indent, width, depth, stream, compact=compact)
        self._indent_at_name = indent_at_name
        if self._indent_at_name:
            self._indent_per_level = 1  # ignore indent param
        self._changed_only = get_config()['print_changed_only']
        # Max number of elements in a list, dict, tuple until we start using
        # ellipsis. This also affects the number of arguments of an estimators
        # (they are treated as dicts)
        self.n_max_elements_to_show = n_max_elements_to_show

    def format(self, object, context, maxlevels, level):
        return _safe_repr(object, context, maxlevels, level,
                          changed_only=self._changed_only)

    def _pprint_estimator(self, object, stream, indent, allowance, context,
                          level):
        stream.write(object.__class__.__name__ + '(')
        if self._indent_at_name:
            indent += len(object.__class__.__name__)

        if self._changed_only:
            params = _changed_params(object)
        else:
            params = object.get_params(deep=False)

        params = OrderedDict((name, val)
                             for (name, val) in sorted(params.items()))

        self._format_params(params.items(), stream, indent, allowance + 1,
                            context, level)
        stream.write(')')

    def _format_dict_items(self, items, stream, indent, allowance, context,
                           level):
        return self._format_params_or_dict_items(
            items, stream, indent, allowance, context, level, is_dict=True)

    def _format_params(self, items, stream, indent, allowance, context, level):
        return self._format_params_or_dict_items(
            items, stream, indent, allowance, context, level, is_dict=False)

    def _format_params_or_dict_items(self, object, stream, indent, allowance,
                                     context, level, is_dict):
        """Format dict items or parameters respecting the compact=True
        parameter. For some reason, the builtin rendering of dict items doesn't
        respect compact=True and will use one line per key-value if all cannot
        fit in a single line.
        Dict items will be rendered as <'key': value> while params will be
        rendered as <key=value>. The implementation is mostly copy/pasting from
        the builtin _format_items().
        This also adds ellipsis if the number of items is greater than
        self.n_max_elements_to_show.
        """
        write = stream.write
        indent += self._indent_per_level
        delimnl = ',\n' + ' ' * indent
        delim = ''
        width = max_width = self._width - indent + 1
        it = iter(object)
        try:
            next_ent = next(it)
        except StopIteration:
            return
        last = False
        n_items = 0
        while not last:
            if n_items == self.n_max_elements_to_show:
                write(', ...')
                break
            n_items += 1
            ent = next_ent
            try:
                next_ent = next(it)
            except StopIteration:
                last = True
                max_width -= allowance
                width -= allowance
            if self._compact:
                k, v = ent
                krepr = self._repr(k, context, level)
                vrepr = self._repr(v, context, level)
                if not is_dict:
                    krepr = krepr.strip("'")
                middle = ': ' if is_dict else '='
                rep = krepr + middle + vrepr
                w = len(rep) + 2
                if width < w:
                    width = max_width
                    if delim:
                        delim = delimnl
                if width >= w:
                    width -= w
                    write(delim)
                    delim = ', '
                    write(rep)
                    continue
            write(delim)
            delim = delimnl
            class_ = KeyValTuple if is_dict else KeyValTupleParam
            self._format(class_(ent), stream, indent,
                         allowance if last else 1, context, level)

    def _format_items(self, items, stream, indent, allowance, context, level):
        """Format the items of an iterable (list, tuple...). Same as the
        built-in _format_items, with support for ellipsis if the number of
        elements is greater than self.n_max_elements_to_show.
        """
        write = stream.write
        indent += self._indent_per_level
        if self._indent_per_level > 1:
            write((self._indent_per_level - 1) * ' ')
        delimnl = ',\n' + ' ' * indent
        delim = ''
        width = max_width = self._width - indent + 1
        it = iter(items)
        try:
            next_ent = next(it)
        except StopIteration:
            return
        last = False
        n_items = 0
        while not last:
            if n_items == self.n_max_elements_to_show:
                write(', ...')
                break
            n_items += 1
            ent = next_ent
            try:
                next_ent = next(it)
            except StopIteration:
                last = True
                max_width -= allowance
                width -= allowance
            if self._compact:
                rep = self._repr(ent, context, level)
                w = len(rep) + 2
                if width < w:
                    width = max_width
                    if delim:
                        delim = delimnl
                if width >= w:
                    width -= w
                    write(delim)
                    delim = ', '
                    write(rep)
                    continue
            write(delim)
            delim = delimnl
            self._format(ent, stream, indent,
                         allowance if last else 1, context, level)

    def _pprint_key_val_tuple(self, object, stream, indent, allowance, context,
                              level):
        """Pretty printing for key-value tuples from dict or parameters."""
        k, v = object
        rep = self._repr(k, context, level)
        if isinstance(object, KeyValTupleParam):
            rep = rep.strip("'")
            middle = '='
        else:
            middle = ': '
        stream.write(rep)
        stream.write(middle)
        self._format(v, stream, indent + len(rep) + len(middle), allowance,
                     context, level)

    # Note: need to copy _dispatch to prevent instances of the builtin
    # PrettyPrinter class to call methods of _EstimatorPrettyPrinter (see issue
    # 12906)
    # mypy error: "Type[PrettyPrinter]" has no attribute "_dispatch"
    _dispatch = pprint.PrettyPrinter._dispatch.copy()  # type: ignore
    _dispatch[BaseEstimator.__repr__] = _pprint_estimator
    _dispatch[KeyValTuple.__repr__] = _pprint_key_val_tuple


def _safe_repr(object, context, maxlevels, level, changed_only=False):
    """Same as the builtin _safe_repr, with added support for Estimator
    objects."""
    typ = type(object)

    if typ in pprint._builtin_scalars:
        return repr(object), True, False

    r = getattr(typ, "__repr__", None)
    if issubclass(typ, dict) and r is dict.__repr__:
        if not object:
            return "{}", True, False
        objid = id(object)
        if maxlevels and level >= maxlevels:
            return "{...}", False, objid in context
        if objid in context:
            return pprint._recursion(object), False, True
        context[objid] = 1
        readable = True
        recursive = False
        components = []
        append = components.append
        level += 1
        saferepr = _safe_repr
        items = sorted(object.items(), key=pprint._safe_tuple)
        for k, v in items:
            krepr, kreadable, krecur = saferepr(
                k, context, maxlevels, level, changed_only=changed_only)
            vrepr, vreadable, vrecur = saferepr(
                v, context, maxlevels, level, changed_only=changed_only)
            append("%s: %s" % (krepr, vrepr))
            readable = readable and kreadable and vreadable
            if krecur or vrecur:
                recursive = True
        del context[objid]
        return "{%s}" % ", ".join(components), readable, recursive

    if (issubclass(typ, list) and r is list.__repr__) or \
            (issubclass(typ, tuple) and r is tuple.__repr__):
        if issubclass(typ, list):
            if not object:
                return "[]", True, False
            format = "[%s]"
        elif len(object) == 1:
            format = "(%s,)"
        else:
            if not object:
                return "()", True, False
            format = "(%s)"
        objid = id(object)
        if maxlevels and level >= maxlevels:
            return format % "...", False, objid in context
        if objid in context:
            return pprint._recursion(object), False, True
        context[objid] = 1
        readable = True
        recursive = False
        components = []
        append = components.append
        level += 1
        for o in object:
            orepr, oreadable, orecur = _safe_repr(
                o, context, maxlevels, level, changed_only=changed_only)
            append(orepr)
            if not oreadable:
                readable = False
            if orecur:
                recursive = True
        del context[objid]
        return format % ", ".join(components), readable, recursive

    if issubclass(typ, BaseEstimator):
        objid = id(object)
        if maxlevels and level >= maxlevels:
            return "{...}", False, objid in context
        if objid in context:
            return pprint._recursion(object), False, True
        context[objid] = 1
        readable = True
        recursive = False
        if changed_only:
            params = _changed_params(object)
        else:
            params = object.get_params(deep=False)
        components = []
        append = components.append
        level += 1
        saferepr = _safe_repr
        items = sorted(params.items(), key=pprint._safe_tuple)
        for k, v in items:
            krepr, kreadable, krecur = saferepr(
                k, context, maxlevels, level, changed_only=changed_only)
            vrepr, vreadable, vrecur = saferepr(
                v, context, maxlevels, level, changed_only=changed_only)
            append("%s=%s" % (krepr.strip("'"), vrepr))
            readable = readable and kreadable and vreadable
            if krecur or vrecur:
                recursive = True
        del context[objid]
        return ("%s(%s)" % (typ.__name__, ", ".join(components)), readable,
                recursive)

    rep = repr(object)
    return rep, (rep and not rep.startswith('<')), False
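The practical effect of this printer is the print_changed_only behaviour of estimator reprs. A small sketch, assuming the vendored scikit-learn is importable (LogisticRegression is illustrative):

from sklearn import config_context
from sklearn.linear_model import LogisticRegression

est = LogisticRegression(C=2.0)
with config_context(print_changed_only=True):
    print(est)   # LogisticRegression(C=2.0) -- only non-default parameters
with config_context(print_changed_only=False):
    print(est)   # full parameter listing, laid out by _EstimatorPrettyPrinter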
BIN  venv/Lib/site-packages/sklearn/utils/_random.cp36-win32.pyd  (new binary file, not shown)
44  venv/Lib/site-packages/sklearn/utils/_random.pxd  (new file)
@@ -0,0 +1,44 @@
# Authors: Arnaud Joly
#
# License: BSD 3 clause


import numpy as np
cimport numpy as np
ctypedef np.npy_uint32 UINT32_t

cdef inline UINT32_t DEFAULT_SEED = 1

cdef enum:
    # Max value for our rand_r replacement (near the bottom).
    # We don't use RAND_MAX because it's different across platforms and
    # particularly tiny on Windows/MSVC.
    RAND_R_MAX = 0x7FFFFFFF

cpdef sample_without_replacement(np.int_t n_population,
                                 np.int_t n_samples,
                                 method=*,
                                 random_state=*)

# rand_r replacement using a 32bit XorShift generator
# See http://www.jstatsoft.org/v08/i14/paper for details
cdef inline UINT32_t our_rand_r(UINT32_t* seed) nogil:
    """Generate a pseudo-random np.uint32 from a np.uint32 seed"""
    # seed shouldn't ever be 0.
    if (seed[0] == 0): seed[0] = DEFAULT_SEED

    seed[0] ^= <UINT32_t>(seed[0] << 13)
    seed[0] ^= <UINT32_t>(seed[0] >> 17)
    seed[0] ^= <UINT32_t>(seed[0] << 5)

    # Note: we must be careful with the final line cast to np.uint32 so that
    # the function behaves consistently across platforms.
    #
    # The following cast might yield different results on different platforms:
    # wrong_cast = <UINT32_t> RAND_R_MAX + 1
    #
    # We can use:
    # good_cast = <UINT32_t>(RAND_R_MAX + 1)
    # or:
    # cdef np.uint32_t another_good_cast = <UINT32_t>RAND_R_MAX + 1
    return seed[0] % <UINT32_t>(RAND_R_MAX + 1)
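The cpdef sample_without_replacement declared above backs the public helper in sklearn.utils.random. A minimal usage sketch (the sizes are illustrative):

from sklearn.utils.random import sample_without_replacement

# Draw 5 distinct indices out of a population of 100, reproducibly.
idx = sample_without_replacement(n_population=100, n_samples=5,
                                 random_state=0)
print(idx)  # 5 unique integers in [0, 100)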
BIN  venv/Lib/site-packages/sklearn/utils/_seq_dataset.cp36-win32.pyd  (new binary file, not shown)
116  venv/Lib/site-packages/sklearn/utils/_seq_dataset.pxd  (new file)
@@ -0,0 +1,116 @@

#------------------------------------------------------------------------------

"""
Dataset abstractions for sequential data access.
WARNING: Do not edit .pxd file directly, it is generated from .pxd.tp
"""

cimport numpy as np

# SequentialDataset and its two concrete subclasses are (optionally randomized)
# iterators over the rows of a matrix X and corresponding target values y.


cdef class SequentialDataset64:
    cdef int current_index
    cdef np.ndarray index
    cdef int *index_data_ptr
    cdef Py_ssize_t n_samples
    cdef np.uint32_t seed

    cdef void shuffle(self, np.uint32_t seed) nogil
    cdef int _get_next_index(self) nogil
    cdef int _get_random_index(self) nogil

    cdef void _sample(self, double **x_data_ptr, int **x_ind_ptr,
                      int *nnz, double *y, double *sample_weight,
                      int current_index) nogil
    cdef void next(self, double **x_data_ptr, int **x_ind_ptr,
                   int *nnz, double *y, double *sample_weight) nogil
    cdef int random(self, double **x_data_ptr, int **x_ind_ptr,
                    int *nnz, double *y, double *sample_weight) nogil


cdef class ArrayDataset64(SequentialDataset64):
    cdef np.ndarray X
    cdef np.ndarray Y
    cdef np.ndarray sample_weights
    cdef Py_ssize_t n_features
    cdef np.npy_intp X_stride
    cdef double *X_data_ptr
    cdef double *Y_data_ptr
    cdef np.ndarray feature_indices
    cdef int *feature_indices_ptr
    cdef double *sample_weight_data


cdef class CSRDataset64(SequentialDataset64):
    cdef np.ndarray X_data
    cdef np.ndarray X_indptr
    cdef np.ndarray X_indices
    cdef np.ndarray Y
    cdef np.ndarray sample_weights
    cdef double *X_data_ptr
    cdef int *X_indptr_ptr
    cdef int *X_indices_ptr
    cdef double *Y_data_ptr
    cdef double *sample_weight_data

#------------------------------------------------------------------------------

"""
Dataset abstractions for sequential data access.
WARNING: Do not edit .pxd file directly, it is generated from .pxd.tp
"""

cimport numpy as np

# SequentialDataset and its two concrete subclasses are (optionally randomized)
# iterators over the rows of a matrix X and corresponding target values y.


cdef class SequentialDataset32:
    cdef int current_index
    cdef np.ndarray index
    cdef int *index_data_ptr
    cdef Py_ssize_t n_samples
    cdef np.uint32_t seed

    cdef void shuffle(self, np.uint32_t seed) nogil
    cdef int _get_next_index(self) nogil
    cdef int _get_random_index(self) nogil

    cdef void _sample(self, float **x_data_ptr, int **x_ind_ptr,
                      int *nnz, float *y, float *sample_weight,
                      int current_index) nogil
    cdef void next(self, float **x_data_ptr, int **x_ind_ptr,
                   int *nnz, float *y, float *sample_weight) nogil
    cdef int random(self, float **x_data_ptr, int **x_ind_ptr,
                    int *nnz, float *y, float *sample_weight) nogil


cdef class ArrayDataset32(SequentialDataset32):
    cdef np.ndarray X
    cdef np.ndarray Y
    cdef np.ndarray sample_weights
    cdef Py_ssize_t n_features
    cdef np.npy_intp X_stride
    cdef float *X_data_ptr
    cdef float *Y_data_ptr
    cdef np.ndarray feature_indices
    cdef int *feature_indices_ptr
    cdef float *sample_weight_data


cdef class CSRDataset32(SequentialDataset32):
    cdef np.ndarray X_data
    cdef np.ndarray X_indptr
    cdef np.ndarray X_indices
    cdef np.ndarray Y
    cdef np.ndarray sample_weights
    cdef float *X_data_ptr
    cdef int *X_indptr_ptr
    cdef int *X_indices_ptr
    cdef float *Y_data_ptr
    cdef float *sample_weight_data
94  venv/Lib/site-packages/sklearn/utils/_show_versions.py  (new file)
@@ -0,0 +1,94 @@
"""
Utility methods to print system info for debugging

adapted from :func:`pandas.show_versions`
"""
# License: BSD 3 clause

import platform
import sys
import importlib

from ._openmp_helpers import _openmp_parallelism_enabled


def _get_sys_info():
    """System information

    Return
    ------
    sys_info : dict
        system and Python version information

    """
    python = sys.version.replace('\n', ' ')

    blob = [
        ("python", python),
        ('executable', sys.executable),
        ("machine", platform.platform()),
    ]

    return dict(blob)


def _get_deps_info():
    """Overview of the installed version of main dependencies

    Returns
    -------
    deps_info: dict
        version information on relevant Python libraries

    """
    deps = [
        "pip",
        "setuptools",
        "sklearn",
        "numpy",
        "scipy",
        "Cython",
        "pandas",
        "matplotlib",
        "joblib",
        "threadpoolctl"
    ]

    def get_version(module):
        return module.__version__

    deps_info = {}

    for modname in deps:
        try:
            if modname in sys.modules:
                mod = sys.modules[modname]
            else:
                mod = importlib.import_module(modname)
            ver = get_version(mod)
            deps_info[modname] = ver
        except ImportError:
            deps_info[modname] = None

    return deps_info


def show_versions():
    """Print useful debugging information

    .. versionadded:: 0.20
    """

    sys_info = _get_sys_info()
    deps_info = _get_deps_info()

    print('\nSystem:')
    for k, stat in sys_info.items():
        print("{k:>10}: {stat}".format(k=k, stat=stat))

    print('\nPython dependencies:')
    for k, stat in deps_info.items():
        print("{k:>13}: {stat}".format(k=k, stat=stat))

    print("\n{k}: {stat}".format(k="Built with OpenMP",
                                 stat=_openmp_parallelism_enabled()))
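show_versions() above is also exposed publicly as sklearn.show_versions. A minimal sketch of the call (what gets printed depends on the local environment):

import sklearn

# Prints a "System:" block (python, executable, machine) followed by a
# "Python dependencies:" block with the versions gathered by _get_deps_info(),
# and finally whether the build has OpenMP parallelism enabled.
sklearn.show_versions()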
873  venv/Lib/site-packages/sklearn/utils/_testing.py  (new file)
@@ -0,0 +1,873 @@
"""Testing utilities."""

# Copyright (c) 2011, 2012
# Authors: Pietro Berkes,
#          Andreas Muller
#          Mathieu Blondel
#          Olivier Grisel
#          Arnaud Joly
#          Denis Engemann
#          Giorgio Patrini
#          Thierry Guillemot
# License: BSD 3 clause
import os
import os.path as op
import inspect
import pkgutil
import warnings
import sys
import functools
import tempfile
from subprocess import check_output, STDOUT, CalledProcessError
from subprocess import TimeoutExpired

import scipy as sp
from functools import wraps
from operator import itemgetter
from inspect import signature

import shutil
import atexit
import unittest
from unittest import TestCase

# WindowsError only exist on Windows
try:
    WindowsError
except NameError:
    WindowsError = None

from numpy.testing import assert_allclose
from numpy.testing import assert_almost_equal
from numpy.testing import assert_approx_equal
from numpy.testing import assert_array_equal
from numpy.testing import assert_array_almost_equal
from numpy.testing import assert_array_less
import numpy as np
import joblib

import sklearn
from sklearn.base import (BaseEstimator, ClassifierMixin, ClusterMixin,
                          RegressorMixin, TransformerMixin)
from sklearn.utils import deprecated, IS_PYPY, _IS_32BIT


__all__ = ["assert_equal", "assert_not_equal", "assert_raises",
           "assert_raises_regexp",
           "assert_almost_equal", "assert_array_equal",
           "assert_array_almost_equal", "assert_array_less",
           "assert_less", "assert_less_equal",
           "assert_greater", "assert_greater_equal",
           "assert_approx_equal", "assert_allclose",
           "assert_run_python_script", "SkipTest", "all_estimators"]

_dummy = TestCase('__init__')
deprecation_message = (
    'This helper is deprecated in version 0.22 and will be removed in version '
    '0.24. Please use "assert" instead'
)
assert_equal = deprecated(deprecation_message)(_dummy.assertEqual)
assert_not_equal = deprecated(deprecation_message)(_dummy.assertNotEqual)
assert_raises = _dummy.assertRaises
SkipTest = unittest.case.SkipTest
assert_dict_equal = _dummy.assertDictEqual
assert_in = deprecated(deprecation_message)(_dummy.assertIn)
assert_not_in = deprecated(deprecation_message)(_dummy.assertNotIn)
assert_less = deprecated(deprecation_message)(_dummy.assertLess)
assert_greater = deprecated(deprecation_message)(_dummy.assertGreater)
assert_less_equal = deprecated(deprecation_message)(_dummy.assertLessEqual)
assert_greater_equal = deprecated(deprecation_message)(
    _dummy.assertGreaterEqual)

assert_raises_regex = _dummy.assertRaisesRegex
# assert_raises_regexp is deprecated in Python 3.4 in favor of
# assert_raises_regex but lets keep the backward compat in scikit-learn with
# the old name for now
assert_raises_regexp = assert_raises_regex


def assert_warns(warning_class, func, *args, **kw):
    """Test that a certain warning occurs.

    Parameters
    ----------
    warning_class : the warning class
        The class to test for, e.g. UserWarning.

    func : callable
        Callable object to trigger warnings.

    *args : the positional arguments to `func`.

    **kw : the keyword arguments to `func`

    Returns
    -------

    result : the return value of `func`

    """
    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        # Trigger a warning.
        result = func(*args, **kw)
        if hasattr(np, 'FutureWarning'):
            # Filter out numpy-specific warnings in numpy >= 1.9
            w = [e for e in w
                 if e.category is not np.VisibleDeprecationWarning]

        # Verify some things
        if not len(w) > 0:
            raise AssertionError("No warning raised when calling %s"
                                 % func.__name__)

        found = any(warning.category is warning_class for warning in w)
        if not found:
            raise AssertionError("%s did not give warning: %s( is %s)"
                                 % (func.__name__, warning_class, w))
    return result


def assert_warns_message(warning_class, message, func, *args, **kw):
    # very important to avoid uncontrolled state propagation
    """Test that a certain warning occurs and with a certain message.

    Parameters
    ----------
    warning_class : the warning class
        The class to test for, e.g. UserWarning.

    message : str | callable
        The message or a substring of the message to test for. If callable,
        it takes a string as the argument and will trigger an AssertionError
        if the callable returns `False`.

    func : callable
        Callable object to trigger warnings.

    *args : the positional arguments to `func`.

    **kw : the keyword arguments to `func`.

    Returns
    -------
    result : the return value of `func`

    """
    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        if hasattr(np, 'FutureWarning'):
            # Let's not catch the numpy internal DeprecationWarnings
            warnings.simplefilter('ignore', np.VisibleDeprecationWarning)
        # Trigger a warning.
        result = func(*args, **kw)
        # Verify some things
        if not len(w) > 0:
            raise AssertionError("No warning raised when calling %s"
                                 % func.__name__)

        found = [issubclass(warning.category, warning_class) for warning in w]
        if not any(found):
            raise AssertionError("No warning raised for %s with class "
                                 "%s"
                                 % (func.__name__, warning_class))

        message_found = False
        # Checks the message of all warnings belong to warning_class
        for index in [i for i, x in enumerate(found) if x]:
            # substring will match, the entire message with typo won't
            msg = w[index].message  # For Python 3 compatibility
            msg = str(msg.args[0] if hasattr(msg, 'args') else msg)
            if callable(message):  # add support for certain tests
                check_in_message = message
            else:
                def check_in_message(msg): return message in msg

            if check_in_message(msg):
                message_found = True
                break

        if not message_found:
            raise AssertionError("Did not receive the message you expected "
                                 "('%s') for <%s>, got: '%s'"
                                 % (message, func.__name__, msg))

    return result


def assert_warns_div0(func, *args, **kw):
    """Assume that numpy's warning for divide by zero is raised

    Handles the case of platforms that do not support warning on divide by zero

    Parameters
    ----------
    func
    *args
    **kw
    """

    with np.errstate(divide='warn', invalid='warn'):
        try:
            assert_warns(RuntimeWarning, np.divide, 1, np.zeros(1))
        except AssertionError:
            # This platform does not report numpy divide by zeros
            return func(*args, **kw)
        return assert_warns_message(RuntimeWarning,
                                    'invalid value encountered',
                                    func, *args, **kw)


# To remove when we support numpy 1.7
def assert_no_warnings(func, *args, **kw):
    """
    Parameters
    ----------
    func
    *args
    **kw
    """
    # very important to avoid uncontrolled state propagation
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')

        result = func(*args, **kw)
        if hasattr(np, 'FutureWarning'):
            # Filter out numpy-specific warnings in numpy >= 1.9
            w = [e for e in w
                 if e.category is not np.VisibleDeprecationWarning]

        if len(w) > 0:
            raise AssertionError("Got warnings when calling %s: [%s]"
                                 % (func.__name__,
                                    ', '.join(str(warning) for warning in w)))
    return result


def ignore_warnings(obj=None, category=Warning):
    """Context manager and decorator to ignore warnings.

    Note: Using this (in both variants) will clear all warnings
    from all python modules loaded. In case you need to test
    cross-module-warning-logging, this is not your tool of choice.

    Parameters
    ----------
    obj : callable or None
        callable where you want to ignore the warnings.
    category : warning class, defaults to Warning.
        The category to filter. If Warning, all categories will be muted.

    Examples
    --------
    >>> with ignore_warnings():
    ...     warnings.warn('buhuhuhu')

    >>> def nasty_warn():
    ...     warnings.warn('buhuhuhu')
    ...     print(42)

    >>> ignore_warnings(nasty_warn)()
    42
    """
    if isinstance(obj, type) and issubclass(obj, Warning):
        # Avoid common pitfall of passing category as the first positional
        # argument which result in the test not being run
        warning_name = obj.__name__
        raise ValueError(
            "'obj' should be a callable where you want to ignore warnings. "
            "You passed a warning class instead: 'obj={warning_name}'. "
            "If you want to pass a warning class to ignore_warnings, "
            "you should use 'category={warning_name}'".format(
                warning_name=warning_name))
    elif callable(obj):
        return _IgnoreWarnings(category=category)(obj)
    else:
        return _IgnoreWarnings(category=category)


class _IgnoreWarnings:
    """Improved and simplified Python warnings context manager and decorator.

    This class allows the user to ignore the warnings raised by a function.
    Copied from Python 2.7.5 and modified as required.

    Parameters
    ----------
    category : tuple of warning class, default to Warning
        The category to filter. By default, all the categories will be muted.

    """

    def __init__(self, category):
        self._record = True
        self._module = sys.modules['warnings']
        self._entered = False
        self.log = []
        self.category = category

    def __call__(self, fn):
        """Decorator to catch and hide warnings without visual nesting."""
        @wraps(fn)
        def wrapper(*args, **kwargs):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", self.category)
                return fn(*args, **kwargs)

        return wrapper

    def __repr__(self):
        args = []
        if self._record:
            args.append("record=True")
        if self._module is not sys.modules['warnings']:
            args.append("module=%r" % self._module)
        name = type(self).__name__
        return "%s(%s)" % (name, ", ".join(args))

    def __enter__(self):
        if self._entered:
            raise RuntimeError("Cannot enter %r twice" % self)
        self._entered = True
        self._filters = self._module.filters
        self._module.filters = self._filters[:]
        self._showwarning = self._module.showwarning
        warnings.simplefilter("ignore", self.category)

    def __exit__(self, *exc_info):
        if not self._entered:
            raise RuntimeError("Cannot exit %r without entering first" % self)
        self._module.filters = self._filters
        self._module.showwarning = self._showwarning
        self.log[:] = []


def assert_raise_message(exceptions, message, function, *args, **kwargs):
    """Helper function to test the message raised in an exception.

    Given an exception, a callable to raise the exception, and
    a message string, tests that the correct exception is raised and
    that the message is a substring of the error thrown. Used to test
    that the specific message thrown during an exception is correct.

    Parameters
    ----------
    exceptions : exception or tuple of exception
        An Exception object.

    message : str
        The error message or a substring of the error message.

    function : callable
        Callable object to raise error.

    *args : the positional arguments to `function`.

    **kwargs : the keyword arguments to `function`.
    """
    try:
        function(*args, **kwargs)
    except exceptions as e:
        error_message = str(e)
        if message not in error_message:
            raise AssertionError("Error message does not include the expected"
" string: %r. Observed error message: %r" %
|
||||
(message, error_message))
|
||||
else:
|
||||
# concatenate exception names
|
||||
if isinstance(exceptions, tuple):
|
||||
names = " or ".join(e.__name__ for e in exceptions)
|
||||
else:
|
||||
names = exceptions.__name__
|
||||
|
||||
raise AssertionError("%s not raised by %s" %
|
||||
(names, function.__name__))
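To make the contract above concrete, here is a minimal usage sketch. The _divide helper is hypothetical, and the import path is an assumption (the file header for this module is not visible in this excerpt; it is presumed to be sklearn.utils._testing):

from sklearn.utils._testing import assert_raise_message  # assumed module path

def _divide(a, b):
    # hypothetical helper used only for this illustration
    if b == 0:
        raise ValueError("division by zero is not allowed")
    return a / b

# Passes: a ValueError is raised and "division by zero" is a substring of it.
assert_raise_message(ValueError, "division by zero", _divide, 1, 0)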
|
||||
|
||||
|
||||
def assert_allclose_dense_sparse(x, y, rtol=1e-07, atol=1e-9, err_msg=''):
|
||||
"""Assert allclose for sparse and dense data.
|
||||
|
||||
Both x and y need to be either sparse or dense, they
|
||||
can't be mixed.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : array-like or sparse matrix
|
||||
First array to compare.
|
||||
|
||||
y : array-like or sparse matrix
|
||||
Second array to compare.
|
||||
|
||||
rtol : float, optional
|
||||
relative tolerance; see numpy.allclose
|
||||
|
||||
atol : float, optional
|
||||
absolute tolerance; see numpy.allclose. Note that the default here is
|
||||
more tolerant than the default for numpy.testing.assert_allclose, where
|
||||
atol=0.
|
||||
|
||||
err_msg : string, default=''
|
||||
Error message to raise.
|
||||
"""
|
||||
if sp.sparse.issparse(x) and sp.sparse.issparse(y):
|
||||
x = x.tocsr()
|
||||
y = y.tocsr()
|
||||
x.sum_duplicates()
|
||||
y.sum_duplicates()
|
||||
assert_array_equal(x.indices, y.indices, err_msg=err_msg)
|
||||
assert_array_equal(x.indptr, y.indptr, err_msg=err_msg)
|
||||
assert_allclose(x.data, y.data, rtol=rtol, atol=atol, err_msg=err_msg)
|
||||
elif not sp.sparse.issparse(x) and not sp.sparse.issparse(y):
|
||||
# both dense
|
||||
assert_allclose(x, y, rtol=rtol, atol=atol, err_msg=err_msg)
|
||||
else:
|
||||
raise ValueError("Can only compare two sparse matrices,"
|
||||
" not a sparse matrix and an array.")
|
||||
|
||||
|
||||
# TODO: Remove in 0.24. This class is now in utils.__init__.
|
||||
def all_estimators(type_filter=None):
|
||||
"""Get a list of all estimators from sklearn.
|
||||
|
||||
This function crawls the module and gets all classes that inherit
|
||||
from BaseEstimator. Classes that are defined in test-modules are not
|
||||
included.
|
||||
By default meta_estimators such as GridSearchCV are also not included.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
type_filter : string, list of string, or None, default=None
|
||||
Which kind of estimators should be returned. If None, no filter is
|
||||
applied and all estimators are returned. Possible values are
|
||||
'classifier', 'regressor', 'cluster' and 'transformer' to get
|
||||
estimators only of these specific types, or a list of these to
|
||||
get the estimators that fit at least one of the types.
|
||||
|
||||
Returns
|
||||
-------
|
||||
estimators : list of tuples
|
||||
List of (name, class), where ``name`` is the class name as string
|
||||
and ``class`` is the actual type of the class.
|
||||
"""
|
||||
def is_abstract(c):
|
||||
if not(hasattr(c, '__abstractmethods__')):
|
||||
return False
|
||||
if not len(c.__abstractmethods__):
|
||||
return False
|
||||
return True
|
||||
|
||||
all_classes = []
|
||||
# get parent folder
|
||||
path = sklearn.__path__
|
||||
for importer, modname, ispkg in pkgutil.walk_packages(
|
||||
path=path, prefix='sklearn.', onerror=lambda x: None):
|
||||
if ".tests." in modname or "externals" in modname:
|
||||
continue
|
||||
if IS_PYPY and ('_svmlight_format_io' in modname or
|
||||
'feature_extraction._hashing_fast' in modname):
|
||||
continue
|
||||
# Ignore deprecation warnings triggered at import time.
|
||||
with ignore_warnings(category=FutureWarning):
|
||||
module = __import__(modname, fromlist="dummy")
|
||||
classes = inspect.getmembers(module, inspect.isclass)
|
||||
all_classes.extend(classes)
|
||||
|
||||
all_classes = set(all_classes)
|
||||
|
||||
estimators = [c for c in all_classes
|
||||
if (issubclass(c[1], BaseEstimator) and
|
||||
c[0] != 'BaseEstimator')]
|
||||
# get rid of abstract base classes
|
||||
estimators = [c for c in estimators if not is_abstract(c[1])]
|
||||
|
||||
if type_filter is not None:
|
||||
if not isinstance(type_filter, list):
|
||||
type_filter = [type_filter]
|
||||
else:
|
||||
type_filter = list(type_filter) # copy
|
||||
filtered_estimators = []
|
||||
filters = {'classifier': ClassifierMixin,
|
||||
'regressor': RegressorMixin,
|
||||
'transformer': TransformerMixin,
|
||||
'cluster': ClusterMixin}
|
||||
for name, mixin in filters.items():
|
||||
if name in type_filter:
|
||||
type_filter.remove(name)
|
||||
filtered_estimators.extend([est for est in estimators
|
||||
if issubclass(est[1], mixin)])
|
||||
estimators = filtered_estimators
|
||||
if type_filter:
|
||||
raise ValueError("Parameter type_filter must be 'classifier', "
|
||||
"'regressor', 'transformer', 'cluster' or "
|
||||
"None, got"
|
||||
" %s." % repr(type_filter))
|
||||
|
||||
# drop duplicates, sort for reproducibility
|
||||
# itemgetter is used to ensure the sort does not extend to the 2nd item of
|
||||
# the tuple
|
||||
return sorted(set(estimators), key=itemgetter(0))
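For illustration, a minimal call using the canonical location named in the TODO above (the exact first entry depends on the installed version, so it is shown only as an example):

from sklearn.utils import all_estimators  # canonical location per the TODO above

classifiers = all_estimators(type_filter='classifier')
print(len(classifiers))        # number of classifier classes found
name, Klass = classifiers[0]   # e.g. ('AdaBoostClassifier', <class ...>)
print(name, Klass)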
|
||||
|
||||
|
||||
def set_random_state(estimator, random_state=0):
|
||||
"""Set random state of an estimator if it has the `random_state` param.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
estimator : object
|
||||
The estimator
|
||||
random_state : int, RandomState instance or None, optional, default=0
|
||||
Pseudo random number generator state.
|
||||
Pass an int for reproducible results across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
"""
|
||||
if "random_state" in estimator.get_params():
|
||||
estimator.set_params(random_state=random_state)
|
||||
|
||||
|
||||
try:
|
||||
import pytest
|
||||
|
||||
skip_if_32bit = pytest.mark.skipif(_IS_32BIT,
|
||||
reason='skipped on 32bit platforms')
|
||||
skip_travis = pytest.mark.skipif(os.environ.get('TRAVIS') == 'true',
|
||||
reason='skip on travis')
|
||||
fails_if_pypy = pytest.mark.xfail(IS_PYPY,
|
||||
reason='not compatible with PyPy')
|
||||
skip_if_no_parallel = pytest.mark.skipif(not joblib.parallel.mp,
|
||||
reason="joblib is in serial mode")
|
||||
|
||||
# Decorator for tests involving both BLAS calls and multiprocessing.
|
||||
#
|
||||
# Under POSIX (e.g. Linux or OSX), using multiprocessing in conjunction
|
||||
# with some implementation of BLAS (or other libraries that manage an
|
||||
# internal posix thread pool) can cause a crash or a freeze of the Python
|
||||
# process.
|
||||
#
|
||||
# In practice all known packaged distributions (from Linux distros or
|
||||
# Anaconda) of BLAS under Linux seem to be safe. So this problem seems
|
||||
# to only impact OSX users.
|
||||
#
|
||||
# This wrapper makes it possible to skip tests that can possibly cause
|
||||
# this crash under OS X.
|
||||
#
|
||||
# Under Python 3.4+ it is possible to use the `forkserver` start method
|
||||
# for multiprocessing to avoid this issue. However it can cause pickling
|
||||
# errors on interactively defined functions. It is therefore not enabled by
|
||||
# default.
|
||||
|
||||
if_safe_multiprocessing_with_blas = pytest.mark.skipif(
|
||||
sys.platform == 'darwin',
|
||||
reason="Possible multi-process bug with some BLAS")
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def check_skip_network():
|
||||
if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 0)):
|
||||
raise SkipTest("Text tutorial requires large dataset download")
|
||||
|
||||
|
||||
def _delete_folder(folder_path, warn=False):
|
||||
"""Utility function to cleanup a temporary folder if still existing.
|
||||
|
||||
Copy from joblib.pool (for independence).
|
||||
"""
|
||||
try:
|
||||
if os.path.exists(folder_path):
|
||||
# This can fail under windows,
|
||||
# but will succeed when called by atexit
|
||||
shutil.rmtree(folder_path)
|
||||
except WindowsError:
|
||||
if warn:
|
||||
warnings.warn("Could not delete temporary folder %s" % folder_path)
|
||||
|
||||
|
||||
class TempMemmap:
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
data
|
||||
mmap_mode
|
||||
"""
|
||||
def __init__(self, data, mmap_mode='r'):
|
||||
self.mmap_mode = mmap_mode
|
||||
self.data = data
|
||||
|
||||
def __enter__(self):
|
||||
data_read_only, self.temp_folder = create_memmap_backed_data(
|
||||
self.data, mmap_mode=self.mmap_mode, return_folder=True)
|
||||
return data_read_only
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
_delete_folder(self.temp_folder)
|
||||
|
||||
|
||||
def create_memmap_backed_data(data, mmap_mode='r', return_folder=False):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
data
|
||||
mmap_mode
|
||||
return_folder
|
||||
"""
|
||||
temp_folder = tempfile.mkdtemp(prefix='sklearn_testing_')
|
||||
atexit.register(functools.partial(_delete_folder, temp_folder, warn=True))
|
||||
filename = op.join(temp_folder, 'data.pkl')
|
||||
joblib.dump(data, filename)
|
||||
memmap_backed_data = joblib.load(filename, mmap_mode=mmap_mode)
|
||||
result = (memmap_backed_data if not return_folder
|
||||
else (memmap_backed_data, temp_folder))
|
||||
return result
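A small sketch showing both entry points defined above; data is illustrative and the import path is again assumed to be sklearn.utils._testing:

import numpy as np
from sklearn.utils._testing import TempMemmap, create_memmap_backed_data  # assumed path

X = np.arange(12, dtype=np.float64).reshape(3, 4)

# Context-manager form: the temporary folder is deleted on exit.
with TempMemmap(X) as X_mm:
    assert not X_mm.flags['WRITEABLE']   # loaded with mmap_mode='r'

# Function form: also return the folder so it can be inspected or cleaned up.
X_mm, folder = create_memmap_backed_data(X, mmap_mode='r', return_folder=True)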
|
||||
|
||||
|
||||
# Utils to test docstrings
|
||||
|
||||
|
||||
def _get_args(function, varargs=False):
|
||||
"""Helper to get function arguments"""
|
||||
|
||||
try:
|
||||
params = signature(function).parameters
|
||||
except ValueError:
|
||||
# Error on builtin C function
|
||||
return []
|
||||
args = [key for key, param in params.items()
|
||||
if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)]
|
||||
if varargs:
|
||||
varargs = [param.name for param in params.values()
|
||||
if param.kind == param.VAR_POSITIONAL]
|
||||
if len(varargs) == 0:
|
||||
varargs = None
|
||||
return args, varargs
|
||||
else:
|
||||
return args
|
||||
|
||||
|
||||
def _get_func_name(func):
|
||||
"""Get function full name
|
||||
|
||||
Parameters
|
||||
----------
|
||||
func : callable
|
||||
The function object.
|
||||
|
||||
Returns
|
||||
-------
|
||||
name : str
|
||||
The function name.
|
||||
"""
|
||||
parts = []
|
||||
module = inspect.getmodule(func)
|
||||
if module:
|
||||
parts.append(module.__name__)
|
||||
|
||||
qualname = func.__qualname__
|
||||
if qualname != func.__name__:
|
||||
parts.append(qualname[:qualname.find('.')])
|
||||
|
||||
parts.append(func.__name__)
|
||||
return '.'.join(parts)
|
||||
|
||||
|
||||
def check_docstring_parameters(func, doc=None, ignore=None):
|
||||
"""Helper to check docstring
|
||||
|
||||
Parameters
|
||||
----------
|
||||
func : callable
|
||||
The function object to test.
|
||||
doc : str, optional (default: None)
|
||||
Docstring if it is passed manually to the test.
|
||||
ignore : None | list
|
||||
Parameters to ignore.
|
||||
|
||||
Returns
|
||||
-------
|
||||
incorrect : list
|
||||
A list of string describing the incorrect results.
|
||||
"""
|
||||
from numpydoc import docscrape
|
||||
incorrect = []
|
||||
ignore = [] if ignore is None else ignore
|
||||
|
||||
func_name = _get_func_name(func)
|
||||
if (not func_name.startswith('sklearn.') or
|
||||
func_name.startswith('sklearn.externals')):
|
||||
return incorrect
|
||||
# Don't check docstring for property-functions
|
||||
if inspect.isdatadescriptor(func):
|
||||
return incorrect
|
||||
# Don't check docstring for setup / teardown pytest functions
|
||||
if func_name.split('.')[-1] in ('setup_module', 'teardown_module'):
|
||||
return incorrect
|
||||
# Dont check estimator_checks module
|
||||
if func_name.split('.')[2] == 'estimator_checks':
|
||||
return incorrect
|
||||
# Get the arguments from the function signature
|
||||
param_signature = list(filter(lambda x: x not in ignore, _get_args(func)))
|
||||
# drop self
|
||||
if len(param_signature) > 0 and param_signature[0] == 'self':
|
||||
param_signature.remove('self')
|
||||
|
||||
# Analyze function's docstring
|
||||
if doc is None:
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
try:
|
||||
doc = docscrape.FunctionDoc(func)
|
||||
except Exception as exp:
|
||||
incorrect += [func_name + ' parsing error: ' + str(exp)]
|
||||
return incorrect
|
||||
if len(w):
|
||||
raise RuntimeError('Error for %s:\n%s' % (func_name, w[0]))
|
||||
|
||||
param_docs = []
|
||||
for name, type_definition, param_doc in doc['Parameters']:
|
||||
# Type hints are empty only if parameter name ended with :
|
||||
if not type_definition.strip():
|
||||
if ':' in name and name[:name.index(':')][-1:].strip():
|
||||
incorrect += [func_name +
|
||||
' There was no space between the param name and '
|
||||
'colon (%r)' % name]
|
||||
elif name.rstrip().endswith(':'):
|
||||
incorrect += [func_name +
|
||||
' Parameter %r has an empty type spec. '
|
||||
'Remove the colon' % (name.lstrip())]
|
||||
|
||||
# Create a list of parameters to compare with the parameters gotten
|
||||
# from the func signature
|
||||
if '*' not in name:
|
||||
param_docs.append(name.split(':')[0].strip('` '))
|
||||
|
||||
# If one of the docstring's parameters had an error then return that
|
||||
# incorrect message
|
||||
if len(incorrect) > 0:
|
||||
return incorrect
|
||||
|
||||
# Remove the parameters that should be ignored from list
|
||||
param_docs = list(filter(lambda x: x not in ignore, param_docs))
|
||||
|
||||
# The following is derived from pytest, Copyright (c) 2004-2017 Holger
|
||||
# Krekel and others, Licensed under MIT License. See
|
||||
# https://github.com/pytest-dev/pytest
|
||||
|
||||
message = []
|
||||
for i in range(min(len(param_docs), len(param_signature))):
|
||||
if param_signature[i] != param_docs[i]:
|
||||
message += ["There's a parameter name mismatch in function"
|
||||
" docstring w.r.t. function signature, at index %s"
|
||||
" diff: %r != %r" %
|
||||
(i, param_signature[i], param_docs[i])]
|
||||
break
|
||||
if len(param_signature) > len(param_docs):
|
||||
message += ["Parameters in function docstring have less items w.r.t."
|
||||
" function signature, first missing item: %s" %
|
||||
param_signature[len(param_docs)]]
|
||||
|
||||
elif len(param_signature) < len(param_docs):
|
||||
message += ["Parameters in function docstring have more items w.r.t."
|
||||
" function signature, first extra item: %s" %
|
||||
param_docs[len(param_signature)]]
|
||||
|
||||
# If there wasn't any difference in the parameters themselves between
|
||||
# docstring and signature including having the same length then return
|
||||
# empty list
|
||||
if len(message) == 0:
|
||||
return []
|
||||
|
||||
import difflib
|
||||
import pprint
|
||||
|
||||
param_docs_formatted = pprint.pformat(param_docs).splitlines()
|
||||
param_signature_formatted = pprint.pformat(param_signature).splitlines()
|
||||
|
||||
message += ["Full diff:"]
|
||||
|
||||
message.extend(
|
||||
line.strip() for line in difflib.ndiff(param_signature_formatted,
|
||||
param_docs_formatted)
|
||||
)
|
||||
|
||||
incorrect.extend(message)
|
||||
|
||||
# Prepend function name
|
||||
incorrect = ['In function: ' + func_name] + incorrect
|
||||
|
||||
return incorrect
|
||||
|
||||
|
||||
def assert_run_python_script(source_code, timeout=60):
|
||||
"""Utility to check assertions in an independent Python subprocess.
|
||||
|
||||
The script provided in the source code should return 0 and not print
|
||||
anything on stderr or stdout.
|
||||
|
||||
This is a port from cloudpickle https://github.com/cloudpipe/cloudpickle
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source_code : str
|
||||
The Python source code to execute.
|
||||
timeout : int
|
||||
Time in seconds before timeout.
|
||||
"""
|
||||
fd, source_file = tempfile.mkstemp(suffix='_src_test_sklearn.py')
|
||||
os.close(fd)
|
||||
try:
|
||||
with open(source_file, 'wb') as f:
|
||||
f.write(source_code.encode('utf-8'))
|
||||
cmd = [sys.executable, source_file]
|
||||
cwd = op.normpath(op.join(op.dirname(sklearn.__file__), '..'))
|
||||
env = os.environ.copy()
|
||||
try:
|
||||
env["PYTHONPATH"] = os.pathsep.join([cwd, env["PYTHONPATH"]])
|
||||
except KeyError:
|
||||
env["PYTHONPATH"] = cwd
|
||||
kwargs = {
|
||||
'cwd': cwd,
|
||||
'stderr': STDOUT,
|
||||
'env': env
|
||||
}
|
||||
# If coverage is running, pass the config file to the subprocess
|
||||
coverage_rc = os.environ.get("COVERAGE_PROCESS_START")
|
||||
if coverage_rc:
|
||||
kwargs['env']['COVERAGE_PROCESS_START'] = coverage_rc
|
||||
|
||||
kwargs['timeout'] = timeout
|
||||
try:
|
||||
try:
|
||||
out = check_output(cmd, **kwargs)
|
||||
except CalledProcessError as e:
|
||||
raise RuntimeError(u"script errored with output:\n%s"
|
||||
% e.output.decode('utf-8'))
|
||||
if out != b"":
|
||||
raise AssertionError(out.decode('utf-8'))
|
||||
except TimeoutExpired as e:
|
||||
raise RuntimeError(u"script timeout, output so far:\n%s"
|
||||
% e.output.decode('utf-8'))
|
||||
finally:
|
||||
os.unlink(source_file)
|
||||
|
||||
|
||||
def _convert_container(container, constructor_name, columns_name=None):
|
||||
if constructor_name == 'list':
|
||||
return list(container)
|
||||
elif constructor_name == 'tuple':
|
||||
return tuple(container)
|
||||
elif constructor_name == 'array':
|
||||
return np.asarray(container)
|
||||
elif constructor_name == 'sparse':
|
||||
return sp.sparse.csr_matrix(container)
|
||||
elif constructor_name == 'dataframe':
|
||||
pd = pytest.importorskip('pandas')
|
||||
return pd.DataFrame(container, columns=columns_name)
|
||||
elif constructor_name == 'series':
|
||||
pd = pytest.importorskip('pandas')
|
||||
return pd.Series(container)
|
||||
elif constructor_name == 'index':
|
||||
pd = pytest.importorskip('pandas')
|
||||
return pd.Index(container)
|
||||
elif constructor_name == 'slice':
|
||||
return slice(container[0], container[1])
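A quick sketch of this private helper, which the test suite uses to parametrize over container types (import path assumed, as above):

import numpy as np
from sklearn.utils._testing import _convert_container  # assumed module path

X = [[1, 2], [3, 4]]
print(_convert_container(X, 'array'))        # numpy ndarray
print(_convert_container(X, 'sparse'))       # scipy CSR matrix
print(_convert_container(X, 'dataframe', columns_name=['a', 'b']))  # needs pandas
print(_convert_container([0, 3], 'slice'))   # slice(0, 3)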
|
Binary file not shown.
20
venv/Lib/site-packages/sklearn/utils/_weight_vector.pxd
Normal file
|
@ -0,0 +1,20 @@
|
|||
"""Efficient (dense) parameter vector implementation for linear models. """
|
||||
|
||||
cdef class WeightVector(object):
|
||||
cdef double *w_data_ptr
|
||||
cdef double *aw_data_ptr
|
||||
cdef double wscale
|
||||
cdef double average_a
|
||||
cdef double average_b
|
||||
cdef int n_features
|
||||
cdef double sq_norm
|
||||
|
||||
cdef void add(self, double *x_data_ptr, int *x_ind_ptr,
|
||||
int xnnz, double c) nogil
|
||||
cdef void add_average(self, double *x_data_ptr, int *x_ind_ptr,
|
||||
int xnnz, double c, double num_iter) nogil
|
||||
cdef double dot(self, double *x_data_ptr, int *x_ind_ptr,
|
||||
int xnnz) nogil
|
||||
cdef void scale(self, double c) nogil
|
||||
cdef void reset_wscale(self) nogil
|
||||
cdef double norm(self) nogil
|
BIN
venv/Lib/site-packages/sklearn/utils/arrayfuncs.cp36-win32.pyd
Normal file
Binary file not shown.
181
venv/Lib/site-packages/sklearn/utils/class_weight.py
Normal file
|
@ -0,0 +1,181 @@
|
|||
# Authors: Andreas Mueller
|
||||
# Manoj Kumar
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .validation import _deprecate_positional_args
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def compute_class_weight(class_weight, *, classes, y):
|
||||
"""Estimate class weights for unbalanced datasets.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
class_weight : dict, 'balanced' or None
|
||||
If 'balanced', class weights will be given by
|
||||
``n_samples / (n_classes * np.bincount(y))``.
|
||||
If a dictionary is given, keys are classes and values
|
||||
are corresponding class weights.
|
||||
If None is given, the class weights will be uniform.
|
||||
|
||||
classes : ndarray
|
||||
Array of the classes occurring in the data, as given by
|
||||
``np.unique(y_org)`` with ``y_org`` the original class labels.
|
||||
|
||||
y : array-like, shape (n_samples,)
|
||||
Array of original class labels per sample.
|
||||
|
||||
Returns
|
||||
-------
|
||||
class_weight_vect : ndarray, shape (n_classes,)
|
||||
Array with class_weight_vect[i] the weight for i-th class
|
||||
|
||||
References
|
||||
----------
|
||||
The "balanced" heuristic is inspired by
|
||||
Logistic Regression in Rare Events Data, King, Zeng, 2001.
|
||||
"""
|
||||
# Import error caused by circular imports.
|
||||
from ..preprocessing import LabelEncoder
|
||||
|
||||
if set(y) - set(classes):
|
||||
raise ValueError("classes should include all valid labels that can "
|
||||
"be in y")
|
||||
if class_weight is None or len(class_weight) == 0:
|
||||
# uniform class weights
|
||||
weight = np.ones(classes.shape[0], dtype=np.float64, order='C')
|
||||
elif class_weight == 'balanced':
|
||||
# Find the weight of each class as present in y.
|
||||
le = LabelEncoder()
|
||||
y_ind = le.fit_transform(y)
|
||||
if not all(np.in1d(classes, le.classes_)):
|
||||
raise ValueError("classes should have valid labels that are in y")
|
||||
|
||||
recip_freq = len(y) / (len(le.classes_) *
|
||||
np.bincount(y_ind).astype(np.float64))
|
||||
weight = recip_freq[le.transform(classes)]
|
||||
else:
|
||||
# user-defined dictionary
|
||||
weight = np.ones(classes.shape[0], dtype=np.float64, order='C')
|
||||
if not isinstance(class_weight, dict):
|
||||
raise ValueError("class_weight must be dict, 'balanced', or None,"
|
||||
" got: %r" % class_weight)
|
||||
for c in class_weight:
|
||||
i = np.searchsorted(classes, c)
|
||||
if i >= len(classes) or classes[i] != c:
|
||||
raise ValueError("Class label {} not present.".format(c))
|
||||
else:
|
||||
weight[i] = class_weight[c]
|
||||
|
||||
return weight
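A minimal sketch of the 'balanced' heuristic described above, with illustrative data:

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

y = np.array([0, 0, 0, 1])
classes = np.array([0, 1])
# n_samples / (n_classes * np.bincount(y)) = 4 / (2 * [3, 1])
print(compute_class_weight('balanced', classes=classes, y=y))   # [0.6667, 2.0]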
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def compute_sample_weight(class_weight, y, *, indices=None):
|
||||
"""Estimate sample weights by class for unbalanced datasets.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
class_weight : dict, list of dicts, "balanced", or None, optional
|
||||
Weights associated with classes in the form ``{class_label: weight}``.
|
||||
If not given, all classes are supposed to have weight one. For
|
||||
multi-output problems, a list of dicts can be provided in the same
|
||||
order as the columns of y.
|
||||
|
||||
Note that for multioutput (including multilabel) weights should be
|
||||
defined for each class of every column in its own dict. For example,
|
||||
for four-class multilabel classification weights should be
|
||||
[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of
|
||||
[{1:1}, {2:5}, {3:1}, {4:1}].
|
||||
|
||||
The "balanced" mode uses the values of y to automatically adjust
|
||||
weights inversely proportional to class frequencies in the input data:
|
||||
``n_samples / (n_classes * np.bincount(y))``.
|
||||
|
||||
For multi-output, the weights of each column of y will be multiplied.
|
||||
|
||||
y : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Array of original class labels per sample.
|
||||
|
||||
indices : array-like, shape (n_subsample,), or None
|
||||
Array of indices to be used in a subsample. Can be of length less than
|
||||
n_samples in the case of a subsample, or equal to n_samples in the
|
||||
case of a bootstrap subsample with repeated indices. If None, the
|
||||
sample weight will be calculated over the full sample. Only "balanced"
|
||||
is supported for class_weight if this is provided.
|
||||
|
||||
Returns
|
||||
-------
|
||||
sample_weight_vect : ndarray, shape (n_samples,)
|
||||
Array with sample weights as applied to the original y
|
||||
"""
|
||||
|
||||
y = np.atleast_1d(y)
|
||||
if y.ndim == 1:
|
||||
y = np.reshape(y, (-1, 1))
|
||||
n_outputs = y.shape[1]
|
||||
|
||||
if isinstance(class_weight, str):
|
||||
if class_weight not in ['balanced']:
|
||||
raise ValueError('The only valid preset for class_weight is '
|
||||
'"balanced". Given "%s".' % class_weight)
|
||||
elif (indices is not None and
|
||||
not isinstance(class_weight, str)):
|
||||
raise ValueError('The only valid class_weight for subsampling is '
|
||||
'"balanced". Given "%s".' % class_weight)
|
||||
elif n_outputs > 1:
|
||||
if (not hasattr(class_weight, "__iter__") or
|
||||
isinstance(class_weight, dict)):
|
||||
raise ValueError("For multi-output, class_weight should be a "
|
||||
"list of dicts, or a valid string.")
|
||||
if len(class_weight) != n_outputs:
|
||||
raise ValueError("For multi-output, number of elements in "
|
||||
"class_weight should match number of outputs.")
|
||||
|
||||
expanded_class_weight = []
|
||||
for k in range(n_outputs):
|
||||
|
||||
y_full = y[:, k]
|
||||
classes_full = np.unique(y_full)
|
||||
classes_missing = None
|
||||
|
||||
if class_weight == 'balanced' or n_outputs == 1:
|
||||
class_weight_k = class_weight
|
||||
else:
|
||||
class_weight_k = class_weight[k]
|
||||
|
||||
if indices is not None:
|
||||
# Get class weights for the subsample, covering all classes in
|
||||
# case some labels that were present in the original data are
|
||||
# missing from the sample.
|
||||
y_subsample = y[indices, k]
|
||||
classes_subsample = np.unique(y_subsample)
|
||||
|
||||
weight_k = np.take(compute_class_weight(class_weight_k,
|
||||
classes=classes_subsample,
|
||||
y=y_subsample),
|
||||
np.searchsorted(classes_subsample,
|
||||
classes_full),
|
||||
mode='clip')
|
||||
|
||||
classes_missing = set(classes_full) - set(classes_subsample)
|
||||
else:
|
||||
weight_k = compute_class_weight(class_weight_k,
|
||||
classes=classes_full,
|
||||
y=y_full)
|
||||
|
||||
weight_k = weight_k[np.searchsorted(classes_full, y_full)]
|
||||
|
||||
if classes_missing:
|
||||
# Make missing classes' weight zero
|
||||
weight_k[np.in1d(y_full, list(classes_missing))] = 0.
|
||||
|
||||
expanded_class_weight.append(weight_k)
|
||||
|
||||
expanded_class_weight = np.prod(expanded_class_weight,
|
||||
axis=0,
|
||||
dtype=np.float64)
|
||||
|
||||
return expanded_class_weight
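And the per-sample counterpart, expanding the same class weights onto each sample:

import numpy as np
from sklearn.utils.class_weight import compute_sample_weight

y = np.array([0, 0, 0, 1])
# Each sample receives the 'balanced' weight of its class.
print(compute_sample_weight('balanced', y))   # [0.6667, 0.6667, 0.6667, 2.0]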
|
143
venv/Lib/site-packages/sklearn/utils/deprecation.py
Normal file
|
@ -0,0 +1,143 @@
|
|||
import warnings
|
||||
import functools
|
||||
|
||||
|
||||
__all__ = ["deprecated"]
|
||||
|
||||
|
||||
class deprecated:
|
||||
"""Decorator to mark a function or class as deprecated.
|
||||
|
||||
Issue a warning when the function is called/the class is instantiated and
|
||||
adds a warning to the docstring.
|
||||
|
||||
The optional extra argument will be appended to the deprecation message
|
||||
and the docstring. Note: to use this with the default value for extra, put
|
||||
in an empty set of parentheses:
|
||||
|
||||
>>> from sklearn.utils import deprecated
|
||||
>>> deprecated()
|
||||
<sklearn.utils.deprecation.deprecated object at ...>
|
||||
|
||||
>>> @deprecated()
|
||||
... def some_function(): pass
|
||||
|
||||
Parameters
|
||||
----------
|
||||
extra : string
|
||||
to be added to the deprecation messages
|
||||
"""
|
||||
|
||||
# Adapted from https://wiki.python.org/moin/PythonDecoratorLibrary,
|
||||
# but with many changes.
|
||||
|
||||
def __init__(self, extra=''):
|
||||
self.extra = extra
|
||||
|
||||
def __call__(self, obj):
|
||||
"""Call method
|
||||
|
||||
Parameters
|
||||
----------
|
||||
obj : object
|
||||
"""
|
||||
if isinstance(obj, type):
|
||||
return self._decorate_class(obj)
|
||||
elif isinstance(obj, property):
|
||||
# Note that this is only triggered properly if the `property`
|
||||
# decorator comes before the `deprecated` decorator, like so:
|
||||
#
|
||||
# @deprecated(msg)
|
||||
# @property
|
||||
# def deprecated_attribute_(self):
|
||||
# ...
|
||||
return self._decorate_property(obj)
|
||||
else:
|
||||
return self._decorate_fun(obj)
|
||||
|
||||
def _decorate_class(self, cls):
|
||||
msg = "Class %s is deprecated" % cls.__name__
|
||||
if self.extra:
|
||||
msg += "; %s" % self.extra
|
||||
|
||||
# FIXME: we should probably reset __new__ for full generality
|
||||
init = cls.__init__
|
||||
|
||||
def wrapped(*args, **kwargs):
|
||||
warnings.warn(msg, category=FutureWarning)
|
||||
return init(*args, **kwargs)
|
||||
cls.__init__ = wrapped
|
||||
|
||||
wrapped.__name__ = '__init__'
|
||||
wrapped.__doc__ = self._update_doc(init.__doc__)
|
||||
wrapped.deprecated_original = init
|
||||
|
||||
return cls
|
||||
|
||||
def _decorate_fun(self, fun):
|
||||
"""Decorate function fun"""
|
||||
|
||||
msg = "Function %s is deprecated" % fun.__name__
|
||||
if self.extra:
|
||||
msg += "; %s" % self.extra
|
||||
|
||||
@functools.wraps(fun)
|
||||
def wrapped(*args, **kwargs):
|
||||
warnings.warn(msg, category=FutureWarning)
|
||||
return fun(*args, **kwargs)
|
||||
|
||||
wrapped.__doc__ = self._update_doc(wrapped.__doc__)
|
||||
# Add a reference to the wrapped function so that we can introspect
|
||||
# on function arguments in Python 2 (already works in Python 3)
|
||||
wrapped.__wrapped__ = fun
|
||||
|
||||
return wrapped
|
||||
|
||||
def _decorate_property(self, prop):
|
||||
msg = self.extra
|
||||
|
||||
@property
|
||||
def wrapped(*args, **kwargs):
|
||||
warnings.warn(msg, category=FutureWarning)
|
||||
return prop.fget(*args, **kwargs)
|
||||
|
||||
return wrapped
|
||||
|
||||
def _update_doc(self, olddoc):
|
||||
newdoc = "DEPRECATED"
|
||||
if self.extra:
|
||||
newdoc = "%s: %s" % (newdoc, self.extra)
|
||||
if olddoc:
|
||||
newdoc = "%s\n\n %s" % (newdoc, olddoc)
|
||||
return newdoc
|
||||
|
||||
|
||||
def _is_deprecated(func):
|
||||
"""Helper to check if func is wrapped by our deprecated decorator"""
|
||||
closures = getattr(func, '__closure__', [])
|
||||
if closures is None:
|
||||
closures = []
|
||||
is_deprecated = ('deprecated' in ''.join([c.cell_contents
|
||||
for c in closures
|
||||
if isinstance(c.cell_contents, str)]))
|
||||
return is_deprecated
|
||||
|
||||
|
||||
def _raise_dep_warning_if_not_pytest(deprecated_path, correct_path):
|
||||
|
||||
# Raise a deprecation warning with standardized deprecation message.
|
||||
# Useful because we are now deprecating # anything that isn't explicitly
|
||||
# in an __init__ file.
|
||||
|
||||
# TODO: remove in 0.24 since this shouldn't be needed anymore.
|
||||
|
||||
message = (
|
||||
"The {deprecated_path} module is deprecated in version "
|
||||
"0.22 and will be removed in version 0.24. "
|
||||
"The corresponding classes / functions "
|
||||
"should instead be imported from {correct_path}. "
|
||||
"Anything that cannot be imported from {correct_path} is now "
|
||||
"part of the private API."
|
||||
).format(deprecated_path=deprecated_path, correct_path=correct_path)
|
||||
|
||||
warnings.warn(message, FutureWarning)
|
3021
venv/Lib/site-packages/sklearn/utils/estimator_checks.py
Normal file
File diff suppressed because it is too large
Load diff
837
venv/Lib/site-packages/sklearn/utils/extmath.py
Normal file
|
@ -0,0 +1,837 @@
|
|||
"""
|
||||
Extended math utilities.
|
||||
"""
|
||||
# Authors: Gael Varoquaux
|
||||
# Alexandre Gramfort
|
||||
# Alexandre T. Passos
|
||||
# Olivier Grisel
|
||||
# Lars Buitinck
|
||||
# Stefan van der Walt
|
||||
# Kyle Kastner
|
||||
# Giorgio Patrini
|
||||
# License: BSD 3 clause
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
from scipy import linalg, sparse
|
||||
|
||||
from . import check_random_state
|
||||
from ._logistic_sigmoid import _log_logistic_sigmoid
|
||||
from .sparsefuncs_fast import csr_row_norms
|
||||
from .validation import check_array
|
||||
from .validation import _deprecate_positional_args
|
||||
from .deprecation import deprecated
|
||||
|
||||
|
||||
def squared_norm(x):
|
||||
"""Squared Euclidean or Frobenius norm of x.
|
||||
|
||||
Faster than norm(x) ** 2.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : array_like
|
||||
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
The Euclidean norm when x is a vector, the Frobenius norm when x
|
||||
is a matrix (2-d array).
|
||||
"""
|
||||
x = np.ravel(x, order='K')
|
||||
if np.issubdtype(x.dtype, np.integer):
|
||||
warnings.warn('Array type is integer, np.dot may overflow. '
|
||||
'Data should be float type to avoid this issue',
|
||||
UserWarning)
|
||||
return np.dot(x, x)
|
||||
|
||||
|
||||
def row_norms(X, squared=False):
|
||||
"""Row-wise (squared) Euclidean norm of X.
|
||||
|
||||
Equivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse
|
||||
matrices and does not create an X.shape-sized temporary.
|
||||
|
||||
Performs no input validation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array_like
|
||||
The input array
|
||||
squared : bool, optional (default = False)
|
||||
If True, return squared norms.
|
||||
|
||||
Returns
|
||||
-------
|
||||
array_like
|
||||
The row-wise (squared) Euclidean norm of X.
|
||||
"""
|
||||
if sparse.issparse(X):
|
||||
if not isinstance(X, sparse.csr_matrix):
|
||||
X = sparse.csr_matrix(X)
|
||||
norms = csr_row_norms(X)
|
||||
else:
|
||||
norms = np.einsum('ij,ij->i', X, X)
|
||||
|
||||
if not squared:
|
||||
np.sqrt(norms, norms)
|
||||
return norms
|
||||
|
||||
|
||||
def fast_logdet(A):
|
||||
"""Compute log(det(A)) for A symmetric
|
||||
|
||||
Equivalent to np.log(np.linalg.det(A)), but more robust.
|
||||
It returns -Inf if det(A) is non positive or is not defined.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : array_like
|
||||
The matrix
|
||||
"""
|
||||
sign, ld = np.linalg.slogdet(A)
|
||||
if not sign > 0:
|
||||
return -np.inf
|
||||
return ld
|
||||
|
||||
|
||||
def density(w, **kwargs):
|
||||
"""Compute density of a sparse vector
|
||||
|
||||
Parameters
|
||||
----------
|
||||
w : array_like
|
||||
The sparse vector
|
||||
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
The density of w, between 0 and 1
|
||||
"""
|
||||
if hasattr(w, "toarray"):
|
||||
d = float(w.nnz) / (w.shape[0] * w.shape[1])
|
||||
else:
|
||||
d = 0 if w is None else float((w != 0).sum()) / w.size
|
||||
return d
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def safe_sparse_dot(a, b, *, dense_output=False):
|
||||
"""Dot product that handle the sparse matrix case correctly
|
||||
|
||||
Parameters
|
||||
----------
|
||||
a : array or sparse matrix
|
||||
b : array or sparse matrix
|
||||
dense_output : boolean, (default=False)
|
||||
When False, ``a`` and ``b`` both being sparse will yield sparse output.
|
||||
When True, output will always be a dense array.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dot_product : array or sparse matrix
|
||||
sparse if ``a`` and ``b`` are sparse and ``dense_output=False``.
|
||||
"""
|
||||
if a.ndim > 2 or b.ndim > 2:
|
||||
if sparse.issparse(a):
|
||||
# sparse is always 2D. Implies b is 3D+
|
||||
# [i, j] @ [k, ..., l, m, n] -> [i, k, ..., l, n]
|
||||
b_ = np.rollaxis(b, -2)
|
||||
b_2d = b_.reshape((b.shape[-2], -1))
|
||||
ret = a @ b_2d
|
||||
ret = ret.reshape(a.shape[0], *b_.shape[1:])
|
||||
elif sparse.issparse(b):
|
||||
# sparse is always 2D. Implies a is 3D+
|
||||
# [k, ..., l, m] @ [i, j] -> [k, ..., l, j]
|
||||
a_2d = a.reshape(-1, a.shape[-1])
|
||||
ret = a_2d @ b
|
||||
ret = ret.reshape(*a.shape[:-1], b.shape[1])
|
||||
else:
|
||||
ret = np.dot(a, b)
|
||||
else:
|
||||
ret = a @ b
|
||||
|
||||
if (sparse.issparse(a) and sparse.issparse(b)
|
||||
and dense_output and hasattr(ret, "toarray")):
|
||||
return ret.toarray()
|
||||
return ret
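A short sketch of safe_sparse_dot with and without dense_output (illustrative sizes):

import scipy.sparse as sp
from sklearn.utils.extmath import safe_sparse_dot

A = sp.random(5, 3, density=0.5, format='csr', random_state=0)
B = sp.random(3, 4, density=0.5, format='csr', random_state=1)

C_sparse = safe_sparse_dot(A, B)                    # sparse @ sparse stays sparse
C_dense = safe_sparse_dot(A, B, dense_output=True)  # converted to an ndarray
print(type(C_sparse).__name__, type(C_dense).__name__)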
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def randomized_range_finder(A, *, size, n_iter,
|
||||
power_iteration_normalizer='auto',
|
||||
random_state=None):
|
||||
"""Computes an orthonormal matrix whose range approximates the range of A.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : 2D array
|
||||
The input data matrix
|
||||
|
||||
size : integer
|
||||
Size of the return array
|
||||
|
||||
n_iter : integer
|
||||
Number of power iterations used to stabilize the result
|
||||
|
||||
power_iteration_normalizer : 'auto' (default), 'QR', 'LU', 'none'
|
||||
Whether the power iterations are normalized with step-by-step
|
||||
QR factorization (the slowest but most accurate), 'none'
|
||||
(the fastest but numerically unstable when `n_iter` is large, e.g.
|
||||
typically 5 or larger), or 'LU' factorization (numerically stable
|
||||
but can lose slightly in accuracy). The 'auto' mode applies no
|
||||
normalization if `n_iter` <= 2 and switches to LU otherwise.
|
||||
|
||||
.. versionadded:: 0.18
|
||||
|
||||
random_state : int, RandomState instance or None, optional (default=None)
|
||||
The seed of the pseudo random number generator to use when shuffling
|
||||
the data, i.e. getting the random vectors to initialize the algorithm.
|
||||
Pass an int for reproducible results across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Q : 2D array
|
||||
A (size x size) projection matrix, the range of which
|
||||
approximates well the range of the input matrix A.
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
Follows Algorithm 4.3 of
|
||||
Finding structure with randomness: Stochastic algorithms for constructing
|
||||
approximate matrix decompositions
|
||||
Halko, et al., 2009 (arXiv:909) https://arxiv.org/pdf/0909.4061.pdf
|
||||
|
||||
An implementation of a randomized algorithm for principal component
|
||||
analysis
|
||||
A. Szlam et al. 2014
|
||||
"""
|
||||
random_state = check_random_state(random_state)
|
||||
|
||||
# Generating normal random vectors with shape: (A.shape[1], size)
|
||||
Q = random_state.normal(size=(A.shape[1], size))
|
||||
if A.dtype.kind == 'f':
|
||||
# Ensure f32 is preserved as f32
|
||||
Q = Q.astype(A.dtype, copy=False)
|
||||
|
||||
# Deal with "auto" mode
|
||||
if power_iteration_normalizer == 'auto':
|
||||
if n_iter <= 2:
|
||||
power_iteration_normalizer = 'none'
|
||||
else:
|
||||
power_iteration_normalizer = 'LU'
|
||||
|
||||
# Perform power iterations with Q to further 'imprint' the top
|
||||
# singular vectors of A in Q
|
||||
for i in range(n_iter):
|
||||
if power_iteration_normalizer == 'none':
|
||||
Q = safe_sparse_dot(A, Q)
|
||||
Q = safe_sparse_dot(A.T, Q)
|
||||
elif power_iteration_normalizer == 'LU':
|
||||
Q, _ = linalg.lu(safe_sparse_dot(A, Q), permute_l=True)
|
||||
Q, _ = linalg.lu(safe_sparse_dot(A.T, Q), permute_l=True)
|
||||
elif power_iteration_normalizer == 'QR':
|
||||
Q, _ = linalg.qr(safe_sparse_dot(A, Q), mode='economic')
|
||||
Q, _ = linalg.qr(safe_sparse_dot(A.T, Q), mode='economic')
|
||||
|
||||
# Sample the range of A by a linear projection of Q
|
||||
# Extract an orthonormal basis
|
||||
Q, _ = linalg.qr(safe_sparse_dot(A, Q), mode='economic')
|
||||
return Q
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto',
|
||||
power_iteration_normalizer='auto', transpose='auto',
|
||||
flip_sign=True, random_state=0):
|
||||
"""Computes a truncated randomized SVD
|
||||
|
||||
Parameters
|
||||
----------
|
||||
M : ndarray or sparse matrix
|
||||
Matrix to decompose
|
||||
|
||||
n_components : int
|
||||
Number of singular values and vectors to extract.
|
||||
|
||||
n_oversamples : int (default is 10)
|
||||
Additional number of random vectors to sample the range of M so as
|
||||
to ensure proper conditioning. The total number of random vectors
|
||||
used to find the range of M is n_components + n_oversamples. Smaller
|
||||
number can improve speed but can negatively impact the quality of
|
||||
approximation of singular vectors and singular values.
|
||||
|
||||
n_iter : int or 'auto' (default is 'auto')
|
||||
Number of power iterations. It can be used to deal with very noisy
|
||||
problems. When 'auto', it is set to 4, unless `n_components` is small
|
||||
(< .1 * min(X.shape)), in which case `n_iter` is set to 7.
|
||||
This improves precision with few components.
|
||||
|
||||
.. versionchanged:: 0.18
|
||||
|
||||
power_iteration_normalizer : 'auto' (default), 'QR', 'LU', 'none'
|
||||
Whether the power iterations are normalized with step-by-step
|
||||
QR factorization (the slowest but most accurate), 'none'
|
||||
(the fastest but numerically unstable when `n_iter` is large, e.g.
|
||||
typically 5 or larger), or 'LU' factorization (numerically stable
|
||||
but can lose slightly in accuracy). The 'auto' mode applies no
|
||||
normalization if `n_iter` <= 2 and switches to LU otherwise.
|
||||
|
||||
.. versionadded:: 0.18
|
||||
|
||||
transpose : True, False or 'auto' (default)
|
||||
Whether the algorithm should be applied to M.T instead of M. The
|
||||
result should approximately be the same. The 'auto' mode will
|
||||
trigger the transposition if M.shape[1] > M.shape[0] since this
|
||||
implementation of randomized SVD tends to be a little faster in that
|
||||
case.
|
||||
|
||||
.. versionchanged:: 0.18
|
||||
|
||||
flip_sign : boolean, (True by default)
|
||||
The output of a singular value decomposition is only unique up to a
|
||||
permutation of the signs of the singular vectors. If `flip_sign` is
|
||||
set to `True`, the sign ambiguity is resolved by making the largest
|
||||
loadings for each component in the left singular vectors positive.
|
||||
|
||||
random_state : int, RandomState instance or None, optional (default=None)
|
||||
The seed of the pseudo random number generator to use when shuffling
|
||||
the data, i.e. getting the random vectors to initialize the algorithm.
|
||||
Pass an int for reproducible results across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This algorithm finds a (usually very good) approximate truncated
|
||||
singular value decomposition using randomization to speed up the
|
||||
computations. It is particularly fast on large matrices on which
|
||||
you wish to extract only a small number of components. In order to
|
||||
obtain further speed up, `n_iter` can be set <=2 (at the cost of
|
||||
loss of precision).
|
||||
|
||||
References
|
||||
----------
|
||||
* Finding structure with randomness: Stochastic algorithms for constructing
|
||||
approximate matrix decompositions
|
||||
Halko, et al., 2009 https://arxiv.org/abs/0909.4061
|
||||
|
||||
* A randomized algorithm for the decomposition of matrices
|
||||
Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert
|
||||
|
||||
* An implementation of a randomized algorithm for principal component
|
||||
analysis
|
||||
A. Szlam et al. 2014
|
||||
"""
|
||||
if isinstance(M, (sparse.lil_matrix, sparse.dok_matrix)):
|
||||
warnings.warn("Calculating SVD of a {} is expensive. "
|
||||
"csr_matrix is more efficient.".format(
|
||||
type(M).__name__),
|
||||
sparse.SparseEfficiencyWarning)
|
||||
|
||||
random_state = check_random_state(random_state)
|
||||
n_random = n_components + n_oversamples
|
||||
n_samples, n_features = M.shape
|
||||
|
||||
if n_iter == 'auto':
|
||||
# Checks if the number of iterations is explicitly specified
|
||||
# Adjust n_iter. 7 was found a good compromise for PCA. See #5299
|
||||
n_iter = 7 if n_components < .1 * min(M.shape) else 4
|
||||
|
||||
if transpose == 'auto':
|
||||
transpose = n_samples < n_features
|
||||
if transpose:
|
||||
# this implementation is a bit faster with smaller shape[1]
|
||||
M = M.T
|
||||
|
||||
Q = randomized_range_finder(
|
||||
M, size=n_random, n_iter=n_iter,
|
||||
power_iteration_normalizer=power_iteration_normalizer,
|
||||
random_state=random_state)
|
||||
|
||||
# project M to the (k + p) dimensional space using the basis vectors
|
||||
B = safe_sparse_dot(Q.T, M)
|
||||
|
||||
# compute the SVD on the thin matrix: (k + p) wide
|
||||
Uhat, s, V = linalg.svd(B, full_matrices=False)
|
||||
|
||||
del B
|
||||
U = np.dot(Q, Uhat)
|
||||
|
||||
if flip_sign:
|
||||
if not transpose:
|
||||
U, V = svd_flip(U, V)
|
||||
else:
|
||||
# In case of transpose u_based_decision=false
|
||||
# to actually flip based on u and not v.
|
||||
U, V = svd_flip(U, V, u_based_decision=False)
|
||||
|
||||
if transpose:
|
||||
# transpose back the results according to the input convention
|
||||
return V[:n_components, :].T, s[:n_components], U[:, :n_components].T
|
||||
else:
|
||||
return U[:, :n_components], s[:n_components], V[:n_components, :]
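A minimal sketch of randomized_svd on a random dense matrix (illustrative sizes only):

import numpy as np
from sklearn.utils.extmath import randomized_svd

rng = np.random.RandomState(0)
M = rng.randn(100, 20)

# Keep only the 5 leading singular triplets.
U, s, Vt = randomized_svd(M, n_components=5, n_iter=5, random_state=0)
print(U.shape, s.shape, Vt.shape)   # (100, 5) (5,) (5, 20)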
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def weighted_mode(a, w, *, axis=0):
|
||||
"""Returns an array of the weighted modal (most common) value in a
|
||||
|
||||
If there is more than one such value, only the first is returned.
|
||||
The bin-count for the modal bins is also returned.
|
||||
|
||||
This is an extension of the algorithm in scipy.stats.mode.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
a : array_like
|
||||
n-dimensional array of which to find mode(s).
|
||||
w : array_like
|
||||
n-dimensional array of weights for each value
|
||||
axis : int, optional
|
||||
Axis along which to operate. Default is 0, i.e. the first axis.
|
||||
|
||||
Returns
|
||||
-------
|
||||
vals : ndarray
|
||||
Array of modal values.
|
||||
score : ndarray
|
||||
Array of weighted counts for each mode.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.utils.extmath import weighted_mode
|
||||
>>> x = [4, 1, 4, 2, 4, 2]
|
||||
>>> weights = [1, 1, 1, 1, 1, 1]
|
||||
>>> weighted_mode(x, weights)
|
||||
(array([4.]), array([3.]))
|
||||
|
||||
The value 4 appears three times: with uniform weights, the result is
|
||||
simply the mode of the distribution.
|
||||
|
||||
>>> weights = [1, 3, 0.5, 1.5, 1, 2] # deweight the 4's
|
||||
>>> weighted_mode(x, weights)
|
||||
(array([2.]), array([3.5]))
|
||||
|
||||
The value 2 has the highest score: it appears twice with weights of
|
||||
1.5 and 2: the sum of these is 3.5.
|
||||
|
||||
See Also
|
||||
--------
|
||||
scipy.stats.mode
|
||||
"""
|
||||
if axis is None:
|
||||
a = np.ravel(a)
|
||||
w = np.ravel(w)
|
||||
axis = 0
|
||||
else:
|
||||
a = np.asarray(a)
|
||||
w = np.asarray(w)
|
||||
|
||||
if a.shape != w.shape:
|
||||
w = np.full(a.shape, w, dtype=w.dtype)
|
||||
|
||||
scores = np.unique(np.ravel(a)) # get ALL unique values
|
||||
testshape = list(a.shape)
|
||||
testshape[axis] = 1
|
||||
oldmostfreq = np.zeros(testshape)
|
||||
oldcounts = np.zeros(testshape)
|
||||
for score in scores:
|
||||
template = np.zeros(a.shape)
|
||||
ind = (a == score)
|
||||
template[ind] = w[ind]
|
||||
counts = np.expand_dims(np.sum(template, axis), axis)
|
||||
mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)
|
||||
oldcounts = np.maximum(counts, oldcounts)
|
||||
oldmostfreq = mostfrequent
|
||||
return mostfrequent, oldcounts
|
||||
|
||||
|
||||
def cartesian(arrays, out=None):
|
||||
"""Generate a cartesian product of input arrays.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
arrays : list of array-like
|
||||
1-D arrays to form the cartesian product of.
|
||||
out : ndarray
|
||||
Array to place the cartesian product in.
|
||||
|
||||
Returns
|
||||
-------
|
||||
out : ndarray
|
||||
2-D array of shape (M, len(arrays)) containing cartesian products
|
||||
formed of input arrays.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
|
||||
array([[1, 4, 6],
|
||||
[1, 4, 7],
|
||||
[1, 5, 6],
|
||||
[1, 5, 7],
|
||||
[2, 4, 6],
|
||||
[2, 4, 7],
|
||||
[2, 5, 6],
|
||||
[2, 5, 7],
|
||||
[3, 4, 6],
|
||||
[3, 4, 7],
|
||||
[3, 5, 6],
|
||||
[3, 5, 7]])
|
||||
|
||||
"""
|
||||
arrays = [np.asarray(x) for x in arrays]
|
||||
shape = (len(x) for x in arrays)
|
||||
dtype = arrays[0].dtype
|
||||
|
||||
ix = np.indices(shape)
|
||||
ix = ix.reshape(len(arrays), -1).T
|
||||
|
||||
if out is None:
|
||||
out = np.empty_like(ix, dtype=dtype)
|
||||
|
||||
for n, arr in enumerate(arrays):
|
||||
out[:, n] = arrays[n][ix[:, n]]
|
||||
|
||||
return out
|
||||
|
||||
|
||||
def svd_flip(u, v, u_based_decision=True):
|
||||
"""Sign correction to ensure deterministic output from SVD.
|
||||
|
||||
Adjusts the columns of u and the rows of v such that the loadings in the
|
||||
columns in u that are largest in absolute value are always positive.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
u : ndarray
|
||||
u and v are the output of `linalg.svd` or
|
||||
:func:`~sklearn.utils.extmath.randomized_svd`, with matching inner
|
||||
dimensions so one can compute `np.dot(u * s, v)`.
|
||||
|
||||
v : ndarray
|
||||
u and v are the output of `linalg.svd` or
|
||||
:func:`~sklearn.utils.extmath.randomized_svd`, with matching inner
|
||||
dimensions so one can compute `np.dot(u * s, v)`.
|
||||
|
||||
u_based_decision : boolean, (default=True)
|
||||
If True, use the columns of u as the basis for sign flipping.
|
||||
Otherwise, use the rows of v. The choice of which variable to base the
|
||||
decision on is generally algorithm dependent.
|
||||
|
||||
|
||||
Returns
|
||||
-------
|
||||
u_adjusted, v_adjusted : arrays with the same dimensions as the input.
|
||||
|
||||
"""
|
||||
if u_based_decision:
|
||||
# columns of u, rows of v
|
||||
max_abs_cols = np.argmax(np.abs(u), axis=0)
|
||||
signs = np.sign(u[max_abs_cols, range(u.shape[1])])
|
||||
u *= signs
|
||||
v *= signs[:, np.newaxis]
|
||||
else:
|
||||
# rows of v, columns of u
|
||||
max_abs_rows = np.argmax(np.abs(v), axis=1)
|
||||
signs = np.sign(v[range(v.shape[0]), max_abs_rows])
|
||||
u *= signs
|
||||
v *= signs[:, np.newaxis]
|
||||
return u, v
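A short sketch showing that svd_flip changes signs only, not the reconstruction:

import numpy as np
from scipy import linalg
from sklearn.utils.extmath import svd_flip

rng = np.random.RandomState(0)
X = rng.randn(6, 4)
U, s, Vt = linalg.svd(X, full_matrices=False)

# svd_flip modifies its inputs in place, hence the copies.
U2, Vt2 = svd_flip(U.copy(), Vt.copy())
print(np.allclose(U @ np.diag(s) @ Vt, U2 @ np.diag(s) @ Vt2))   # True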
|
||||
|
||||
|
||||
def log_logistic(X, out=None):
|
||||
"""Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.
|
||||
|
||||
This implementation is numerically stable because it splits positive and
|
||||
negative values::
|
||||
|
||||
-log(1 + exp(-x_i)) if x_i > 0
|
||||
x_i - log(1 + exp(x_i)) if x_i <= 0
|
||||
|
||||
For the ordinary logistic function, use ``scipy.special.expit``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like, shape (M, N) or (M, )
|
||||
Argument to the logistic function
|
||||
|
||||
out : array-like, shape: (M, N) or (M, ), optional:
|
||||
Preallocated output array.
|
||||
|
||||
Returns
|
||||
-------
|
||||
out : array, shape (M, N) or (M, )
|
||||
Log of the logistic function evaluated at every point in x
|
||||
|
||||
Notes
|
||||
-----
|
||||
See the blog post describing this implementation:
|
||||
http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/
|
||||
"""
|
||||
is_1d = X.ndim == 1
|
||||
X = np.atleast_2d(X)
|
||||
X = check_array(X, dtype=np.float64)
|
||||
|
||||
n_samples, n_features = X.shape
|
||||
|
||||
if out is None:
|
||||
out = np.empty_like(X)
|
||||
|
||||
_log_logistic_sigmoid(n_samples, n_features, X, out)
|
||||
|
||||
if is_1d:
|
||||
return np.squeeze(out)
|
||||
return out
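A quick numerical check of the stability claim above (illustrative values):

import numpy as np
from scipy.special import expit
from sklearn.utils.extmath import log_logistic

x = np.array([-800.0, -1.0, 0.0, 1.0, 800.0])
print(log_logistic(x))          # finite everywhere, roughly [-800, -1.31, -0.69, -0.31, 0]
with np.errstate(divide='ignore'):
    print(np.log(expit(x)))     # -inf for the large negative entry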
|
||||
|
||||
|
||||
def softmax(X, copy=True):
|
||||
"""
|
||||
Calculate the softmax function.
|
||||
|
||||
The softmax function is calculated by
|
||||
np.exp(X) / np.sum(np.exp(X), axis=1)
|
||||
|
||||
This will cause overflow when large values are exponentiated.
|
||||
Hence the largest value in each row is subtracted from each data
|
||||
point to prevent this.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of floats, shape (M, N)
|
||||
Argument to the softmax function
|
||||
|
||||
copy : bool, optional
|
||||
Copy X or not.
|
||||
|
||||
Returns
|
||||
-------
|
||||
out : array, shape (M, N)
|
||||
Softmax function evaluated at every point in x
|
||||
"""
|
||||
if copy:
|
||||
X = np.copy(X)
|
||||
max_prob = np.max(X, axis=1).reshape((-1, 1))
|
||||
X -= max_prob
|
||||
np.exp(X, X)
|
||||
sum_prob = np.sum(X, axis=1).reshape((-1, 1))
|
||||
X /= sum_prob
|
||||
return X
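A small sketch of the overflow-safe behaviour described above:

import numpy as np
from sklearn.utils.extmath import softmax

X = np.array([[1000.0, 1001.0, 1002.0],
              [-1.0, 0.0, 1.0]])
P = softmax(X)
print(P.sum(axis=1))   # each row sums to 1, no overflow despite the large values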
|
||||
|
||||
|
||||
@deprecated("safe_min is deprecated in version 0.22 and will be removed "
|
||||
"in version 0.24.")
|
||||
def safe_min(X):
|
||||
"""Returns the minimum value of a dense or a CSR/CSC matrix.
|
||||
|
||||
Adapted from https://stackoverflow.com/q/13426580
|
||||
|
||||
.. deprecated:: 0.22.0
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array_like
|
||||
The input array or sparse matrix
|
||||
|
||||
Returns
|
||||
-------
|
||||
Float
|
||||
The min value of X
|
||||
"""
|
||||
if sparse.issparse(X):
|
||||
if len(X.data) == 0:
|
||||
return 0
|
||||
m = X.data.min()
|
||||
return m if X.getnnz() == X.size else min(m, 0)
|
||||
else:
|
||||
return X.min()
|
||||
|
||||
|
||||
def make_nonnegative(X, min_value=0):
|
||||
"""Ensure `X.min()` >= `min_value`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array_like
|
||||
The matrix to make non-negative
|
||||
min_value : float
|
||||
The threshold value
|
||||
|
||||
Returns
|
||||
-------
|
||||
array_like
|
||||
The thresholded array
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
When X is sparse
|
||||
"""
|
||||
min_ = X.min()
|
||||
if min_ < min_value:
|
||||
if sparse.issparse(X):
|
||||
raise ValueError("Cannot make the data matrix"
|
||||
" nonnegative because it is sparse."
|
||||
" Adding a value to every entry would"
|
||||
" make it no longer sparse.")
|
||||
X = X + (min_value - min_)
|
||||
return X
|
||||
|
||||
|
||||
# Use at least float64 for the accumulating functions to avoid precision issues
|
||||
# see https://github.com/numpy/numpy/issues/9393. The float64 is also retained
|
||||
# as it is in case the float overflows
|
||||
def _safe_accumulator_op(op, x, *args, **kwargs):
|
||||
"""
|
||||
This function provides numpy accumulator functions with a float64 dtype
|
||||
when used on a floating point input. This prevents accumulator overflow on
|
||||
smaller floating point dtypes.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
op : function
|
||||
A numpy accumulator function such as np.mean or np.sum
|
||||
x : numpy array
|
||||
A numpy array to apply the accumulator function
|
||||
*args : positional arguments
|
||||
Positional arguments passed to the accumulator function after the
|
||||
input x
|
||||
**kwargs : keyword arguments
|
||||
Keyword arguments passed to the accumulator function
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : The output of the accumulator function passed to this function
|
||||
"""
|
||||
if np.issubdtype(x.dtype, np.floating) and x.dtype.itemsize < 8:
|
||||
result = op(x, *args, **kwargs, dtype=np.float64)
|
||||
else:
|
||||
result = op(x, *args, **kwargs)
|
||||
return result
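A small sketch of the precision issue this guards against (illustrative, uses only NumPy): summing a float32 array with the default float32 accumulator can overflow even though every element is representable.
import numpy as np

x = np.full(10, 1e38, dtype=np.float32)
print(np.sum(x))                     # inf: the float32 accumulator overflows
print(np.sum(x, dtype=np.float64))   # ~1e+39, what _safe_accumulator_op(np.sum, x) computes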
|
||||
|
||||
|
||||
def _incremental_mean_and_var(X, last_mean, last_variance, last_sample_count):
|
||||
"""Calculate mean update and a Youngs and Cramer variance update.
|
||||
|
||||
last_mean and last_variance are statistics computed at the last step by the
|
||||
function. Both must be initialized to 0.0. In case no scaling is required
|
||||
last_variance can be None. The mean is always required and returned because it is
|
||||
necessary for the calculation of the variance. last_sample_count is the
|
||||
number of samples encountered until now.
|
||||
|
||||
From the paper "Algorithms for computing the sample variance: analysis and
|
||||
recommendations", by Chan, Golub, and LeVeque.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like, shape (n_samples, n_features)
|
||||
Data to use for variance update
|
||||
|
||||
last_mean : array-like, shape: (n_features,)
|
||||
|
||||
last_variance : array-like, shape: (n_features,)
|
||||
|
||||
last_sample_count : array-like, shape (n_features,)
|
||||
|
||||
Returns
|
||||
-------
|
||||
updated_mean : array, shape (n_features,)
|
||||
|
||||
updated_variance : array, shape (n_features,)
|
||||
If None, only mean is computed
|
||||
|
||||
updated_sample_count : array, shape (n_features,)
|
||||
|
||||
Notes
|
||||
-----
|
||||
NaNs are ignored during the algorithm.
|
||||
|
||||
References
|
||||
----------
|
||||
T. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample
|
||||
variance: analysis and recommendations, The American Statistician, Vol. 37, No. 3,
|
||||
pp. 242-247
|
||||
|
||||
Also, see the sparse implementation of this in
|
||||
`utils.sparsefuncs.incr_mean_variance_axis` and
|
||||
`utils.sparsefuncs_fast.incr_mean_variance_axis0`
|
||||
"""
|
||||
# old = stats until now
|
||||
# new = the current increment
|
||||
# updated = the aggregated stats
|
||||
last_sum = last_mean * last_sample_count
|
||||
new_sum = _safe_accumulator_op(np.nansum, X, axis=0)
|
||||
|
||||
new_sample_count = np.sum(~np.isnan(X), axis=0)
|
||||
updated_sample_count = last_sample_count + new_sample_count
|
||||
|
||||
updated_mean = (last_sum + new_sum) / updated_sample_count
|
||||
|
||||
if last_variance is None:
|
||||
updated_variance = None
|
||||
else:
|
||||
new_unnormalized_variance = (
|
||||
_safe_accumulator_op(np.nanvar, X, axis=0) * new_sample_count)
|
||||
last_unnormalized_variance = last_variance * last_sample_count
|
||||
|
||||
with np.errstate(divide='ignore', invalid='ignore'):
|
||||
last_over_new_count = last_sample_count / new_sample_count
|
||||
updated_unnormalized_variance = (
|
||||
last_unnormalized_variance + new_unnormalized_variance +
|
||||
last_over_new_count / updated_sample_count *
|
||||
(last_sum / last_over_new_count - new_sum) ** 2)
|
||||
|
||||
zeros = last_sample_count == 0
|
||||
updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
|
||||
updated_variance = updated_unnormalized_variance / updated_sample_count
|
||||
|
||||
return updated_mean, updated_variance, updated_sample_count
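A quick usage sketch (illustrative only): feeding batches through the update reproduces the mean and variance of the full array, which is how partial_fit-style scalers use this helper.
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(100, 3)
mean, var, count = np.zeros(3), np.zeros(3), np.zeros(3)
for batch in np.array_split(X, 5):
    mean, var, count = _incremental_mean_and_var(batch, mean, var, count)
print(np.allclose(mean, X.mean(axis=0)), np.allclose(var, X.var(axis=0)))   # True True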
|
||||
|
||||
|
||||
def _deterministic_vector_sign_flip(u):
|
||||
"""Modify the sign of vectors for reproducibility
|
||||
|
||||
Flips the sign of elements of all the vectors (rows of u) such that
|
||||
the absolute maximum element of each vector is positive.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
u : ndarray
|
||||
Array with vectors as its rows.
|
||||
|
||||
Returns
|
||||
-------
|
||||
u_flipped : ndarray with same shape as u
|
||||
Array with the sign flipped vectors as its rows.
|
||||
"""
|
||||
max_abs_rows = np.argmax(np.abs(u), axis=1)
|
||||
signs = np.sign(u[range(u.shape[0]), max_abs_rows])
|
||||
u *= signs[:, np.newaxis]
|
||||
return u
|
||||
|
||||
|
||||
def stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08):
|
||||
"""Use high precision for cumsum and check that final value matches sum
|
||||
|
||||
Parameters
|
||||
----------
|
||||
arr : array-like
|
||||
Array to be cumulatively summed; flattened if ``axis`` is None
|
||||
axis : int, optional
|
||||
Axis along which the cumulative sum is computed.
|
||||
The default (None) is to compute the cumsum over the flattened array.
|
||||
rtol : float
|
||||
Relative tolerance, see ``np.allclose``
|
||||
atol : float
|
||||
Absolute tolerance, see ``np.allclose``
|
||||
"""
|
||||
out = np.cumsum(arr, axis=axis, dtype=np.float64)
|
||||
expected = np.sum(arr, axis=axis, dtype=np.float64)
|
||||
if not np.all(np.isclose(out.take(-1, axis=axis), expected, rtol=rtol,
|
||||
atol=atol, equal_nan=True)):
|
||||
warnings.warn('cumsum was found to be unstable: '
|
||||
'its last element does not correspond to sum',
|
||||
RuntimeWarning)
|
||||
return out
|
18
venv/Lib/site-packages/sklearn/utils/fast_dict.py
Normal file
18
venv/Lib/site-packages/sklearn/utils/fast_dict.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _fast_dict # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.utils.fast_dict'
|
||||
correct_import_path = 'sklearn.utils'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_fast_dict, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
162
venv/Lib/site-packages/sklearn/utils/fixes.py
Normal file
162
venv/Lib/site-packages/sklearn/utils/fixes.py
Normal file
|
@ -0,0 +1,162 @@
|
|||
"""Compatibility fixes for older version of python, numpy and scipy
|
||||
|
||||
If you add content to this file, please give the version of the package
|
||||
at which the fix is no longer needed.
|
||||
"""
|
||||
# Authors: Emmanuelle Gouillart <emmanuelle.gouillart@normalesup.org>
|
||||
# Gael Varoquaux <gael.varoquaux@normalesup.org>
|
||||
# Fabian Pedregosa <fpedregosa@acm.org>
|
||||
# Lars Buitinck
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
from distutils.version import LooseVersion
|
||||
|
||||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
import scipy
|
||||
import scipy.stats
|
||||
from scipy.sparse.linalg import lsqr as sparse_lsqr # noqa
|
||||
from numpy.ma import MaskedArray as _MaskedArray # TODO: remove in 0.25
|
||||
|
||||
from .deprecation import deprecated
|
||||
|
||||
try:
|
||||
from pkg_resources import parse_version # type: ignore
|
||||
except ImportError:
|
||||
# setuptools not installed
|
||||
parse_version = LooseVersion # type: ignore
|
||||
|
||||
|
||||
np_version = parse_version(np.__version__)
|
||||
sp_version = parse_version(scipy.__version__)
|
||||
|
||||
|
||||
if sp_version >= parse_version('1.4'):
|
||||
from scipy.sparse.linalg import lobpcg
|
||||
else:
|
||||
# Backport of lobpcg functionality from scipy 1.4.0, can be removed
|
||||
# once support for sp_version < parse_version('1.4') is dropped
|
||||
# mypy error: Name 'lobpcg' already defined (possibly by an import)
|
||||
from ..externals._lobpcg import lobpcg # type: ignore # noqa
|
||||
|
||||
|
||||
def _object_dtype_isnan(X):
|
||||
return X != X
|
||||
|
||||
|
||||
# TODO: replace by copy=False, when only scipy > 1.1 is supported.
|
||||
def _astype_copy_false(X):
|
||||
"""Returns the copy=False parameter for
|
||||
{ndarray, csr_matrix, csc_matrix}.astype when possible,
|
||||
otherwise don't specify
|
||||
"""
|
||||
if sp_version >= parse_version('1.1') or not sp.issparse(X):
|
||||
return {'copy': False}
|
||||
else:
|
||||
return {}
|
||||
|
||||
|
||||
def _joblib_parallel_args(**kwargs):
|
||||
"""Set joblib.Parallel arguments in a compatible way for 0.11 and 0.12+
|
||||
|
||||
For joblib 0.11 this maps both ``prefer`` and ``require`` parameters to
|
||||
a specific ``backend``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
prefer : str in {'processes', 'threads'} or None
|
||||
Soft hint to choose the default backend if no specific backend
|
||||
was selected with the parallel_backend context manager.
|
||||
|
||||
require : 'sharedmem' or None
|
||||
Hard constraint to select the backend. If set to 'sharedmem',
|
||||
the selected backend will be single-host and thread-based even
|
||||
if the user asked for a non-thread based backend with
|
||||
parallel_backend.
|
||||
|
||||
See joblib.Parallel documentation for more details
|
||||
"""
|
||||
import joblib
|
||||
|
||||
if parse_version(joblib.__version__) >= parse_version('0.12'):
|
||||
return kwargs
|
||||
|
||||
extra_args = set(kwargs.keys()).difference({'prefer', 'require'})
|
||||
if extra_args:
|
||||
raise NotImplementedError('unhandled arguments %s with joblib %s'
|
||||
% (list(extra_args), joblib.__version__))
|
||||
args = {}
|
||||
if 'prefer' in kwargs:
|
||||
prefer = kwargs['prefer']
|
||||
if prefer not in ['threads', 'processes', None]:
|
||||
raise ValueError('prefer=%s is not supported' % prefer)
|
||||
args['backend'] = {'threads': 'threading',
|
||||
'processes': 'multiprocessing',
|
||||
None: None}[prefer]
|
||||
|
||||
if 'require' in kwargs:
|
||||
require = kwargs['require']
|
||||
if require not in [None, 'sharedmem']:
|
||||
raise ValueError('require=%s is not supported' % require)
|
||||
if require == 'sharedmem':
|
||||
args['backend'] = 'threading'
|
||||
return args
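Hedged usage sketch (assumes joblib is installed): the returned dict is simply splatted into joblib.Parallel, whichever joblib version is present.
from joblib import Parallel, delayed

kwargs = _joblib_parallel_args(prefer='threads')   # passed through on joblib >= 0.12,
                                                   # mapped to backend='threading' on 0.11
out = Parallel(n_jobs=2, **kwargs)(delayed(abs)(i) for i in range(-3, 3))
print(out)                                         # [3, 2, 1, 0, 1, 2]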
|
||||
|
||||
|
||||
class loguniform(scipy.stats.reciprocal):
|
||||
"""A class supporting log-uniform random variables.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
low : float
|
||||
The minimum value
|
||||
high : float
|
||||
The maximum value
|
||||
|
||||
Methods
|
||||
-------
|
||||
rvs(self, size=None, random_state=None)
|
||||
Generate log-uniform random variables
|
||||
|
||||
The most useful method for Scikit-learn usage is highlighted here.
|
||||
For a full list, see
|
||||
`scipy.stats.reciprocal
|
||||
<https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.reciprocal.html>`_.
|
||||
This list includes all functions of ``scipy.stats`` continuous
|
||||
distributions such as ``pdf``.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This class generates values between ``low`` and ``high`` or
|
||||
|
||||
low <= loguniform(low, high).rvs() <= high
|
||||
|
||||
The logarithmic probability density function (PDF) is uniform. When
|
||||
``x`` is a uniformly distributed random variable between 0 and 1, ``10**x``
|
||||
are random variables that are equally likely to be returned.
|
||||
|
||||
This class is an alias to ``scipy.stats.reciprocal``, which uses the
|
||||
reciprocal distribution:
|
||||
https://en.wikipedia.org/wiki/Reciprocal_distribution
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> from sklearn.utils.fixes import loguniform
|
||||
>>> rv = loguniform(1e-3, 1e1)
|
||||
>>> rvs = rv.rvs(random_state=42, size=1000)
|
||||
>>> rvs.min() # doctest: +SKIP
|
||||
0.0010435856341129003
|
||||
>>> rvs.max() # doctest: +SKIP
|
||||
9.97403052786026
|
||||
"""
|
||||
|
||||
|
||||
@deprecated(
|
||||
'MaskedArray is deprecated in version 0.23 and will be removed in version '
|
||||
'0.25. Use numpy.ma.MaskedArray instead.'
|
||||
)
|
||||
class MaskedArray(_MaskedArray):
|
||||
pass # TODO: remove in 0.25
|
69
venv/Lib/site-packages/sklearn/utils/graph.py
Normal file
69
venv/Lib/site-packages/sklearn/utils/graph.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
"""
|
||||
Graph utilities and algorithms
|
||||
|
||||
Graphs are represented with their adjacency matrices, preferably using
|
||||
sparse matrices.
|
||||
"""
|
||||
|
||||
# Authors: Aric Hagberg <hagberg@lanl.gov>
|
||||
# Gael Varoquaux <gael.varoquaux@normalesup.org>
|
||||
# Jake Vanderplas <vanderplas@astro.washington.edu>
|
||||
# License: BSD 3 clause
|
||||
|
||||
from scipy import sparse
|
||||
|
||||
from .graph_shortest_path import graph_shortest_path # noqa
|
||||
from .validation import _deprecate_positional_args
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Path and connected component analysis.
|
||||
# Code adapted from networkx
|
||||
@_deprecate_positional_args
|
||||
def single_source_shortest_path_length(graph, source, *, cutoff=None):
|
||||
"""Return the shortest path length from source to all reachable nodes.
|
||||
|
||||
Returns a dictionary of shortest path lengths keyed by target.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
graph : sparse matrix or 2D array (preferably LIL matrix)
|
||||
Adjacency matrix of the graph
|
||||
source : integer
|
||||
Starting node for path
|
||||
cutoff : integer, optional
|
||||
Depth to stop the search - only
|
||||
paths of length <= cutoff are returned.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.utils.graph import single_source_shortest_path_length
|
||||
>>> import numpy as np
|
||||
>>> graph = np.array([[ 0, 1, 0, 0],
|
||||
... [ 1, 0, 1, 0],
|
||||
... [ 0, 1, 0, 1],
|
||||
... [ 0, 0, 1, 0]])
|
||||
>>> list(sorted(single_source_shortest_path_length(graph, 0).items()))
|
||||
[(0, 0), (1, 1), (2, 2), (3, 3)]
|
||||
>>> graph = np.ones((6, 6))
|
||||
>>> list(sorted(single_source_shortest_path_length(graph, 2).items()))
|
||||
[(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]
|
||||
"""
|
||||
if sparse.isspmatrix(graph):
|
||||
graph = graph.tolil()
|
||||
else:
|
||||
graph = sparse.lil_matrix(graph)
|
||||
seen = {} # level (number of hops) when seen in BFS
|
||||
level = 0 # the current level
|
||||
next_level = [source] # list of nodes to check at next level
|
||||
while next_level:
|
||||
this_level = next_level # advance to next level
|
||||
next_level = set() # and start a new list (fringe)
|
||||
for v in this_level:
|
||||
if v not in seen:
|
||||
seen[v] = level # set the level of vertex v
|
||||
next_level.update(graph.rows[v])
|
||||
if cutoff is not None and cutoff <= level:
|
||||
break
|
||||
level += 1
|
||||
return seen # return all path lengths as dictionary
|
Binary file not shown.
211
venv/Lib/site-packages/sklearn/utils/metaestimators.py
Normal file
211
venv/Lib/site-packages/sklearn/utils/metaestimators.py
Normal file
|
@ -0,0 +1,211 @@
|
|||
"""Utilities for meta-estimators"""
|
||||
# Author: Joel Nothman
|
||||
# Andreas Mueller
|
||||
# License: BSD
|
||||
from typing import List, Any
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from operator import attrgetter
|
||||
from functools import update_wrapper
|
||||
import numpy as np
|
||||
|
||||
from ..utils import _safe_indexing
|
||||
from ..base import BaseEstimator
|
||||
|
||||
__all__ = ['if_delegate_has_method']
|
||||
|
||||
|
||||
class _BaseComposition(BaseEstimator, metaclass=ABCMeta):
|
||||
"""Handles parameter management for classifiers composed of named estimators.
|
||||
"""
|
||||
steps: List[Any]
|
||||
|
||||
@abstractmethod
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def _get_params(self, attr, deep=True):
|
||||
out = super().get_params(deep=deep)
|
||||
if not deep:
|
||||
return out
|
||||
estimators = getattr(self, attr)
|
||||
out.update(estimators)
|
||||
for name, estimator in estimators:
|
||||
if hasattr(estimator, 'get_params'):
|
||||
for key, value in estimator.get_params(deep=True).items():
|
||||
out['%s__%s' % (name, key)] = value
|
||||
return out
|
||||
|
||||
def _set_params(self, attr, **params):
|
||||
# Ensure strict ordering of parameter setting:
|
||||
# 1. All steps
|
||||
if attr in params:
|
||||
setattr(self, attr, params.pop(attr))
|
||||
# 2. Step replacement
|
||||
items = getattr(self, attr)
|
||||
names = []
|
||||
if items:
|
||||
names, _ = zip(*items)
|
||||
for name in list(params.keys()):
|
||||
if '__' not in name and name in names:
|
||||
self._replace_estimator(attr, name, params.pop(name))
|
||||
# 3. Step parameters and other initialisation arguments
|
||||
super().set_params(**params)
|
||||
return self
|
||||
|
||||
def _replace_estimator(self, attr, name, new_val):
|
||||
# assumes `name` is a valid estimator name
|
||||
new_estimators = list(getattr(self, attr))
|
||||
for i, (estimator_name, _) in enumerate(new_estimators):
|
||||
if estimator_name == name:
|
||||
new_estimators[i] = (name, new_val)
|
||||
break
|
||||
setattr(self, attr, new_estimators)
|
||||
|
||||
def _validate_names(self, names):
|
||||
if len(set(names)) != len(names):
|
||||
raise ValueError('Names provided are not unique: '
|
||||
'{0!r}'.format(list(names)))
|
||||
invalid_names = set(names).intersection(self.get_params(deep=False))
|
||||
if invalid_names:
|
||||
raise ValueError('Estimator names conflict with constructor '
|
||||
'arguments: {0!r}'.format(sorted(invalid_names)))
|
||||
invalid_names = [name for name in names if '__' in name]
|
||||
if invalid_names:
|
||||
raise ValueError('Estimator names must not contain __: got '
|
||||
'{0!r}'.format(invalid_names))
|
||||
|
||||
|
||||
class _IffHasAttrDescriptor:
|
||||
"""Implements a conditional property using the descriptor protocol.
|
||||
|
||||
Using this class to create a decorator will raise an ``AttributeError``
|
||||
if none of the delegates (specified in ``delegate_names``) is an attribute
|
||||
of the base object or the first found delegate does not have an attribute
|
||||
``attribute_name``.
|
||||
|
||||
This allows ducktyping of the decorated method based on
|
||||
``delegate.attribute_name``. Here ``delegate`` is the first item in
|
||||
``delegate_names`` for which ``hasattr(object, delegate) is True``.
|
||||
|
||||
See https://docs.python.org/3/howto/descriptor.html for an explanation of
|
||||
descriptors.
|
||||
"""
|
||||
def __init__(self, fn, delegate_names, attribute_name):
|
||||
self.fn = fn
|
||||
self.delegate_names = delegate_names
|
||||
self.attribute_name = attribute_name
|
||||
|
||||
# update the docstring of the descriptor
|
||||
update_wrapper(self, fn)
|
||||
|
||||
def __get__(self, obj, type=None):
|
||||
# raise an AttributeError if the attribute is not present on the object
|
||||
if obj is not None:
|
||||
# delegate only on instances, not the classes.
|
||||
# this is to allow access to the docstrings.
|
||||
for delegate_name in self.delegate_names:
|
||||
try:
|
||||
delegate = attrgetter(delegate_name)(obj)
|
||||
except AttributeError:
|
||||
continue
|
||||
else:
|
||||
getattr(delegate, self.attribute_name)
|
||||
break
|
||||
else:
|
||||
attrgetter(self.delegate_names[-1])(obj)
|
||||
|
||||
# lambda, but not partial, allows help() to work with update_wrapper
|
||||
out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
|
||||
# update the docstring of the returned function
|
||||
update_wrapper(out, self.fn)
|
||||
return out
|
||||
|
||||
|
||||
def if_delegate_has_method(delegate):
|
||||
"""Create a decorator for methods that are delegated to a sub-estimator
|
||||
|
||||
This enables ducktyping by hasattr returning True according to the
|
||||
sub-estimator.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
delegate : string, list of strings or tuple of strings
|
||||
Name of the sub-estimator that can be accessed as an attribute of the
|
||||
base object. If a list or a tuple of names are provided, the first
|
||||
sub-estimator that is an attribute of the base object will be used.
|
||||
|
||||
"""
|
||||
if isinstance(delegate, list):
|
||||
delegate = tuple(delegate)
|
||||
if not isinstance(delegate, tuple):
|
||||
delegate = (delegate,)
|
||||
|
||||
return lambda fn: _IffHasAttrDescriptor(fn, delegate,
|
||||
attribute_name=fn.__name__)
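An illustrative sketch of the decorator (``_Wrapper`` and ``_HasPredict`` are hypothetical stand-ins, not sklearn classes): ``predict`` only appears on the wrapper when the delegate actually provides it.
class _HasPredict:
    def predict(self, X):
        return X

class _Wrapper:
    """Toy meta-estimator used only to illustrate the decorator."""
    def __init__(self, estimator):
        self.estimator = estimator

    @if_delegate_has_method(delegate='estimator')
    def predict(self, X):
        return self.estimator.predict(X)

print(hasattr(_Wrapper(object()), 'predict'))        # False: delegate lacks predict
print(hasattr(_Wrapper(_HasPredict()), 'predict'))   # True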
|
||||
|
||||
|
||||
def _safe_split(estimator, X, y, indices, train_indices=None):
|
||||
"""Create subset of dataset and properly handle kernels.
|
||||
|
||||
Slice X, y according to indices for cross-validation, but take care of
|
||||
precomputed kernel-matrices or pairwise affinities / distances.
|
||||
|
||||
If ``estimator._pairwise is True``, X needs to be square and
|
||||
we slice rows and columns. If ``train_indices`` is not None,
|
||||
we slice rows using ``indices`` (assumed the test set) and columns
|
||||
using ``train_indices``, indicating the training set.
|
||||
|
||||
Labels y will always be indexed only along the first axis.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
estimator : object
|
||||
Estimator to determine whether we should slice only rows or rows and
|
||||
columns.
|
||||
|
||||
X : array-like, sparse matrix or iterable
|
||||
Data to be indexed. If ``estimator._pairwise is True``,
|
||||
this needs to be a square array-like or sparse matrix.
|
||||
|
||||
y : array-like, sparse matrix or iterable
|
||||
Targets to be indexed.
|
||||
|
||||
indices : array of int
|
||||
Rows to select from X and y.
|
||||
If ``estimator._pairwise is True`` and ``train_indices is None``
|
||||
then ``indices`` will also be used to slice columns.
|
||||
|
||||
train_indices : array of int or None, default=None
|
||||
If ``estimator._pairwise is True`` and ``train_indices is not None``,
|
||||
then ``train_indices`` will be used to slice the columns of X.
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_subset : array-like, sparse matrix or list
|
||||
Indexed data.
|
||||
|
||||
y_subset : array-like, sparse matrix or list
|
||||
Indexed targets.
|
||||
|
||||
"""
|
||||
if getattr(estimator, "_pairwise", False):
|
||||
if not hasattr(X, "shape"):
|
||||
raise ValueError("Precomputed kernels or affinity matrices have "
|
||||
"to be passed as arrays or sparse matrices.")
|
||||
# X is a precomputed square kernel matrix
|
||||
if X.shape[0] != X.shape[1]:
|
||||
raise ValueError("X should be a square kernel matrix")
|
||||
if train_indices is None:
|
||||
X_subset = X[np.ix_(indices, indices)]
|
||||
else:
|
||||
X_subset = X[np.ix_(indices, train_indices)]
|
||||
else:
|
||||
X_subset = _safe_indexing(X, indices)
|
||||
|
||||
if y is not None:
|
||||
y_subset = _safe_indexing(y, indices)
|
||||
else:
|
||||
y_subset = None
|
||||
|
||||
return X_subset, y_subset
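Illustrative sketch (``_PairwiseStub`` is a made-up stand-in that only sets ``_pairwise``): with a precomputed square kernel, test rows are sliced against training columns.
import numpy as np

class _PairwiseStub:
    _pairwise = True      # hypothetical estimator flagging a precomputed kernel

K = np.arange(16, dtype=float).reshape(4, 4)   # square "kernel" matrix
y = np.array([0, 1, 0, 1])
K_test, y_test = _safe_split(_PairwiseStub(), K, y, [2, 3], train_indices=[0, 1])
print(K_test.shape)   # (2, 2): test rows against training columns
print(y_test)         # [0 1]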
|
18
venv/Lib/site-packages/sklearn/utils/mocking.py
Normal file
18
venv/Lib/site-packages/sklearn/utils/mocking.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _mocking # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.utils.mocking'
|
||||
correct_import_path = 'sklearn.utils'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_mocking, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
448
venv/Lib/site-packages/sklearn/utils/multiclass.py
Normal file
448
venv/Lib/site-packages/sklearn/utils/multiclass.py
Normal file
|
@ -0,0 +1,448 @@
|
|||
# Author: Arnaud Joly, Joel Nothman, Hamzeh Alsalhi
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
"""
|
||||
Multi-class / multi-label utility function
|
||||
==========================================
|
||||
|
||||
"""
|
||||
from collections.abc import Sequence
|
||||
from itertools import chain
|
||||
|
||||
from scipy.sparse import issparse
|
||||
from scipy.sparse.base import spmatrix
|
||||
from scipy.sparse import dok_matrix
|
||||
from scipy.sparse import lil_matrix
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .validation import check_array, _assert_all_finite
|
||||
|
||||
|
||||
def _unique_multiclass(y):
|
||||
if hasattr(y, '__array__'):
|
||||
return np.unique(np.asarray(y))
|
||||
else:
|
||||
return set(y)
|
||||
|
||||
|
||||
def _unique_indicator(y):
|
||||
return np.arange(
|
||||
check_array(y, accept_sparse=['csr', 'csc', 'coo']).shape[1]
|
||||
)
|
||||
|
||||
|
||||
_FN_UNIQUE_LABELS = {
|
||||
'binary': _unique_multiclass,
|
||||
'multiclass': _unique_multiclass,
|
||||
'multilabel-indicator': _unique_indicator,
|
||||
}
|
||||
|
||||
|
||||
def unique_labels(*ys):
|
||||
"""Extract an ordered array of unique labels
|
||||
|
||||
We don't allow:
|
||||
- mix of multilabel and multiclass (single label) targets
|
||||
- mix of label indicator matrix and anything else,
|
||||
because there are no explicit labels
|
||||
- mix of label indicator matrices of different sizes
|
||||
- mix of string and integer labels
|
||||
|
||||
At the moment, we also don't allow "multiclass-multioutput" input type.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
*ys : array-likes
|
||||
|
||||
Returns
|
||||
-------
|
||||
out : numpy array of shape [n_unique_labels]
|
||||
An ordered array of unique labels.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.utils.multiclass import unique_labels
|
||||
>>> unique_labels([3, 5, 5, 5, 7, 7])
|
||||
array([3, 5, 7])
|
||||
>>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])
|
||||
array([1, 2, 3, 4])
|
||||
>>> unique_labels([1, 2, 10], [5, 11])
|
||||
array([ 1, 2, 5, 10, 11])
|
||||
"""
|
||||
if not ys:
|
||||
raise ValueError('No argument has been passed.')
|
||||
# Check that we don't mix label format
|
||||
|
||||
ys_types = set(type_of_target(x) for x in ys)
|
||||
if ys_types == {"binary", "multiclass"}:
|
||||
ys_types = {"multiclass"}
|
||||
|
||||
if len(ys_types) > 1:
|
||||
raise ValueError("Mix type of y not allowed, got types %s" % ys_types)
|
||||
|
||||
label_type = ys_types.pop()
|
||||
|
||||
# Check consistency for the indicator format
|
||||
if (label_type == "multilabel-indicator" and
|
||||
len(set(check_array(y,
|
||||
accept_sparse=['csr', 'csc', 'coo']).shape[1]
|
||||
for y in ys)) > 1):
|
||||
raise ValueError("Multi-label binary indicator input with "
|
||||
"different numbers of labels")
|
||||
|
||||
# Get the unique set of labels
|
||||
_unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
|
||||
if not _unique_labels:
|
||||
raise ValueError("Unknown label type: %s" % repr(ys))
|
||||
|
||||
ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))
|
||||
|
||||
# Check that we don't mix string type with number type
|
||||
if (len(set(isinstance(label, str) for label in ys_labels)) > 1):
|
||||
raise ValueError("Mix of label input types (string and number)")
|
||||
|
||||
return np.array(sorted(ys_labels))
|
||||
|
||||
|
||||
def _is_integral_float(y):
|
||||
return y.dtype.kind == 'f' and np.all(y.astype(int) == y)
|
||||
|
||||
|
||||
def is_multilabel(y):
|
||||
""" Check if ``y`` is in a multilabel format.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y : numpy array of shape [n_samples]
|
||||
Target values.
|
||||
|
||||
Returns
|
||||
-------
|
||||
out : bool,
|
||||
Return ``True`` if ``y`` is in a multilabel format, else ``False``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from sklearn.utils.multiclass import is_multilabel
|
||||
>>> is_multilabel([0, 1, 0, 1])
|
||||
False
|
||||
>>> is_multilabel([[1], [0, 2], []])
|
||||
False
|
||||
>>> is_multilabel(np.array([[1, 0], [0, 0]]))
|
||||
True
|
||||
>>> is_multilabel(np.array([[1], [0], [0]]))
|
||||
False
|
||||
>>> is_multilabel(np.array([[1, 0, 0]]))
|
||||
True
|
||||
"""
|
||||
if hasattr(y, '__array__') or isinstance(y, Sequence):
|
||||
y = np.asarray(y)
|
||||
if not (hasattr(y, "shape") and y.ndim == 2 and y.shape[1] > 1):
|
||||
return False
|
||||
|
||||
if issparse(y):
|
||||
if isinstance(y, (dok_matrix, lil_matrix)):
|
||||
y = y.tocsr()
|
||||
return (len(y.data) == 0 or np.unique(y.data).size == 1 and
|
||||
(y.dtype.kind in 'biu' or # bool, int, uint
|
||||
_is_integral_float(np.unique(y.data))))
|
||||
else:
|
||||
labels = np.unique(y)
|
||||
|
||||
return len(labels) < 3 and (y.dtype.kind in 'biu' or # bool, int, uint
|
||||
_is_integral_float(labels))
|
||||
|
||||
|
||||
def check_classification_targets(y):
|
||||
"""Ensure that target y is of a non-regression type.
|
||||
|
||||
Only the following target types (as defined in type_of_target) are allowed:
|
||||
'binary', 'multiclass', 'multiclass-multioutput',
|
||||
'multilabel-indicator', 'multilabel-sequences'
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y : array-like
|
||||
"""
|
||||
y_type = type_of_target(y)
|
||||
if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
|
||||
'multilabel-indicator', 'multilabel-sequences']:
|
||||
raise ValueError("Unknown label type: %r" % y_type)
|
||||
|
||||
|
||||
def type_of_target(y):
|
||||
"""Determine the type of data indicated by the target.
|
||||
|
||||
Note that this type is the most specific type that can be inferred.
|
||||
For example:
|
||||
|
||||
* ``binary`` is more specific but compatible with ``multiclass``.
|
||||
* ``multiclass`` of integers is more specific but compatible with
|
||||
``continuous``.
|
||||
* ``multilabel-indicator`` is more specific but compatible with
|
||||
``multiclass-multioutput``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y : array-like
|
||||
|
||||
Returns
|
||||
-------
|
||||
target_type : string
|
||||
One of:
|
||||
|
||||
* 'continuous': `y` is an array-like of floats that are not all
|
||||
integers, and is 1d or a column vector.
|
||||
* 'continuous-multioutput': `y` is a 2d array of floats that are
|
||||
not all integers, and both dimensions are of size > 1.
|
||||
* 'binary': `y` contains <= 2 discrete values and is 1d or a column
|
||||
vector.
|
||||
* 'multiclass': `y` contains more than two discrete values, is not a
|
||||
sequence of sequences, and is 1d or a column vector.
|
||||
* 'multiclass-multioutput': `y` is a 2d array that contains more
|
||||
than two discrete values, is not a sequence of sequences, and both
|
||||
dimensions are of size > 1.
|
||||
* 'multilabel-indicator': `y` is a label indicator matrix, an array
|
||||
of two dimensions with at least two columns, and at most 2 unique
|
||||
values.
|
||||
* 'unknown': `y` is array-like but none of the above, such as a 3d
|
||||
array, sequence of sequences, or an array of non-sequence objects.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> type_of_target([0.1, 0.6])
|
||||
'continuous'
|
||||
>>> type_of_target([1, -1, -1, 1])
|
||||
'binary'
|
||||
>>> type_of_target(['a', 'b', 'a'])
|
||||
'binary'
|
||||
>>> type_of_target([1.0, 2.0])
|
||||
'binary'
|
||||
>>> type_of_target([1, 0, 2])
|
||||
'multiclass'
|
||||
>>> type_of_target([1.0, 0.0, 3.0])
|
||||
'multiclass'
|
||||
>>> type_of_target(['a', 'b', 'c'])
|
||||
'multiclass'
|
||||
>>> type_of_target(np.array([[1, 2], [3, 1]]))
|
||||
'multiclass-multioutput'
|
||||
>>> type_of_target([[1, 2]])
|
||||
'multilabel-indicator'
|
||||
>>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))
|
||||
'continuous-multioutput'
|
||||
>>> type_of_target(np.array([[0, 1], [1, 1]]))
|
||||
'multilabel-indicator'
|
||||
"""
|
||||
valid = ((isinstance(y, (Sequence, spmatrix)) or hasattr(y, '__array__'))
|
||||
and not isinstance(y, str))
|
||||
|
||||
if not valid:
|
||||
raise ValueError('Expected array-like (array or non-string sequence), '
|
||||
'got %r' % y)
|
||||
|
||||
sparse_pandas = (y.__class__.__name__ in ['SparseSeries', 'SparseArray'])
|
||||
if sparse_pandas:
|
||||
raise ValueError("y cannot be class 'SparseSeries' or 'SparseArray'")
|
||||
|
||||
if is_multilabel(y):
|
||||
return 'multilabel-indicator'
|
||||
|
||||
try:
|
||||
y = np.asarray(y)
|
||||
except ValueError:
|
||||
# Known to fail in numpy 1.3 for array of arrays
|
||||
return 'unknown'
|
||||
|
||||
# The old sequence of sequences format
|
||||
try:
|
||||
if (not hasattr(y[0], '__array__') and isinstance(y[0], Sequence)
|
||||
and not isinstance(y[0], str)):
|
||||
raise ValueError('You appear to be using a legacy multi-label data'
|
||||
' representation. Sequence of sequences are no'
|
||||
' longer supported; use a binary array or sparse'
|
||||
' matrix instead - the MultiLabelBinarizer'
|
||||
' transformer can convert to this format.')
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
# Invalid inputs
|
||||
if y.ndim > 2 or (y.dtype == object and len(y) and
|
||||
not isinstance(y.flat[0], str)):
|
||||
return 'unknown' # [[[1, 2]]] or [obj_1] and not ["label_1"]
|
||||
|
||||
if y.ndim == 2 and y.shape[1] == 0:
|
||||
return 'unknown' # [[]]
|
||||
|
||||
if y.ndim == 2 and y.shape[1] > 1:
|
||||
suffix = "-multioutput" # [[1, 2], [1, 2]]
|
||||
else:
|
||||
suffix = "" # [1, 2, 3] or [[1], [2], [3]]
|
||||
|
||||
# check float and contains non-integer float values
|
||||
if y.dtype.kind == 'f' and np.any(y != y.astype(int)):
|
||||
# [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.]
|
||||
_assert_all_finite(y)
|
||||
return 'continuous' + suffix
|
||||
|
||||
if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1):
|
||||
return 'multiclass' + suffix # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]
|
||||
else:
|
||||
return 'binary' # [1, 2] or [["a"], ["b"]]
|
||||
|
||||
|
||||
def _check_partial_fit_first_call(clf, classes=None):
|
||||
"""Private helper function for factorizing common classes param logic
|
||||
|
||||
Estimators that implement the ``partial_fit`` API need to be provided with
|
||||
the list of possible classes at the first call to partial_fit.
|
||||
|
||||
Subsequent calls to partial_fit should check that ``classes`` is still
|
||||
consistent with a previous value of ``clf.classes_`` when provided.
|
||||
|
||||
This function returns True if it detects that this was the first call to
|
||||
``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also
|
||||
set on ``clf``.
|
||||
|
||||
"""
|
||||
if getattr(clf, 'classes_', None) is None and classes is None:
|
||||
raise ValueError("classes must be passed on the first call "
|
||||
"to partial_fit.")
|
||||
|
||||
elif classes is not None:
|
||||
if getattr(clf, 'classes_', None) is not None:
|
||||
if not np.array_equal(clf.classes_, unique_labels(classes)):
|
||||
raise ValueError(
|
||||
"`classes=%r` is not the same as on last call "
|
||||
"to partial_fit, was: %r" % (classes, clf.classes_))
|
||||
|
||||
else:
|
||||
# This is the first call to partial_fit
|
||||
clf.classes_ = unique_labels(classes)
|
||||
return True
|
||||
|
||||
# classes is None and clf.classes_ has already previously been set:
|
||||
# nothing to do
|
||||
return False
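Minimal calling-pattern sketch (``_Clf`` is a hypothetical bare estimator, used only for illustration):
class _Clf:
    """Bare-bones stand-in estimator, just to show the calling pattern."""
    pass

clf = _Clf()
print(_check_partial_fit_first_call(clf, classes=[0, 1, 2]))   # True, sets clf.classes_
print(clf.classes_)                                            # [0 1 2]
print(_check_partial_fit_first_call(clf))                      # False on later calls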
|
||||
|
||||
|
||||
def class_distribution(y, sample_weight=None):
|
||||
"""Compute class priors from multioutput-multiclass target data
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y : array like or sparse matrix of size (n_samples, n_outputs)
|
||||
The labels for each example.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
Sample weights.
|
||||
|
||||
Returns
|
||||
-------
|
||||
classes : list of size n_outputs of arrays of size (n_classes,)
|
||||
List of classes for each column.
|
||||
|
||||
n_classes : list of integers of size n_outputs
|
||||
Number of classes in each column
|
||||
|
||||
class_prior : list of size n_outputs of arrays of size (n_classes,)
|
||||
Class distribution of each column.
|
||||
|
||||
"""
|
||||
classes = []
|
||||
n_classes = []
|
||||
class_prior = []
|
||||
|
||||
n_samples, n_outputs = y.shape
|
||||
if sample_weight is not None:
|
||||
sample_weight = np.asarray(sample_weight)
|
||||
|
||||
if issparse(y):
|
||||
y = y.tocsc()
|
||||
y_nnz = np.diff(y.indptr)
|
||||
|
||||
for k in range(n_outputs):
|
||||
col_nonzero = y.indices[y.indptr[k]:y.indptr[k + 1]]
|
||||
# separate sample weights for zero and non-zero elements
|
||||
if sample_weight is not None:
|
||||
nz_samp_weight = sample_weight[col_nonzero]
|
||||
zeros_samp_weight_sum = (np.sum(sample_weight) -
|
||||
np.sum(nz_samp_weight))
|
||||
else:
|
||||
nz_samp_weight = None
|
||||
zeros_samp_weight_sum = y.shape[0] - y_nnz[k]
|
||||
|
||||
classes_k, y_k = np.unique(y.data[y.indptr[k]:y.indptr[k + 1]],
|
||||
return_inverse=True)
|
||||
class_prior_k = np.bincount(y_k, weights=nz_samp_weight)
|
||||
|
||||
# An explicit zero was found, combine its weight with the weight
|
||||
# of the implicit zeros
|
||||
if 0 in classes_k:
|
||||
class_prior_k[classes_k == 0] += zeros_samp_weight_sum
|
||||
|
||||
# If there is an implicit zero and it is not in classes and
|
||||
# class_prior, make an entry for it
|
||||
if 0 not in classes_k and y_nnz[k] < y.shape[0]:
|
||||
classes_k = np.insert(classes_k, 0, 0)
|
||||
class_prior_k = np.insert(class_prior_k, 0,
|
||||
zeros_samp_weight_sum)
|
||||
|
||||
classes.append(classes_k)
|
||||
n_classes.append(classes_k.shape[0])
|
||||
class_prior.append(class_prior_k / class_prior_k.sum())
|
||||
else:
|
||||
for k in range(n_outputs):
|
||||
classes_k, y_k = np.unique(y[:, k], return_inverse=True)
|
||||
classes.append(classes_k)
|
||||
n_classes.append(classes_k.shape[0])
|
||||
class_prior_k = np.bincount(y_k, weights=sample_weight)
|
||||
class_prior.append(class_prior_k / class_prior_k.sum())
|
||||
|
||||
return (classes, n_classes, class_prior)
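Illustrative sketch on a small dense target (values in the comments are approximate):
import numpy as np

y = np.array([[1, 0],
              [2, 0],
              [2, 1]])
classes, n_classes, priors = class_distribution(y)
print(classes)     # [array([1, 2]), array([0, 1])]
print(n_classes)   # [2, 2]
print(priors)      # [array([0.333, 0.667]), array([0.667, 0.333])]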
|
||||
|
||||
|
||||
def _ovr_decision_function(predictions, confidences, n_classes):
|
||||
"""Compute a continuous, tie-breaking OvR decision function from OvO.
|
||||
|
||||
It is important to include a continuous value, not only votes,
|
||||
to make computing AUC or calibration meaningful.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
predictions : array-like, shape (n_samples, n_classifiers)
|
||||
Predicted classes for each binary classifier.
|
||||
|
||||
confidences : array-like, shape (n_samples, n_classifiers)
|
||||
Decision functions or predicted probabilities for positive class
|
||||
for each binary classifier.
|
||||
|
||||
n_classes : int
|
||||
Number of classes. n_classifiers must be
|
||||
``n_classes * (n_classes - 1) / 2``
|
||||
"""
|
||||
n_samples = predictions.shape[0]
|
||||
votes = np.zeros((n_samples, n_classes))
|
||||
sum_of_confidences = np.zeros((n_samples, n_classes))
|
||||
|
||||
k = 0
|
||||
for i in range(n_classes):
|
||||
for j in range(i + 1, n_classes):
|
||||
sum_of_confidences[:, i] -= confidences[:, k]
|
||||
sum_of_confidences[:, j] += confidences[:, k]
|
||||
votes[predictions[:, k] == 0, i] += 1
|
||||
votes[predictions[:, k] == 1, j] += 1
|
||||
k += 1
|
||||
|
||||
# Monotonically transform the sum_of_confidences to (-1/3, 1/3)
|
||||
# and add it with votes. The monotonic transformation is
|
||||
# f: x -> x / (3 * (|x| + 1)), it uses 1/3 instead of 1/2
|
||||
# to ensure that we won't reach the limits and change vote order.
|
||||
# The motivation is to use confidence levels as a way to break ties in
|
||||
# the votes without switching any decision made based on a difference
|
||||
# of 1 vote.
|
||||
transformed_confidences = (sum_of_confidences /
|
||||
(3 * (np.abs(sum_of_confidences) + 1)))
|
||||
return votes + transformed_confidences
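A tiny worked example (illustrative only) with three classes and the pairwise classifiers ordered (0 vs 1), (0 vs 2), (1 vs 2); the confidence term breaks ties without overturning the vote count.
import numpy as np

predictions = np.array([[1, 1, 1]])           # class 1 beats 0; class 2 beats 0 and 1
confidences = np.array([[0.2, 0.4, 0.6]])
scores = _ovr_decision_function(predictions, confidences, n_classes=3)
print(scores)                                 # approx. [[-0.125  0.905  2.167]]
print(scores.argmax(axis=1))                  # [2]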
|
BIN
venv/Lib/site-packages/sklearn/utils/murmurhash.cp36-win32.pyd
Normal file
BIN
venv/Lib/site-packages/sklearn/utils/murmurhash.cp36-win32.pyd
Normal file
Binary file not shown.
21
venv/Lib/site-packages/sklearn/utils/murmurhash.pxd
Normal file
21
venv/Lib/site-packages/sklearn/utils/murmurhash.pxd
Normal file
|
@ -0,0 +1,21 @@
|
|||
"""Export fast murmurhash C/C++ routines + cython wrappers"""
|
||||
|
||||
cimport numpy as np
|
||||
|
||||
# The C API is disabled for now, since it requires -I flags to get
|
||||
# compilation to work even when these functions are not used.
|
||||
#cdef extern from "MurmurHash3.h":
|
||||
# void MurmurHash3_x86_32(void* key, int len, unsigned int seed,
|
||||
# void* out)
|
||||
#
|
||||
# void MurmurHash3_x86_128(void* key, int len, unsigned int seed,
|
||||
# void* out)
|
||||
#
|
||||
# void MurmurHash3_x64_128(void* key, int len, unsigned int seed,
|
||||
# void* out)
|
||||
|
||||
|
||||
cpdef np.uint32_t murmurhash3_int_u32(int key, unsigned int seed)
|
||||
cpdef np.int32_t murmurhash3_int_s32(int key, unsigned int seed)
|
||||
cpdef np.uint32_t murmurhash3_bytes_u32(bytes key, unsigned int seed)
|
||||
cpdef np.int32_t murmurhash3_bytes_s32(bytes key, unsigned int seed)
|
256
venv/Lib/site-packages/sklearn/utils/optimize.py
Normal file
256
venv/Lib/site-packages/sklearn/utils/optimize.py
Normal file
|
@ -0,0 +1,256 @@
|
|||
"""
|
||||
Our own implementation of the Newton algorithm
|
||||
|
||||
Unlike the scipy.optimize version, this version of the Newton conjugate
|
||||
gradient solver uses only one function call to retrieve the
|
||||
func value, the gradient value and a callable for the Hessian matvec
|
||||
product. If the function call is very expensive (e.g. for logistic
|
||||
regression with large design matrix), this approach gives very
|
||||
significant speedups.
|
||||
"""
|
||||
# This is a modified file from scipy.optimize
|
||||
# Original authors: Travis Oliphant, Eric Jones
|
||||
# Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour
|
||||
# License: BSD
|
||||
|
||||
import numpy as np
|
||||
import warnings
|
||||
from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1
|
||||
|
||||
from ..exceptions import ConvergenceWarning
|
||||
from . import deprecated
|
||||
|
||||
|
||||
class _LineSearchError(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
|
||||
**kwargs):
|
||||
"""
|
||||
Same as line_search_wolfe1, but fall back to line_search_wolfe2 if a
|
||||
suitable step length is not found, and raise an exception if a
|
||||
suitable step length is still not found.
|
||||
|
||||
Raises
|
||||
------
|
||||
_LineSearchError
|
||||
If no suitable step size is found
|
||||
|
||||
"""
|
||||
ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
|
||||
old_fval, old_old_fval,
|
||||
**kwargs)
|
||||
|
||||
if ret[0] is None:
|
||||
# line search failed: try different one.
|
||||
ret = line_search_wolfe2(f, fprime, xk, pk, gfk,
|
||||
old_fval, old_old_fval, **kwargs)
|
||||
|
||||
if ret[0] is None:
|
||||
raise _LineSearchError()
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
def _cg(fhess_p, fgrad, maxiter, tol):
|
||||
"""
|
||||
Iteratively solve the linear system 'fhess_p . xsupi = -fgrad'
|
||||
with a conjugate gradient descent.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fhess_p : callable
|
||||
Function that takes the gradient as a parameter and returns the
|
||||
matrix product of the Hessian and gradient
|
||||
|
||||
fgrad : ndarray, shape (n_features,) or (n_features + 1,)
|
||||
Gradient vector
|
||||
|
||||
maxiter : int
|
||||
Number of CG iterations.
|
||||
|
||||
tol : float
|
||||
Stopping criterion.
|
||||
|
||||
Returns
|
||||
-------
|
||||
xsupi : ndarray, shape (n_features,) or (n_features + 1,)
|
||||
Estimated solution
|
||||
"""
|
||||
xsupi = np.zeros(len(fgrad), dtype=fgrad.dtype)
|
||||
ri = fgrad
|
||||
psupi = -ri
|
||||
i = 0
|
||||
dri0 = np.dot(ri, ri)
|
||||
|
||||
while i <= maxiter:
|
||||
if np.sum(np.abs(ri)) <= tol:
|
||||
break
|
||||
|
||||
Ap = fhess_p(psupi)
|
||||
# check curvature
|
||||
curv = np.dot(psupi, Ap)
|
||||
if 0 <= curv <= 3 * np.finfo(np.float64).eps:
|
||||
break
|
||||
elif curv < 0:
|
||||
if i > 0:
|
||||
break
|
||||
else:
|
||||
# fall back to steepest descent direction
|
||||
xsupi += dri0 / curv * psupi
|
||||
break
|
||||
alphai = dri0 / curv
|
||||
xsupi += alphai * psupi
|
||||
ri = ri + alphai * Ap
|
||||
dri1 = np.dot(ri, ri)
|
||||
betai = dri1 / dri0
|
||||
psupi = -ri + betai * psupi
|
||||
i = i + 1
|
||||
dri0 = dri1 # update np.dot(ri,ri) for next time.
|
||||
|
||||
return xsupi
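Illustrative sketch: with a symmetric positive-definite stand-in Hessian, the CG solve returns the Newton direction, i.e. the solution of ``H @ step = -g``.
import numpy as np

H = np.array([[3.0, 1.0],
              [1.0, 2.0]])                     # stands in for the Hessian
g = np.array([1.0, -2.0])                      # stands in for the gradient
step = _cg(lambda p: H @ p, g, maxiter=10, tol=1e-10)
print(np.allclose(H @ step, -g))               # True: the Newton step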
|
||||
|
||||
|
||||
@deprecated("newton_cg is deprecated in version "
|
||||
"0.22 and will be removed in version 0.24.")
|
||||
def newton_cg(grad_hess, func, grad, x0, args=(), tol=1e-4,
|
||||
maxiter=100, maxinner=200, line_search=True, warn=True):
|
||||
return _newton_cg(grad_hess, func, grad, x0, args, tol, maxiter,
|
||||
maxinner, line_search, warn)
|
||||
|
||||
|
||||
def _newton_cg(grad_hess, func, grad, x0, args=(), tol=1e-4,
|
||||
maxiter=100, maxinner=200, line_search=True, warn=True):
|
||||
"""
|
||||
Minimization of scalar function of one or more variables using the
|
||||
Newton-CG algorithm.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
grad_hess : callable
|
||||
Should return the gradient and a callable returning the matvec product
|
||||
of the Hessian.
|
||||
|
||||
func : callable
|
||||
Should return the value of the function.
|
||||
|
||||
grad : callable
|
||||
Should return the gradient of the function. This is used
|
||||
by the linesearch functions.
|
||||
|
||||
x0 : array of float
|
||||
Initial guess.
|
||||
|
||||
args : tuple, optional
|
||||
Arguments passed to func_grad_hess, func and grad.
|
||||
|
||||
tol : float
|
||||
Stopping criterion. The iteration will stop when
|
||||
``max{|g_i | i = 1, ..., n} <= tol``
|
||||
where ``g_i`` is the i-th component of the gradient.
|
||||
|
||||
maxiter : int
|
||||
Number of Newton iterations.
|
||||
|
||||
maxinner : int
|
||||
Number of CG iterations.
|
||||
|
||||
line_search : boolean
|
||||
Whether to use a line search or not.
|
||||
|
||||
warn : boolean
|
||||
Whether to warn when the algorithm did not converge.
|
||||
|
||||
Returns
|
||||
-------
|
||||
xk : ndarray of float
|
||||
Estimated minimum.
|
||||
"""
|
||||
x0 = np.asarray(x0).flatten()
|
||||
xk = x0
|
||||
k = 0
|
||||
|
||||
if line_search:
|
||||
old_fval = func(x0, *args)
|
||||
old_old_fval = None
|
||||
|
||||
# Outer loop: our Newton iteration
|
||||
while k < maxiter:
|
||||
# Compute a search direction pk by applying the CG method to
|
||||
# del2 f(xk) p = - fgrad f(xk) starting from 0.
|
||||
fgrad, fhess_p = grad_hess(xk, *args)
|
||||
|
||||
absgrad = np.abs(fgrad)
|
||||
if np.max(absgrad) <= tol:
|
||||
break
|
||||
|
||||
maggrad = np.sum(absgrad)
|
||||
eta = min([0.5, np.sqrt(maggrad)])
|
||||
termcond = eta * maggrad
|
||||
|
||||
# Inner loop: solve the Newton update by conjugate gradient, to
|
||||
# avoid inverting the Hessian
|
||||
xsupi = _cg(fhess_p, fgrad, maxiter=maxinner, tol=termcond)
|
||||
|
||||
alphak = 1.0
|
||||
|
||||
if line_search:
|
||||
try:
|
||||
alphak, fc, gc, old_fval, old_old_fval, gfkp1 = \
|
||||
_line_search_wolfe12(func, grad, xk, xsupi, fgrad,
|
||||
old_fval, old_old_fval, args=args)
|
||||
except _LineSearchError:
|
||||
warnings.warn('Line Search failed')
|
||||
break
|
||||
|
||||
xk = xk + alphak * xsupi # upcast if necessary
|
||||
k += 1
|
||||
|
||||
if warn and k >= maxiter:
|
||||
warnings.warn("newton-cg failed to converge. Increase the "
|
||||
"number of iterations.", ConvergenceWarning)
|
||||
return xk, k
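A minimal end-to-end sketch on a strictly convex quadratic (illustrative only; the lambdas are stand-ins for a model's loss, gradient and Hessian-vector product):
import numpy as np

A = np.array([[3.0, 1.0],
              [1.0, 2.0]])
b = np.array([1.0, 0.0])

func = lambda w: 0.5 * w @ A @ w - b @ w       # simple strictly convex quadratic
grad = lambda w: A @ w - b
grad_hess = lambda w: (grad(w), lambda p: A @ p)

w_opt, n_iter = _newton_cg(grad_hess, func, grad, x0=np.zeros(2))
print(np.allclose(w_opt, np.linalg.solve(A, b)))   # True, typically after one iteration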
|
||||
|
||||
|
||||
def _check_optimize_result(solver, result, max_iter=None,
|
||||
extra_warning_msg=None):
|
||||
"""Check the OptimizeResult for successful convergence
|
||||
|
||||
Parameters
|
||||
----------
|
||||
solver: str
|
||||
solver name. Currently only `lbfgs` is supported.
|
||||
result: OptimizeResult
|
||||
result of the scipy.optimize.minimize function
|
||||
max_iter: {int, None}
|
||||
expected maximum number of iterations
|
||||
|
||||
Returns
|
||||
-------
|
||||
n_iter: int
|
||||
number of iterations
|
||||
"""
|
||||
# handle both scipy and scikit-learn solver names
|
||||
if solver == "lbfgs":
|
||||
if result.status != 0:
|
||||
warning_msg = (
|
||||
"{} failed to converge (status={}):\n{}.\n\n"
|
||||
"Increase the number of iterations (max_iter) "
|
||||
"or scale the data as shown in:\n"
|
||||
" https://scikit-learn.org/stable/modules/"
|
||||
"preprocessing.html"
|
||||
).format(solver, result.status, result.message.decode("latin1"))
|
||||
if extra_warning_msg is not None:
|
||||
warning_msg += "\n" + extra_warning_msg
|
||||
warnings.warn(warning_msg, ConvergenceWarning, stacklevel=2)
|
||||
if max_iter is not None:
|
||||
# In scipy <= 1.0.0, nit may exceed maxiter for lbfgs.
|
||||
# See https://github.com/scipy/scipy/issues/7854
|
||||
n_iter_i = min(result.nit, max_iter)
|
||||
else:
|
||||
n_iter_i = result.nit
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
return n_iter_i
|
104
venv/Lib/site-packages/sklearn/utils/random.py
Normal file
104
venv/Lib/site-packages/sklearn/utils/random.py
Normal file
|
@ -0,0 +1,104 @@
|
|||
# Author: Hamzeh Alsalhi <ha258@cornell.edu>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
import array
|
||||
|
||||
from . import check_random_state
|
||||
from ._random import sample_without_replacement
|
||||
from . import deprecated
|
||||
|
||||
__all__ = ['sample_without_replacement']
|
||||
|
||||
|
||||
@deprecated("random_choice_csc is deprecated in version "
|
||||
"0.22 and will be removed in version 0.24.")
|
||||
def random_choice_csc(n_samples, classes, class_probability=None,
|
||||
random_state=None):
|
||||
return _random_choice_csc(n_samples, classes, class_probability,
|
||||
random_state)
|
||||
|
||||
|
||||
def _random_choice_csc(n_samples, classes, class_probability=None,
|
||||
random_state=None):
|
||||
"""Generate a sparse random matrix given column class distributions
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_samples : int,
|
||||
Number of samples to draw in each column.
|
||||
|
||||
classes : list of size n_outputs of arrays of size (n_classes,)
|
||||
List of classes for each column.
|
||||
|
||||
class_probability : list of size n_outputs of arrays of size (n_classes,)
|
||||
Optional (default=None). Class distribution of each column. If None the
|
||||
uniform distribution is assumed.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Controls the randomness of the sampled classes.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
random_matrix : sparse csc matrix of size (n_samples, n_outputs)
|
||||
|
||||
"""
|
||||
data = array.array('i')
|
||||
indices = array.array('i')
|
||||
indptr = array.array('i', [0])
|
||||
|
||||
for j in range(len(classes)):
|
||||
classes[j] = np.asarray(classes[j])
|
||||
if classes[j].dtype.kind != 'i':
|
||||
raise ValueError("class dtype %s is not supported" %
|
||||
classes[j].dtype)
|
||||
classes[j] = classes[j].astype(np.int64, copy=False)
|
||||
|
||||
# use uniform distribution if no class_probability is given
|
||||
if class_probability is None:
|
||||
class_prob_j = np.empty(shape=classes[j].shape[0])
|
||||
class_prob_j.fill(1 / classes[j].shape[0])
|
||||
else:
|
||||
class_prob_j = np.asarray(class_probability[j])
|
||||
|
||||
if not np.isclose(np.sum(class_prob_j), 1.0):
|
||||
raise ValueError("Probability array at index {0} does not sum to "
|
||||
"one".format(j))
|
||||
|
||||
if class_prob_j.shape[0] != classes[j].shape[0]:
|
||||
raise ValueError("classes[{0}] (length {1}) and "
|
||||
"class_probability[{0}] (length {2}) have "
|
||||
"different length.".format(j,
|
||||
classes[j].shape[0],
|
||||
class_prob_j.shape[0]))
|
||||
|
||||
# If 0 is not present in the classes insert it with a probability 0.0
|
||||
if 0 not in classes[j]:
|
||||
classes[j] = np.insert(classes[j], 0, 0)
|
||||
class_prob_j = np.insert(class_prob_j, 0, 0.0)
|
||||
|
||||
# If there are nonzero classes choose randomly using class_probability
|
||||
rng = check_random_state(random_state)
|
||||
if classes[j].shape[0] > 1:
|
||||
p_nonzero = 1 - class_prob_j[classes[j] == 0]
|
||||
nnz = int(n_samples * p_nonzero)
|
||||
ind_sample = sample_without_replacement(n_population=n_samples,
|
||||
n_samples=nnz,
|
||||
random_state=random_state)
|
||||
indices.extend(ind_sample)
|
||||
|
||||
# Normalize probabilities for the nonzero elements
|
||||
classes_j_nonzero = classes[j] != 0
|
||||
class_probability_nz = class_prob_j[classes_j_nonzero]
|
||||
class_probability_nz_norm = (class_probability_nz /
|
||||
np.sum(class_probability_nz))
|
||||
classes_ind = np.searchsorted(class_probability_nz_norm.cumsum(),
|
||||
rng.rand(nnz))
|
||||
data.extend(classes[j][classes_j_nonzero][classes_ind])
|
||||
indptr.append(len(indices))
|
||||
|
||||
return sp.csc_matrix((data, indices, indptr),
|
||||
(n_samples, len(classes)),
|
||||
dtype=int)
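Illustrative sketch with a single output column (the counts in the comment are approximate because the non-zero draws are random):
import numpy as np

classes = [np.array([0, 1, 2])]                      # one output column
probs = [np.array([0.5, 0.3, 0.2])]
mat = _random_choice_csc(n_samples=20, classes=classes,
                         class_probability=probs, random_state=0)
print(mat.shape)                                     # (20, 1)
vals = np.asarray(mat.todense()).ravel()
print(np.bincount(vals, minlength=3))                # roughly [10  6  4]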
|
18
venv/Lib/site-packages/sklearn/utils/seq_dataset.py
Normal file
18
venv/Lib/site-packages/sklearn/utils/seq_dataset.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _seq_dataset # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.utils.seq_dataset'
|
||||
correct_import_path = 'sklearn.utils'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_seq_dataset, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
80
venv/Lib/site-packages/sklearn/utils/setup.py
Normal file
80
venv/Lib/site-packages/sklearn/utils/setup.py
Normal file
|
@ -0,0 +1,80 @@
|
|||
import os
|
||||
from os.path import join
|
||||
|
||||
from sklearn._build_utils import gen_from_templates
|
||||
|
||||
|
||||
def configuration(parent_package='', top_path=None):
|
||||
import numpy
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
|
||||
config = Configuration('utils', parent_package, top_path)
|
||||
|
||||
libraries = []
|
||||
if os.name == 'posix':
|
||||
libraries.append('m')
|
||||
|
||||
config.add_extension('sparsefuncs_fast',
|
||||
sources=['sparsefuncs_fast.pyx'],
|
||||
libraries=libraries)
|
||||
|
||||
config.add_extension('_cython_blas',
|
||||
sources=['_cython_blas.pyx'],
|
||||
libraries=libraries)
|
||||
|
||||
config.add_extension('arrayfuncs',
|
||||
sources=['arrayfuncs.pyx'],
|
||||
include_dirs=[numpy.get_include()],
|
||||
libraries=libraries)
|
||||
|
||||
config.add_extension('murmurhash',
|
||||
sources=['murmurhash.pyx', join(
|
||||
'src', 'MurmurHash3.cpp')],
|
||||
include_dirs=['src'])
|
||||
|
||||
config.add_extension('graph_shortest_path',
|
||||
sources=['graph_shortest_path.pyx'],
|
||||
include_dirs=[numpy.get_include()])
|
||||
|
||||
config.add_extension('_fast_dict',
|
||||
sources=['_fast_dict.pyx'],
|
||||
language="c++",
|
||||
include_dirs=[numpy.get_include()],
|
||||
libraries=libraries)
|
||||
|
||||
config.add_extension('_openmp_helpers',
|
||||
sources=['_openmp_helpers.pyx'],
|
||||
libraries=libraries)
|
||||
|
||||
# generate _seq_dataset from template
|
||||
templates = ['sklearn/utils/_seq_dataset.pyx.tp',
|
||||
'sklearn/utils/_seq_dataset.pxd.tp']
|
||||
gen_from_templates(templates, top_path)
|
||||
|
||||
config.add_extension('_seq_dataset',
|
||||
sources=['_seq_dataset.pyx'],
|
||||
include_dirs=[numpy.get_include()])
|
||||
|
||||
config.add_extension('_weight_vector',
|
||||
sources=['_weight_vector.pyx'],
|
||||
include_dirs=[numpy.get_include()],
|
||||
libraries=libraries)
|
||||
|
||||
config.add_extension("_random",
|
||||
sources=["_random.pyx"],
|
||||
include_dirs=[numpy.get_include()],
|
||||
libraries=libraries)
|
||||
|
||||
config.add_extension("_logistic_sigmoid",
|
||||
sources=["_logistic_sigmoid.pyx"],
|
||||
include_dirs=[numpy.get_include()],
|
||||
libraries=libraries)
|
||||
|
||||
config.add_subpackage('tests')
|
||||
|
||||
return config
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from numpy.distutils.core import setup
|
||||
setup(**configuration(top_path='').todict())
|
548
venv/Lib/site-packages/sklearn/utils/sparsefuncs.py
Normal file
548
venv/Lib/site-packages/sklearn/utils/sparsefuncs.py
Normal file
|
@ -0,0 +1,548 @@
|
|||
# Authors: Manoj Kumar
|
||||
# Thomas Unterthiner
|
||||
# Giorgio Patrini
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
import scipy.sparse as sp
|
||||
import numpy as np
|
||||
from .validation import _deprecate_positional_args
|
||||
|
||||
from .sparsefuncs_fast import (
|
||||
csr_mean_variance_axis0 as _csr_mean_var_axis0,
|
||||
csc_mean_variance_axis0 as _csc_mean_var_axis0,
|
||||
incr_mean_variance_axis0 as _incr_mean_var_axis0)
|
||||
|
||||
|
||||
def _raise_typeerror(X):
|
||||
"""Raises a TypeError if X is not a CSR or CSC matrix"""
|
||||
input_type = X.format if sp.issparse(X) else type(X)
|
||||
err = "Expected a CSR or CSC sparse matrix, got %s." % input_type
|
||||
raise TypeError(err)
|
||||
|
||||
|
||||
def _raise_error_wrong_axis(axis):
|
||||
if axis not in (0, 1):
|
||||
raise ValueError(
|
||||
"Unknown axis value: %d. Use 0 for rows, or 1 for columns" % axis)
|
||||
|
||||
|
||||
def inplace_csr_column_scale(X, scale):
|
||||
"""Inplace column scaling of a CSR matrix.
|
||||
|
||||
Scale each feature of the data matrix by multiplying with specific scale
|
||||
provided by the caller assuming a (n_samples, n_features) shape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSR matrix with shape (n_samples, n_features)
|
||||
Matrix to normalize using the variance of the features.
|
||||
|
||||
scale : float array with shape (n_features,)
|
||||
Array of precomputed feature-wise values to use for scaling.
|
||||
"""
|
||||
assert scale.shape[0] == X.shape[1]
|
||||
X.data *= scale.take(X.indices, mode='clip')
|
||||
|
||||
|
||||
def inplace_csr_row_scale(X, scale):
|
||||
""" Inplace row scaling of a CSR matrix.
|
||||
|
||||
Scale each sample of the data matrix by multiplying with specific scale
|
||||
provided by the caller assuming a (n_samples, n_features) shape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSR sparse matrix, shape (n_samples, n_features)
|
||||
Matrix to be scaled.
|
||||
|
||||
scale : float array with shape (n_samples,)
|
||||
Array of precomputed sample-wise values to use for scaling.
|
||||
"""
|
||||
assert scale.shape[0] == X.shape[0]
|
||||
X.data *= np.repeat(scale, np.diff(X.indptr))
|
||||
|
||||
|
||||
def mean_variance_axis(X, axis):
|
||||
"""Compute mean and variance along an axix on a CSR or CSC matrix
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSR or CSC sparse matrix, shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
axis : int (either 0 or 1)
|
||||
Axis along which the mean and variance should be computed.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
means : float array with shape (n_features,)
|
||||
Feature-wise means
|
||||
|
||||
variances : float array with shape (n_features,)
|
||||
Feature-wise variances
|
||||
|
||||
"""
|
||||
_raise_error_wrong_axis(axis)
|
||||
|
||||
if isinstance(X, sp.csr_matrix):
|
||||
if axis == 0:
|
||||
return _csr_mean_var_axis0(X)
|
||||
else:
|
||||
return _csc_mean_var_axis0(X.T)
|
||||
elif isinstance(X, sp.csc_matrix):
|
||||
if axis == 0:
|
||||
return _csc_mean_var_axis0(X)
|
||||
else:
|
||||
return _csr_mean_var_axis0(X.T)
|
||||
else:
|
||||
_raise_typeerror(X)
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def incr_mean_variance_axis(X, *, axis, last_mean, last_var, last_n):
|
||||
"""Compute incremental mean and variance along an axix on a CSR or
|
||||
CSC matrix.
|
||||
|
||||
last_mean, last_var are the statistics computed at the last step by this
|
||||
function. Both must be initialized to 0-arrays of the proper size, i.e.
|
||||
the number of features in X. last_n is the number of samples encountered
|
||||
until now.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSR or CSC sparse matrix, shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
axis : int (either 0 or 1)
|
||||
Axis along which the mean and variance should be computed.
|
||||
|
||||
last_mean : float array with shape (n_features,)
|
||||
Array of feature-wise means to update with the new data X.
|
||||
|
||||
last_var : float array with shape (n_features,)
|
||||
Array of feature-wise var to update with the new data X.
|
||||
|
||||
last_n : int with shape (n_features,)
|
||||
Number of samples seen so far, excluded X.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
means : float array with shape (n_features,)
|
||||
Updated feature-wise means.
|
||||
|
||||
variances : float array with shape (n_features,)
|
||||
Updated feature-wise variances.
|
||||
|
||||
n : int with shape (n_features,)
|
||||
Updated number of seen samples.
|
||||
|
||||
Notes
|
||||
-----
|
||||
NaNs are ignored in the algorithm.
|
||||
|
||||
"""
|
||||
_raise_error_wrong_axis(axis)
|
||||
|
||||
if isinstance(X, sp.csr_matrix):
|
||||
if axis == 0:
|
||||
return _incr_mean_var_axis0(X, last_mean=last_mean,
|
||||
last_var=last_var, last_n=last_n)
|
||||
else:
|
||||
return _incr_mean_var_axis0(X.T, last_mean=last_mean,
|
||||
last_var=last_var, last_n=last_n)
|
||||
elif isinstance(X, sp.csc_matrix):
|
||||
if axis == 0:
|
||||
return _incr_mean_var_axis0(X, last_mean=last_mean,
|
||||
last_var=last_var, last_n=last_n)
|
||||
else:
|
||||
return _incr_mean_var_axis0(X.T, last_mean=last_mean,
|
||||
last_var=last_var, last_n=last_n)
|
||||
else:
|
||||
_raise_typeerror(X)
|
||||
|
||||
|
||||
def inplace_column_scale(X, scale):
|
||||
"""Inplace column scaling of a CSC/CSR matrix.
|
||||
|
||||
Scale each feature of the data matrix by multiplying with specific scale
|
||||
provided by the caller assuming a (n_samples, n_features) shape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSC or CSR matrix with shape (n_samples, n_features)
|
||||
Matrix to normalize using the variance of the features.
|
||||
|
||||
scale : float array with shape (n_features,)
|
||||
Array of precomputed feature-wise values to use for scaling.
|
||||
"""
|
||||
if isinstance(X, sp.csc_matrix):
|
||||
inplace_csr_row_scale(X.T, scale)
|
||||
elif isinstance(X, sp.csr_matrix):
|
||||
inplace_csr_column_scale(X, scale)
|
||||
else:
|
||||
_raise_typeerror(X)
|
||||
|
||||
|
||||
def inplace_row_scale(X, scale):
|
||||
""" Inplace row scaling of a CSR or CSC matrix.
|
||||
|
||||
Scale each row of the data matrix by multiplying with specific scale
|
||||
provided by the caller assuming a (n_samples, n_features) shape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSR or CSC sparse matrix, shape (n_samples, n_features)
|
||||
Matrix to be scaled.
|
||||
|
||||
scale : float array with shape (n_samples,)
|
||||
Array of precomputed sample-wise values to use for scaling.
|
||||
"""
|
||||
if isinstance(X, sp.csc_matrix):
|
||||
inplace_csr_column_scale(X.T, scale)
|
||||
elif isinstance(X, sp.csr_matrix):
|
||||
inplace_csr_row_scale(X, scale)
|
||||
else:
|
||||
_raise_typeerror(X)
|
||||
|
||||
|
||||
def inplace_swap_row_csc(X, m, n):
|
||||
"""
|
||||
Swaps two rows of a CSC matrix in-place.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : scipy.sparse.csc_matrix, shape=(n_samples, n_features)
|
||||
Matrix whose two rows are to be swapped.
|
||||
|
||||
m : int
|
||||
Index of the row of X to be swapped.
|
||||
|
||||
n : int
|
||||
Index of the row of X to be swapped.
|
||||
"""
|
||||
for t in [m, n]:
|
||||
if isinstance(t, np.ndarray):
|
||||
raise TypeError("m and n should be valid integers")
|
||||
|
||||
if m < 0:
|
||||
m += X.shape[0]
|
||||
if n < 0:
|
||||
n += X.shape[0]
|
||||
|
||||
m_mask = X.indices == m
|
||||
X.indices[X.indices == n] = m
|
||||
X.indices[m_mask] = n
|
||||
|
||||
|
||||
def inplace_swap_row_csr(X, m, n):
|
||||
"""
|
||||
Swaps two rows of a CSR matrix in-place.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : scipy.sparse.csr_matrix, shape=(n_samples, n_features)
|
||||
Matrix whose two rows are to be swapped.
|
||||
|
||||
m : int
|
||||
Index of the row of X to be swapped.
|
||||
|
||||
n : int
|
||||
Index of the row of X to be swapped.
|
||||
"""
|
||||
for t in [m, n]:
|
||||
if isinstance(t, np.ndarray):
|
||||
raise TypeError("m and n should be valid integers")
|
||||
|
||||
if m < 0:
|
||||
m += X.shape[0]
|
||||
if n < 0:
|
||||
n += X.shape[0]
|
||||
|
||||
# The following swapping makes life easier since m is assumed to be the
|
||||
# smaller integer below.
|
||||
if m > n:
|
||||
m, n = n, m
|
||||
|
||||
indptr = X.indptr
|
||||
m_start = indptr[m]
|
||||
m_stop = indptr[m + 1]
|
||||
n_start = indptr[n]
|
||||
n_stop = indptr[n + 1]
|
||||
nz_m = m_stop - m_start
|
||||
nz_n = n_stop - n_start
|
||||
|
||||
if nz_m != nz_n:
|
||||
# Modify indptr first
|
||||
X.indptr[m + 2:n] += nz_n - nz_m
|
||||
X.indptr[m + 1] = m_start + nz_n
|
||||
X.indptr[n] = n_stop - nz_m
|
||||
|
||||
X.indices = np.concatenate([X.indices[:m_start],
|
||||
X.indices[n_start:n_stop],
|
||||
X.indices[m_stop:n_start],
|
||||
X.indices[m_start:m_stop],
|
||||
X.indices[n_stop:]])
|
||||
X.data = np.concatenate([X.data[:m_start],
|
||||
X.data[n_start:n_stop],
|
||||
X.data[m_stop:n_start],
|
||||
X.data[m_start:m_stop],
|
||||
X.data[n_stop:]])
|
||||
|
||||
|
||||
def inplace_swap_row(X, m, n):
|
||||
"""
|
||||
Swaps two rows of a CSC/CSR matrix in-place.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSR or CSC sparse matrix, shape=(n_samples, n_features)
|
||||
Matrix whose two rows are to be swapped.
|
||||
|
||||
m : int
|
||||
Index of the row of X to be swapped.
|
||||
|
||||
n : int
|
||||
Index of the row of X to be swapped.
|
||||
"""
|
||||
if isinstance(X, sp.csc_matrix):
|
||||
inplace_swap_row_csc(X, m, n)
|
||||
elif isinstance(X, sp.csr_matrix):
|
||||
inplace_swap_row_csr(X, m, n)
|
||||
else:
|
||||
_raise_typeerror(X)
|
||||
|
||||
|
||||
def inplace_swap_column(X, m, n):
|
||||
"""
|
||||
Swaps two columns of a CSC/CSR matrix in-place.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSR or CSC sparse matrix, shape=(n_samples, n_features)
|
||||
Matrix whose two columns are to be swapped.
|
||||
|
||||
m : int
|
||||
Index of the column of X to be swapped.
|
||||
|
||||
n : int
|
||||
Index of the column of X to be swapped.
|
||||
"""
|
||||
if m < 0:
|
||||
m += X.shape[1]
|
||||
if n < 0:
|
||||
n += X.shape[1]
|
||||
if isinstance(X, sp.csc_matrix):
|
||||
inplace_swap_row_csr(X, m, n)
|
||||
elif isinstance(X, sp.csr_matrix):
|
||||
inplace_swap_row_csc(X, m, n)
|
||||
else:
|
||||
_raise_typeerror(X)
|
||||
|
||||
|
||||
def _minor_reduce(X, ufunc):
|
||||
major_index = np.flatnonzero(np.diff(X.indptr))
|
||||
|
||||
# reduceat tries to cast X.indptr to intp, which errors
|
||||
# if it is int64 on a 32 bit system.
|
||||
# Reinitializing prevents this where possible, see #13737
|
||||
X = type(X)((X.data, X.indices, X.indptr), shape=X.shape)
|
||||
value = ufunc.reduceat(X.data, X.indptr[major_index])
|
||||
return major_index, value
|
||||
|
||||
|
||||
def _min_or_max_axis(X, axis, min_or_max):
|
||||
N = X.shape[axis]
|
||||
if N == 0:
|
||||
raise ValueError("zero-size array to reduction operation")
|
||||
M = X.shape[1 - axis]
|
||||
mat = X.tocsc() if axis == 0 else X.tocsr()
|
||||
mat.sum_duplicates()
|
||||
major_index, value = _minor_reduce(mat, min_or_max)
|
||||
not_full = np.diff(mat.indptr)[major_index] < N
|
||||
value[not_full] = min_or_max(value[not_full], 0)
|
||||
mask = value != 0
|
||||
major_index = np.compress(mask, major_index)
|
||||
value = np.compress(mask, value)
|
||||
|
||||
if axis == 0:
|
||||
res = sp.coo_matrix((value, (np.zeros(len(value)), major_index)),
|
||||
dtype=X.dtype, shape=(1, M))
|
||||
else:
|
||||
res = sp.coo_matrix((value, (major_index, np.zeros(len(value)))),
|
||||
dtype=X.dtype, shape=(M, 1))
|
||||
return res.A.ravel()
|
||||
|
||||
|
||||
def _sparse_min_or_max(X, axis, min_or_max):
|
||||
if axis is None:
|
||||
if 0 in X.shape:
|
||||
raise ValueError("zero-size array to reduction operation")
|
||||
zero = X.dtype.type(0)
|
||||
if X.nnz == 0:
|
||||
return zero
|
||||
m = min_or_max.reduce(X.data.ravel())
|
||||
if X.nnz != np.product(X.shape):
|
||||
m = min_or_max(zero, m)
|
||||
return m
|
||||
if axis < 0:
|
||||
axis += 2
|
||||
if (axis == 0) or (axis == 1):
|
||||
return _min_or_max_axis(X, axis, min_or_max)
|
||||
else:
|
||||
raise ValueError("invalid axis, use 0 for rows, or 1 for columns")
|
||||
|
||||
|
||||
def _sparse_min_max(X, axis):
|
||||
return (_sparse_min_or_max(X, axis, np.minimum),
|
||||
_sparse_min_or_max(X, axis, np.maximum))
|
||||
|
||||
|
||||
def _sparse_nan_min_max(X, axis):
|
||||
return(_sparse_min_or_max(X, axis, np.fmin),
|
||||
_sparse_min_or_max(X, axis, np.fmax))
|
||||
|
||||
|
||||
def min_max_axis(X, axis, ignore_nan=False):
|
||||
"""Compute minimum and maximum along an axis on a CSR or CSC matrix and
|
||||
optionally ignore NaN values.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSR or CSC sparse matrix, shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
axis : int (either 0 or 1)
|
||||
Axis along which the minima and maxima should be computed.
|
||||
|
||||
ignore_nan : bool, default is False
|
||||
Whether to ignore or pass through NaN values.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
mins : float array with shape (n_features,)
|
||||
Feature-wise minima
|
||||
|
||||
maxs : float array with shape (n_features,)
|
||||
Feature-wise maxima
|
||||
"""
|
||||
if isinstance(X, sp.csr_matrix) or isinstance(X, sp.csc_matrix):
|
||||
if ignore_nan:
|
||||
return _sparse_nan_min_max(X, axis=axis)
|
||||
else:
|
||||
return _sparse_min_max(X, axis=axis)
|
||||
else:
|
||||
_raise_typeerror(X)
|
||||
|
||||
|
||||
def count_nonzero(X, axis=None, sample_weight=None):
|
||||
"""A variant of X.getnnz() with extension to weighting on axis 0
|
||||
|
||||
Useful in efficiently calculating multilabel metrics.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSR sparse matrix of shape (n_samples, n_labels)
|
||||
Input data.
|
||||
|
||||
axis : None, 0 or 1
|
||||
The axis on which the data is aggregated.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
Weight for each row of X.
|
||||
"""
|
||||
if axis == -1:
|
||||
axis = 1
|
||||
elif axis == -2:
|
||||
axis = 0
|
||||
elif X.format != 'csr':
|
||||
raise TypeError('Expected CSR sparse format, got {0}'.format(X.format))
|
||||
|
||||
# We rely here on the fact that np.diff(Y.indptr) for a CSR
|
||||
# will return the number of nonzero entries in each row.
|
||||
# A bincount over Y.indices will return the number of nonzeros
|
||||
# in each column. See ``csr_matrix.getnnz`` in scipy >= 0.14.
|
||||
if axis is None:
|
||||
if sample_weight is None:
|
||||
return X.nnz
|
||||
else:
|
||||
return np.dot(np.diff(X.indptr), sample_weight)
|
||||
elif axis == 1:
|
||||
out = np.diff(X.indptr)
|
||||
if sample_weight is None:
|
||||
# astype here is for consistency with axis=0 dtype
|
||||
return out.astype('intp')
|
||||
return out * sample_weight
|
||||
elif axis == 0:
|
||||
if sample_weight is None:
|
||||
return np.bincount(X.indices, minlength=X.shape[1])
|
||||
else:
|
||||
weights = np.repeat(sample_weight, np.diff(X.indptr))
|
||||
return np.bincount(X.indices, minlength=X.shape[1],
|
||||
weights=weights)
|
||||
else:
|
||||
raise ValueError('Unsupported axis: {0}'.format(axis))
|
||||
|
||||
|
||||
def _get_median(data, n_zeros):
|
||||
"""Compute the median of data with n_zeros additional zeros.
|
||||
|
||||
This function is used to support sparse matrices; it modifies data in-place
|
||||
"""
|
||||
n_elems = len(data) + n_zeros
|
||||
if not n_elems:
|
||||
return np.nan
|
||||
n_negative = np.count_nonzero(data < 0)
|
||||
middle, is_odd = divmod(n_elems, 2)
|
||||
data.sort()
|
||||
|
||||
if is_odd:
|
||||
return _get_elem_at_rank(middle, data, n_negative, n_zeros)
|
||||
|
||||
return (_get_elem_at_rank(middle - 1, data, n_negative, n_zeros) +
|
||||
_get_elem_at_rank(middle, data, n_negative, n_zeros)) / 2.
|
||||
|
||||
|
||||
def _get_elem_at_rank(rank, data, n_negative, n_zeros):
|
||||
"""Find the value in data augmented with n_zeros for the given rank"""
|
||||
if rank < n_negative:
|
||||
return data[rank]
|
||||
if rank - n_negative < n_zeros:
|
||||
return 0
|
||||
return data[rank - n_zeros]
|
||||
|
||||
|
||||
def csc_median_axis_0(X):
|
||||
"""Find the median across axis 0 of a CSC matrix.
|
||||
It is equivalent to doing np.median(X, axis=0).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : CSC sparse matrix, shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
median : ndarray, shape (n_features,)
|
||||
Median.
|
||||
|
||||
"""
|
||||
if not isinstance(X, sp.csc_matrix):
|
||||
raise TypeError("Expected matrix of CSC format, got %s" % X.format)
|
||||
|
||||
indptr = X.indptr
|
||||
n_samples, n_features = X.shape
|
||||
median = np.zeros(n_features)
|
||||
|
||||
for f_ind, (start, end) in enumerate(zip(indptr[:-1], indptr[1:])):
|
||||
|
||||
# Prevent modifying X in place
|
||||
data = np.copy(X.data[start: end])
|
||||
nz = n_samples - data.size
|
||||
median[f_ind] = _get_median(data, nz)
|
||||
|
||||
return median
|
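Note: the helpers above operate on CSR/CSC matrices, several of them in place. A short usage sketch (not part of the uploaded file), assuming the sklearn version in this tree:

import numpy as np
import scipy.sparse as sp
from sklearn.utils.sparsefuncs import (mean_variance_axis,
                                       inplace_column_scale, min_max_axis)

X = sp.csr_matrix(np.array([[0., 2.], [3., 0.], [0., 4.]]))

means, variances = mean_variance_axis(X, axis=0)   # feature-wise statistics
mins, maxs = min_max_axis(X, axis=0)                # feature-wise extrema

inplace_column_scale(X, 1. / np.sqrt(variances))    # rescales X.data in place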
Binary file not shown.
18
venv/Lib/site-packages/sklearn/utils/stats.py
Normal file
@@ -0,0 +1,18 @@
import numpy as np

from .extmath import stable_cumsum


def _weighted_percentile(array, sample_weight, percentile=50):
    """
    Compute the weighted ``percentile`` of ``array`` with ``sample_weight``.
    """
    sorted_idx = np.argsort(array)

    # Find index of median prediction for each sample
    weight_cdf = stable_cumsum(sample_weight[sorted_idx])
    percentile_idx = np.searchsorted(
        weight_cdf, (percentile / 100.) * weight_cdf[-1])
    # in rare cases, percentile_idx equals to len(sorted_idx)
    percentile_idx = np.clip(percentile_idx, 0, len(sorted_idx)-1)
    return array[sorted_idx[percentile_idx]]
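Note: _weighted_percentile above returns the smallest sorted value whose cumulative weight reaches the requested fraction of the total weight. A small worked sketch (not part of the uploaded file); this is a private helper, so the import path is internal:

import numpy as np
from sklearn.utils.stats import _weighted_percentile

values = np.array([1., 2., 3., 4.])
weights = np.array([1., 1., 1., 5.])

# Cumulative weights after sorting are [1, 2, 3, 8]; 50% of the total (4.0)
# is first reached at the last element, so the weighted median is 4.0.
assert _weighted_percentile(values, weights, percentile=50) == 4.0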
18
venv/Lib/site-packages/sklearn/utils/testing.py
Normal file
@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _testing  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.utils.testing'
correct_import_path = 'sklearn.utils'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_testing, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
0
venv/Lib/site-packages/sklearn/utils/tests/__init__.py
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10
venv/Lib/site-packages/sklearn/utils/tests/conftest.py
Normal file
@@ -0,0 +1,10 @@
import pytest

import sklearn


@pytest.fixture
def print_changed_only_false():
    sklearn.set_config(print_changed_only=False)
    yield
    sklearn.set_config(print_changed_only=True)  # reset to default
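Note: the fixture above temporarily switches estimator reprs to list every parameter. A hedged sketch (not part of the uploaded file) of a test that requests it:

from sklearn.linear_model import LogisticRegression


def test_repr_shows_defaults(print_changed_only_false):
    # With print_changed_only=False the repr lists all parameters,
    # not only the ones that differ from their defaults.
    assert "penalty='l2'" in repr(LogisticRegression())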
266
venv/Lib/site-packages/sklearn/utils/tests/test_class_weight.py
Normal file
|
@@ -0,0 +1,266 @@
|
|||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.datasets import make_blobs
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
from sklearn.utils.class_weight import compute_class_weight
|
||||
from sklearn.utils.class_weight import compute_sample_weight
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import assert_almost_equal
|
||||
|
||||
|
||||
def test_compute_class_weight():
|
||||
# Test (and demo) compute_class_weight.
|
||||
y = np.asarray([2, 2, 2, 3, 3, 4])
|
||||
classes = np.unique(y)
|
||||
|
||||
cw = compute_class_weight("balanced", classes=classes, y=y)
|
||||
# total effect of samples is preserved
|
||||
class_counts = np.bincount(y)[2:]
|
||||
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
|
||||
assert cw[0] < cw[1] < cw[2]
|
||||
|
||||
|
||||
def test_compute_class_weight_not_present():
|
||||
# Raise error when y does not contain all class labels
|
||||
classes = np.arange(4)
|
||||
y = np.asarray([0, 0, 0, 1, 1, 2])
|
||||
with pytest.raises(ValueError):
|
||||
compute_class_weight("balanced", classes=classes, y=y)
|
||||
# Fix exception in error message formatting when missing label is a string
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/8312
|
||||
with pytest.raises(ValueError,
|
||||
match="Class label label_not_present not present"):
|
||||
compute_class_weight({"label_not_present": 1.}, classes=classes, y=y)
|
||||
# Raise error when y has items not in classes
|
||||
classes = np.arange(2)
|
||||
with pytest.raises(ValueError):
|
||||
compute_class_weight("balanced", classes=classes, y=y)
|
||||
with pytest.raises(ValueError):
|
||||
compute_class_weight({0: 1., 1: 2.}, classes=classes, y=y)
|
||||
|
||||
|
||||
def test_compute_class_weight_dict():
|
||||
classes = np.arange(3)
|
||||
class_weights = {0: 1.0, 1: 2.0, 2: 3.0}
|
||||
y = np.asarray([0, 0, 1, 2])
|
||||
cw = compute_class_weight(class_weights, classes=classes, y=y)
|
||||
|
||||
# When the user specifies class weights, compute_class_weights should just
|
||||
# return them.
|
||||
assert_array_almost_equal(np.asarray([1.0, 2.0, 3.0]), cw)
|
||||
|
||||
# When a class weight is specified that isn't in classes, a ValueError
|
||||
# should get raised
|
||||
msg = 'Class label 4 not present.'
|
||||
class_weights = {0: 1.0, 1: 2.0, 2: 3.0, 4: 1.5}
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
compute_class_weight(class_weights, classes=classes, y=y)
|
||||
|
||||
msg = 'Class label -1 not present.'
|
||||
class_weights = {-1: 5.0, 0: 1.0, 1: 2.0, 2: 3.0}
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
compute_class_weight(class_weights, classes=classes, y=y)
|
||||
|
||||
|
||||
def test_compute_class_weight_invariance():
|
||||
# Test that results with class_weight="balanced" is invariant wrt
|
||||
# class imbalance if the number of samples is identical.
|
||||
# The test uses a balanced two class dataset with 100 datapoints.
|
||||
# It creates three versions, one where class 1 is duplicated
|
||||
# resulting in 150 points of class 1 and 50 of class 0,
|
||||
# one where there are 50 points in class 1 and 150 in class 0,
|
||||
# and one where there are 100 points of each class (this one is balanced
|
||||
# again).
|
||||
# With balancing class weights, all three should give the same model.
|
||||
X, y = make_blobs(centers=2, random_state=0)
|
||||
# create dataset where class 1 is duplicated twice
|
||||
X_1 = np.vstack([X] + [X[y == 1]] * 2)
|
||||
y_1 = np.hstack([y] + [y[y == 1]] * 2)
|
||||
# create dataset where class 0 is duplicated twice
|
||||
X_0 = np.vstack([X] + [X[y == 0]] * 2)
|
||||
y_0 = np.hstack([y] + [y[y == 0]] * 2)
|
||||
# duplicate everything
|
||||
X_ = np.vstack([X] * 2)
|
||||
y_ = np.hstack([y] * 2)
|
||||
# results should be identical
|
||||
logreg1 = LogisticRegression(class_weight="balanced").fit(X_1, y_1)
|
||||
logreg0 = LogisticRegression(class_weight="balanced").fit(X_0, y_0)
|
||||
logreg = LogisticRegression(class_weight="balanced").fit(X_, y_)
|
||||
assert_array_almost_equal(logreg1.coef_, logreg0.coef_)
|
||||
assert_array_almost_equal(logreg.coef_, logreg0.coef_)
|
||||
|
||||
|
||||
def test_compute_class_weight_balanced_negative():
|
||||
# Test compute_class_weight when labels are negative
|
||||
# Test with balanced class labels.
|
||||
classes = np.array([-2, -1, 0])
|
||||
y = np.asarray([-1, -1, 0, 0, -2, -2])
|
||||
|
||||
cw = compute_class_weight("balanced", classes=classes, y=y)
|
||||
assert len(cw) == len(classes)
|
||||
assert_array_almost_equal(cw, np.array([1., 1., 1.]))
|
||||
|
||||
# Test with unbalanced class labels.
|
||||
y = np.asarray([-1, 0, 0, -2, -2, -2])
|
||||
|
||||
cw = compute_class_weight("balanced", classes=classes, y=y)
|
||||
assert len(cw) == len(classes)
|
||||
class_counts = np.bincount(y + 2)
|
||||
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
|
||||
assert_array_almost_equal(cw, [2. / 3, 2., 1.])
|
||||
|
||||
|
||||
def test_compute_class_weight_balanced_unordered():
|
||||
# Test compute_class_weight when classes are unordered
|
||||
classes = np.array([1, 0, 3])
|
||||
y = np.asarray([1, 0, 0, 3, 3, 3])
|
||||
|
||||
cw = compute_class_weight("balanced", classes=classes, y=y)
|
||||
class_counts = np.bincount(y)[classes]
|
||||
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
|
||||
assert_array_almost_equal(cw, [2., 1., 2. / 3])
|
||||
|
||||
|
||||
def test_compute_class_weight_default():
|
||||
# Test for the case where no weight is given for a present class.
|
||||
# Current behaviour is to assign the unweighted classes a weight of 1.
|
||||
y = np.asarray([2, 2, 2, 3, 3, 4])
|
||||
classes = np.unique(y)
|
||||
classes_len = len(classes)
|
||||
|
||||
# Test for non specified weights
|
||||
cw = compute_class_weight(None, classes=classes, y=y)
|
||||
assert len(cw) == classes_len
|
||||
assert_array_almost_equal(cw, np.ones(3))
|
||||
|
||||
# Tests for partly specified weights
|
||||
cw = compute_class_weight({2: 1.5}, classes=classes, y=y)
|
||||
assert len(cw) == classes_len
|
||||
assert_array_almost_equal(cw, [1.5, 1., 1.])
|
||||
|
||||
cw = compute_class_weight({2: 1.5, 4: 0.5}, classes=classes, y=y)
|
||||
assert len(cw) == classes_len
|
||||
assert_array_almost_equal(cw, [1.5, 1., 0.5])
|
||||
|
||||
|
||||
def test_compute_sample_weight():
|
||||
# Test (and demo) compute_sample_weight.
|
||||
# Test with balanced classes
|
||||
y = np.asarray([1, 1, 1, 2, 2, 2])
|
||||
sample_weight = compute_sample_weight("balanced", y)
|
||||
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
|
||||
|
||||
# Test with user-defined weights
|
||||
sample_weight = compute_sample_weight({1: 2, 2: 1}, y)
|
||||
assert_array_almost_equal(sample_weight, [2., 2., 2., 1., 1., 1.])
|
||||
|
||||
# Test with column vector of balanced classes
|
||||
y = np.asarray([[1], [1], [1], [2], [2], [2]])
|
||||
sample_weight = compute_sample_weight("balanced", y)
|
||||
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
|
||||
|
||||
# Test with unbalanced classes
|
||||
y = np.asarray([1, 1, 1, 2, 2, 2, 3])
|
||||
sample_weight = compute_sample_weight("balanced", y)
|
||||
expected_balanced = np.array([0.7777, 0.7777, 0.7777, 0.7777, 0.7777,
|
||||
0.7777, 2.3333])
|
||||
assert_array_almost_equal(sample_weight, expected_balanced, decimal=4)
|
||||
|
||||
# Test with `None` weights
|
||||
sample_weight = compute_sample_weight(None, y)
|
||||
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 1.])
|
||||
|
||||
# Test with multi-output of balanced classes
|
||||
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
|
||||
sample_weight = compute_sample_weight("balanced", y)
|
||||
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
|
||||
|
||||
# Test with multi-output with user-defined weights
|
||||
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
|
||||
sample_weight = compute_sample_weight([{1: 2, 2: 1}, {0: 1, 1: 2}], y)
|
||||
assert_array_almost_equal(sample_weight, [2., 2., 2., 2., 2., 2.])
|
||||
|
||||
# Test with multi-output of unbalanced classes
|
||||
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [3, -1]])
|
||||
sample_weight = compute_sample_weight("balanced", y)
|
||||
assert_array_almost_equal(sample_weight, expected_balanced ** 2, decimal=3)
|
||||
|
||||
|
||||
def test_compute_sample_weight_with_subsample():
|
||||
# Test compute_sample_weight with subsamples specified.
|
||||
# Test with balanced classes and all samples present
|
||||
y = np.asarray([1, 1, 1, 2, 2, 2])
|
||||
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
|
||||
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
|
||||
|
||||
# Test with column vector of balanced classes and all samples present
|
||||
y = np.asarray([[1], [1], [1], [2], [2], [2]])
|
||||
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
|
||||
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
|
||||
|
||||
# Test with a subsample
|
||||
y = np.asarray([1, 1, 1, 2, 2, 2])
|
||||
sample_weight = compute_sample_weight("balanced", y, indices=range(4))
|
||||
assert_array_almost_equal(sample_weight, [2. / 3, 2. / 3,
|
||||
2. / 3, 2., 2., 2.])
|
||||
|
||||
# Test with a bootstrap subsample
|
||||
y = np.asarray([1, 1, 1, 2, 2, 2])
|
||||
sample_weight = compute_sample_weight("balanced", y,
|
||||
indices=[0, 1, 1, 2, 2, 3])
|
||||
expected_balanced = np.asarray([0.6, 0.6, 0.6, 3., 3., 3.])
|
||||
assert_array_almost_equal(sample_weight, expected_balanced)
|
||||
|
||||
# Test with a bootstrap subsample for multi-output
|
||||
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
|
||||
sample_weight = compute_sample_weight("balanced", y,
|
||||
indices=[0, 1, 1, 2, 2, 3])
|
||||
assert_array_almost_equal(sample_weight, expected_balanced ** 2)
|
||||
|
||||
# Test with a missing class
|
||||
y = np.asarray([1, 1, 1, 2, 2, 2, 3])
|
||||
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
|
||||
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
|
||||
|
||||
# Test with a missing class for multi-output
|
||||
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [2, 2]])
|
||||
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
|
||||
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
|
||||
|
||||
|
||||
def test_compute_sample_weight_errors():
|
||||
# Test compute_sample_weight raises errors expected.
|
||||
# Invalid preset string
|
||||
y = np.asarray([1, 1, 1, 2, 2, 2])
|
||||
y_ = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
compute_sample_weight("ni", y)
|
||||
with pytest.raises(ValueError):
|
||||
compute_sample_weight("ni", y, indices=range(4))
|
||||
with pytest.raises(ValueError):
|
||||
compute_sample_weight("ni", y_)
|
||||
with pytest.raises(ValueError):
|
||||
compute_sample_weight("ni", y_, indices=range(4))
|
||||
|
||||
# Not "balanced" for subsample
|
||||
with pytest.raises(ValueError):
|
||||
compute_sample_weight({1: 2, 2: 1}, y, indices=range(4))
|
||||
|
||||
# Not a list or preset for multi-output
|
||||
with pytest.raises(ValueError):
|
||||
compute_sample_weight({1: 2, 2: 1}, y_)
|
||||
|
||||
# Incorrect length list for multi-output
|
||||
with pytest.raises(ValueError):
|
||||
compute_sample_weight([{1: 2, 2: 1}], y_)
|
||||
|
||||
|
||||
def test_compute_sample_weight_more_than_32():
|
||||
# Non-regression smoke test for #12146
|
||||
y = np.arange(50) # more than 32 distinct classes
|
||||
indices = np.arange(50) # use subsampling
|
||||
weight = compute_sample_weight('balanced', y, indices=indices)
|
||||
assert_array_almost_equal(weight, np.ones(y.shape[0]))
|
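Note: the "balanced" heuristic exercised by the tests above weights each class inversely to its frequency, n_samples / (n_classes * bincount(y)). A short sketch (not part of the uploaded file) reproducing the first test by hand:

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

y = np.asarray([2, 2, 2, 3, 3, 4])
classes = np.unique(y)

# n_samples / (n_classes * per-class count): 6 / (3 * [3, 2, 1])
expected = y.shape[0] / (len(classes) * np.bincount(y)[classes])

assert np.allclose(compute_class_weight("balanced", classes=classes, y=y),
                   expected)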
229
venv/Lib/site-packages/sklearn/utils/tests/test_cython_blas.py
Normal file
|
@@ -0,0 +1,229 @@
|
|||
import pytest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sklearn.utils._testing import assert_allclose
|
||||
from sklearn.utils._cython_blas import _dot_memview
|
||||
from sklearn.utils._cython_blas import _asum_memview
|
||||
from sklearn.utils._cython_blas import _axpy_memview
|
||||
from sklearn.utils._cython_blas import _nrm2_memview
|
||||
from sklearn.utils._cython_blas import _copy_memview
|
||||
from sklearn.utils._cython_blas import _scal_memview
|
||||
from sklearn.utils._cython_blas import _rotg_memview
|
||||
from sklearn.utils._cython_blas import _rot_memview
|
||||
from sklearn.utils._cython_blas import _gemv_memview
|
||||
from sklearn.utils._cython_blas import _ger_memview
|
||||
from sklearn.utils._cython_blas import _gemm_memview
|
||||
from sklearn.utils._cython_blas import RowMajor, ColMajor
|
||||
from sklearn.utils._cython_blas import Trans, NoTrans
|
||||
|
||||
|
||||
def _numpy_to_cython(dtype):
|
||||
cython = pytest.importorskip("cython")
|
||||
if dtype == np.float32:
|
||||
return cython.float
|
||||
elif dtype == np.float64:
|
||||
return cython.double
|
||||
|
||||
|
||||
RTOL = {np.float32: 1e-6, np.float64: 1e-12}
|
||||
ORDER = {RowMajor: 'C', ColMajor: 'F'}
|
||||
|
||||
|
||||
def _no_op(x):
|
||||
return x
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
def test_dot(dtype):
|
||||
dot = _dot_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
x = rng.random_sample(10).astype(dtype, copy=False)
|
||||
y = rng.random_sample(10).astype(dtype, copy=False)
|
||||
|
||||
expected = x.dot(y)
|
||||
actual = dot(x, y)
|
||||
|
||||
assert_allclose(actual, expected, rtol=RTOL[dtype])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
def test_asum(dtype):
|
||||
asum = _asum_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
x = rng.random_sample(10).astype(dtype, copy=False)
|
||||
|
||||
expected = np.abs(x).sum()
|
||||
actual = asum(x)
|
||||
|
||||
assert_allclose(actual, expected, rtol=RTOL[dtype])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
def test_axpy(dtype):
|
||||
axpy = _axpy_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
x = rng.random_sample(10).astype(dtype, copy=False)
|
||||
y = rng.random_sample(10).astype(dtype, copy=False)
|
||||
alpha = 2.5
|
||||
|
||||
expected = alpha * x + y
|
||||
axpy(alpha, x, y)
|
||||
|
||||
assert_allclose(y, expected, rtol=RTOL[dtype])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
def test_nrm2(dtype):
|
||||
nrm2 = _nrm2_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
x = rng.random_sample(10).astype(dtype, copy=False)
|
||||
|
||||
expected = np.linalg.norm(x)
|
||||
actual = nrm2(x)
|
||||
|
||||
assert_allclose(actual, expected, rtol=RTOL[dtype])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
def test_copy(dtype):
|
||||
copy = _copy_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
x = rng.random_sample(10).astype(dtype, copy=False)
|
||||
y = np.empty_like(x)
|
||||
|
||||
expected = x.copy()
|
||||
copy(x, y)
|
||||
|
||||
assert_allclose(y, expected, rtol=RTOL[dtype])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
def test_scal(dtype):
|
||||
scal = _scal_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
x = rng.random_sample(10).astype(dtype, copy=False)
|
||||
alpha = 2.5
|
||||
|
||||
expected = alpha * x
|
||||
scal(alpha, x)
|
||||
|
||||
assert_allclose(x, expected, rtol=RTOL[dtype])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
def test_rotg(dtype):
|
||||
rotg = _rotg_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
a = dtype(rng.randn())
|
||||
b = dtype(rng.randn())
|
||||
c, s = 0.0, 0.0
|
||||
|
||||
def expected_rotg(a, b):
|
||||
roe = a if abs(a) > abs(b) else b
|
||||
if a == 0 and b == 0:
|
||||
c, s, r, z = (1, 0, 0, 0)
|
||||
else:
|
||||
r = np.sqrt(a**2 + b**2) * (1 if roe >= 0 else -1)
|
||||
c, s = a/r, b/r
|
||||
z = s if roe == a else (1 if c == 0 else 1 / c)
|
||||
return r, z, c, s
|
||||
|
||||
expected = expected_rotg(a, b)
|
||||
actual = rotg(a, b, c, s)
|
||||
|
||||
assert_allclose(actual, expected, rtol=RTOL[dtype])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
def test_rot(dtype):
|
||||
rot = _rot_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
x = rng.random_sample(10).astype(dtype, copy=False)
|
||||
y = rng.random_sample(10).astype(dtype, copy=False)
|
||||
c = dtype(rng.randn())
|
||||
s = dtype(rng.randn())
|
||||
|
||||
expected_x = c * x + s * y
|
||||
expected_y = c * y - s * x
|
||||
|
||||
rot(x, y, c, s)
|
||||
|
||||
assert_allclose(x, expected_x)
|
||||
assert_allclose(y, expected_y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
@pytest.mark.parametrize("opA, transA",
|
||||
[(_no_op, NoTrans), (np.transpose, Trans)],
|
||||
ids=["NoTrans", "Trans"])
|
||||
@pytest.mark.parametrize("order", [RowMajor, ColMajor],
|
||||
ids=["RowMajor", "ColMajor"])
|
||||
def test_gemv(dtype, opA, transA, order):
|
||||
gemv = _gemv_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
A = np.asarray(opA(rng.random_sample((20, 10)).astype(dtype, copy=False)),
|
||||
order=ORDER[order])
|
||||
x = rng.random_sample(10).astype(dtype, copy=False)
|
||||
y = rng.random_sample(20).astype(dtype, copy=False)
|
||||
alpha, beta = 2.5, -0.5
|
||||
|
||||
expected = alpha * opA(A).dot(x) + beta * y
|
||||
gemv(transA, alpha, A, x, beta, y)
|
||||
|
||||
assert_allclose(y, expected, rtol=RTOL[dtype])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
@pytest.mark.parametrize("order", [RowMajor, ColMajor],
|
||||
ids=["RowMajor", "ColMajor"])
|
||||
def test_ger(dtype, order):
|
||||
ger = _ger_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
x = rng.random_sample(10).astype(dtype, copy=False)
|
||||
y = rng.random_sample(20).astype(dtype, copy=False)
|
||||
A = np.asarray(rng.random_sample((10, 20)).astype(dtype, copy=False),
|
||||
order=ORDER[order])
|
||||
alpha = 2.5
|
||||
|
||||
expected = alpha * np.outer(x, y) + A
|
||||
ger(alpha, x, y, A)
|
||||
|
||||
assert_allclose(A, expected, rtol=RTOL[dtype])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
@pytest.mark.parametrize("opB, transB",
|
||||
[(_no_op, NoTrans), (np.transpose, Trans)],
|
||||
ids=["NoTrans", "Trans"])
|
||||
@pytest.mark.parametrize("opA, transA",
|
||||
[(_no_op, NoTrans), (np.transpose, Trans)],
|
||||
ids=["NoTrans", "Trans"])
|
||||
@pytest.mark.parametrize("order", [RowMajor, ColMajor],
|
||||
ids=["RowMajor", "ColMajor"])
|
||||
def test_gemm(dtype, opA, transA, opB, transB, order):
|
||||
gemm = _gemm_memview[_numpy_to_cython(dtype)]
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
A = np.asarray(opA(rng.random_sample((30, 10)).astype(dtype, copy=False)),
|
||||
order=ORDER[order])
|
||||
B = np.asarray(opB(rng.random_sample((10, 20)).astype(dtype, copy=False)),
|
||||
order=ORDER[order])
|
||||
C = np.asarray(rng.random_sample((30, 20)).astype(dtype, copy=False),
|
||||
order=ORDER[order])
|
||||
alpha, beta = 2.5, -0.5
|
||||
|
||||
expected = alpha * opA(A).dot(opB(B)) + beta * C
|
||||
gemm(transA, transB, alpha, A, B, beta, C)
|
||||
|
||||
assert_allclose(C, expected, rtol=RTOL[dtype])
|
|
@@ -0,0 +1,128 @@
|
|||
import pytest
|
||||
import types
|
||||
import numpy as np
|
||||
import warnings
|
||||
|
||||
from sklearn.dummy import DummyClassifier
|
||||
from sklearn.utils import all_estimators
|
||||
from sklearn.utils.estimator_checks import choose_check_classifiers_labels
|
||||
from sklearn.utils.estimator_checks import NotAnArray
|
||||
from sklearn.utils.estimator_checks import enforce_estimator_tags_y
|
||||
from sklearn.utils.estimator_checks import is_public_parameter
|
||||
from sklearn.utils.estimator_checks import pairwise_estimator_convert_X
|
||||
from sklearn.utils.estimator_checks import set_checking_parameters
|
||||
from sklearn.utils.optimize import newton_cg
|
||||
from sklearn.utils.random import random_choice_csc
|
||||
from sklearn.utils import safe_indexing
|
||||
|
||||
|
||||
# This file tests the utils that are deprecated
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_choose_check_classifiers_labels_deprecated():
|
||||
with pytest.warns(FutureWarning,
|
||||
match="removed in version 0.24"):
|
||||
choose_check_classifiers_labels(None, None, None)
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_enforce_estimator_tags_y():
|
||||
with pytest.warns(FutureWarning,
|
||||
match="removed in version 0.24"):
|
||||
enforce_estimator_tags_y(DummyClassifier(), np.array([0, 1]))
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_notanarray():
|
||||
with pytest.warns(FutureWarning,
|
||||
match="removed in version 0.24"):
|
||||
NotAnArray([1, 2])
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_is_public_parameter():
|
||||
with pytest.warns(FutureWarning,
|
||||
match="removed in version 0.24"):
|
||||
is_public_parameter('hello')
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_pairwise_estimator_convert_X():
|
||||
with pytest.warns(FutureWarning,
|
||||
match="removed in version 0.24"):
|
||||
pairwise_estimator_convert_X([[1, 2]], DummyClassifier())
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_set_checking_parameters():
|
||||
with pytest.warns(FutureWarning,
|
||||
match="removed in version 0.24"):
|
||||
set_checking_parameters(DummyClassifier())
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_newton_cg():
|
||||
rng = np.random.RandomState(0)
|
||||
A = rng.normal(size=(10, 10))
|
||||
x0 = np.ones(10)
|
||||
|
||||
def func(x):
|
||||
Ax = A.dot(x)
|
||||
return .5 * (Ax).dot(Ax)
|
||||
|
||||
def grad(x):
|
||||
return A.T.dot(A.dot(x))
|
||||
|
||||
def grad_hess(x):
|
||||
return grad(x), lambda x: A.T.dot(A.dot(x))
|
||||
|
||||
with pytest.warns(FutureWarning,
|
||||
match="removed in version 0.24"):
|
||||
newton_cg(grad_hess, func, grad, x0)
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_random_choice_csc():
|
||||
with pytest.warns(FutureWarning,
|
||||
match="removed in version 0.24"):
|
||||
random_choice_csc(10, [[2]])
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_safe_indexing():
|
||||
with pytest.warns(FutureWarning,
|
||||
match="removed in version 0.24"):
|
||||
safe_indexing([1, 2], 0)
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_partial_dependence_no_shadowing():
|
||||
# Non-regression test for:
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/15842
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", category=FutureWarning)
|
||||
from sklearn.inspection.partial_dependence import partial_dependence as _ # noqa
|
||||
|
||||
# Calling all_estimators() also triggers a recursive import of all
|
||||
# submodules, including deprecated ones.
|
||||
all_estimators()
|
||||
|
||||
from sklearn.inspection import partial_dependence
|
||||
assert isinstance(partial_dependence, types.FunctionType)
|
||||
|
||||
|
||||
# TODO: remove in 0.24
|
||||
def test_dict_learning_no_shadowing():
|
||||
# Non-regression test for:
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/15842
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", category=FutureWarning)
|
||||
from sklearn.decomposition.dict_learning import dict_learning as _ # noqa
|
||||
|
||||
# Calling all_estimators() also triggers a recursive import of all
|
||||
# submodules, including deprecated ones.
|
||||
all_estimators()
|
||||
|
||||
from sklearn.decomposition import dict_learning
|
||||
assert isinstance(dict_learning, types.FunctionType)
|
|
@@ -0,0 +1,59 @@
# Authors: Raghav RV <rvraghav93@gmail.com>
# License: BSD 3 clause


import pickle

from sklearn.utils.deprecation import _is_deprecated
from sklearn.utils.deprecation import deprecated
from sklearn.utils._testing import assert_warns_message


@deprecated('qwerty')
class MockClass1:
    pass


class MockClass2:
    @deprecated('mockclass2_method')
    def method(self):
        pass


class MockClass3:
    @deprecated()
    def __init__(self):
        pass


class MockClass4:
    pass


@deprecated()
def mock_function():
    return 10


def test_deprecated():
    assert_warns_message(FutureWarning, 'qwerty', MockClass1)
    assert_warns_message(FutureWarning, 'mockclass2_method',
                         MockClass2().method)
    assert_warns_message(FutureWarning, 'deprecated', MockClass3)
    val = assert_warns_message(FutureWarning, 'deprecated',
                               mock_function)
    assert val == 10


def test_is_deprecated():
    # Test if _is_deprecated helper identifies wrapping via deprecated
    # NOTE it works only for class methods and functions
    assert _is_deprecated(MockClass1.__init__)
    assert _is_deprecated(MockClass2().method)
    assert _is_deprecated(MockClass3.__init__)
    assert not _is_deprecated(MockClass4.__init__)
    assert _is_deprecated(mock_function)


def test_pickle():
    pickle.loads(pickle.dumps(mock_function))
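Note: the tests above check that `deprecated` wraps classes and functions so that calling them still works but emits a FutureWarning. A minimal sketch (not sklearn's actual implementation) of how such a decorator can be built:

import functools
import warnings


def simple_deprecated(extra=""):
    """Hypothetical decorator with the behaviour the tests above rely on."""
    def decorate(func):
        message = "Function %s is deprecated" % func.__name__
        if extra:
            message += "; %s" % extra

        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            warnings.warn(message, category=FutureWarning)
            return func(*args, **kwargs)

        # Mark the wrapper so tooling can detect the deprecation, e.g. via
        # a "DEPRECATED" prefix on the docstring.
        wrapped.__doc__ = "DEPRECATED: " + (func.__doc__ or "")
        return wrapped
    return decorate


@simple_deprecated("use new_function instead")
def old_function():
    return 10


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    assert old_function() == 10
assert any(w.category is FutureWarning for w in caught)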
@@ -0,0 +1,640 @@
|
|||
import unittest
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
import joblib
|
||||
|
||||
from io import StringIO
|
||||
|
||||
from sklearn.base import BaseEstimator, ClassifierMixin
|
||||
from sklearn.utils import deprecated
|
||||
from sklearn.utils._testing import (assert_raises_regex,
|
||||
ignore_warnings,
|
||||
assert_warns, assert_raises,
|
||||
SkipTest)
|
||||
from sklearn.utils.estimator_checks import check_estimator, _NotAnArray
|
||||
from sklearn.utils.estimator_checks \
|
||||
import check_class_weight_balanced_linear_classifier
|
||||
from sklearn.utils.estimator_checks import set_random_state
|
||||
from sklearn.utils.estimator_checks import _set_checking_parameters
|
||||
from sklearn.utils.estimator_checks import check_estimators_unfitted
|
||||
from sklearn.utils.estimator_checks import check_fit_score_takes_y
|
||||
from sklearn.utils.estimator_checks import check_no_attributes_set_in_init
|
||||
from sklearn.utils.estimator_checks import check_classifier_data_not_an_array
|
||||
from sklearn.utils.estimator_checks import check_regressor_data_not_an_array
|
||||
from sklearn.utils.validation import check_is_fitted
|
||||
from sklearn.utils.estimator_checks import check_outlier_corruption
|
||||
from sklearn.utils.fixes import np_version, parse_version
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
from sklearn.linear_model import LinearRegression, SGDClassifier
|
||||
from sklearn.mixture import GaussianMixture
|
||||
from sklearn.cluster import MiniBatchKMeans
|
||||
from sklearn.decomposition import NMF
|
||||
from sklearn.linear_model import MultiTaskElasticNet, LogisticRegression
|
||||
from sklearn.svm import SVC
|
||||
from sklearn.neighbors import KNeighborsRegressor
|
||||
from sklearn.utils.validation import check_array
|
||||
from sklearn.utils import all_estimators
|
||||
|
||||
|
||||
class CorrectNotFittedError(ValueError):
|
||||
"""Exception class to raise if estimator is used before fitting.
|
||||
|
||||
Like NotFittedError, it inherits from ValueError, but not from
|
||||
AttributeError. Used for testing only.
|
||||
"""
|
||||
|
||||
|
||||
class BaseBadClassifier(ClassifierMixin, BaseEstimator):
|
||||
def fit(self, X, y):
|
||||
return self
|
||||
|
||||
def predict(self, X):
|
||||
return np.ones(X.shape[0])
|
||||
|
||||
|
||||
class ChangesDict(BaseEstimator):
|
||||
def __init__(self, key=0):
|
||||
self.key = key
|
||||
|
||||
def fit(self, X, y=None):
|
||||
X, y = self._validate_data(X, y)
|
||||
return self
|
||||
|
||||
def predict(self, X):
|
||||
X = check_array(X)
|
||||
self.key = 1000
|
||||
return np.ones(X.shape[0])
|
||||
|
||||
|
||||
class SetsWrongAttribute(BaseEstimator):
|
||||
def __init__(self, acceptable_key=0):
|
||||
self.acceptable_key = acceptable_key
|
||||
|
||||
def fit(self, X, y=None):
|
||||
self.wrong_attribute = 0
|
||||
X, y = self._validate_data(X, y)
|
||||
return self
|
||||
|
||||
|
||||
class ChangesWrongAttribute(BaseEstimator):
|
||||
def __init__(self, wrong_attribute=0):
|
||||
self.wrong_attribute = wrong_attribute
|
||||
|
||||
def fit(self, X, y=None):
|
||||
self.wrong_attribute = 1
|
||||
X, y = self._validate_data(X, y)
|
||||
return self
|
||||
|
||||
|
||||
class ChangesUnderscoreAttribute(BaseEstimator):
|
||||
def fit(self, X, y=None):
|
||||
self._good_attribute = 1
|
||||
X, y = self._validate_data(X, y)
|
||||
return self
|
||||
|
||||
|
||||
class RaisesErrorInSetParams(BaseEstimator):
|
||||
def __init__(self, p=0):
|
||||
self.p = p
|
||||
|
||||
def set_params(self, **kwargs):
|
||||
if 'p' in kwargs:
|
||||
p = kwargs.pop('p')
|
||||
if p < 0:
|
||||
raise ValueError("p can't be less than 0")
|
||||
self.p = p
|
||||
return super().set_params(**kwargs)
|
||||
|
||||
def fit(self, X, y=None):
|
||||
X, y = self._validate_data(X, y)
|
||||
return self
|
||||
|
||||
|
||||
class ModifiesValueInsteadOfRaisingError(BaseEstimator):
|
||||
def __init__(self, p=0):
|
||||
self.p = p
|
||||
|
||||
def set_params(self, **kwargs):
|
||||
if 'p' in kwargs:
|
||||
p = kwargs.pop('p')
|
||||
if p < 0:
|
||||
p = 0
|
||||
self.p = p
|
||||
return super().set_params(**kwargs)
|
||||
|
||||
def fit(self, X, y=None):
|
||||
X, y = self._validate_data(X, y)
|
||||
return self
|
||||
|
||||
|
||||
class ModifiesAnotherValue(BaseEstimator):
|
||||
def __init__(self, a=0, b='method1'):
|
||||
self.a = a
|
||||
self.b = b
|
||||
|
||||
def set_params(self, **kwargs):
|
||||
if 'a' in kwargs:
|
||||
a = kwargs.pop('a')
|
||||
self.a = a
|
||||
if a is None:
|
||||
kwargs.pop('b')
|
||||
self.b = 'method2'
|
||||
return super().set_params(**kwargs)
|
||||
|
||||
def fit(self, X, y=None):
|
||||
X, y = self._validate_data(X, y)
|
||||
return self
|
||||
|
||||
|
||||
class NoCheckinPredict(BaseBadClassifier):
|
||||
def fit(self, X, y):
|
||||
X, y = self._validate_data(X, y)
|
||||
return self
|
||||
|
||||
|
||||
class NoSparseClassifier(BaseBadClassifier):
|
||||
def fit(self, X, y):
|
||||
X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'])
|
||||
if sp.issparse(X):
|
||||
raise ValueError("Nonsensical Error")
|
||||
return self
|
||||
|
||||
def predict(self, X):
|
||||
X = check_array(X)
|
||||
return np.ones(X.shape[0])
|
||||
|
||||
|
||||
class CorrectNotFittedErrorClassifier(BaseBadClassifier):
|
||||
def fit(self, X, y):
|
||||
X, y = self._validate_data(X, y)
|
||||
self.coef_ = np.ones(X.shape[1])
|
||||
return self
|
||||
|
||||
def predict(self, X):
|
||||
check_is_fitted(self)
|
||||
X = check_array(X)
|
||||
return np.ones(X.shape[0])
|
||||
|
||||
|
||||
class NoSampleWeightPandasSeriesType(BaseEstimator):
|
||||
def fit(self, X, y, sample_weight=None):
|
||||
# Convert data
|
||||
X, y = self._validate_data(
|
||||
X, y,
|
||||
accept_sparse=("csr", "csc"),
|
||||
multi_output=True,
|
||||
y_numeric=True)
|
||||
# Function is only called after we verify that pandas is installed
|
||||
from pandas import Series
|
||||
if isinstance(sample_weight, Series):
|
||||
raise ValueError("Estimator does not accept 'sample_weight'"
|
||||
"of type pandas.Series")
|
||||
return self
|
||||
|
||||
def predict(self, X):
|
||||
X = check_array(X)
|
||||
return np.ones(X.shape[0])
|
||||
|
||||
|
||||
class BadBalancedWeightsClassifier(BaseBadClassifier):
|
||||
def __init__(self, class_weight=None):
|
||||
self.class_weight = class_weight
|
||||
|
||||
def fit(self, X, y):
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from sklearn.utils import compute_class_weight
|
||||
|
||||
label_encoder = LabelEncoder().fit(y)
|
||||
classes = label_encoder.classes_
|
||||
class_weight = compute_class_weight(self.class_weight, classes=classes,
|
||||
y=y)
|
||||
|
||||
# Intentionally modify the balanced class_weight
|
||||
# to simulate a bug and raise an exception
|
||||
if self.class_weight == "balanced":
|
||||
class_weight += 1.
|
||||
|
||||
# Simply assigning coef_ to the class_weight
|
||||
self.coef_ = class_weight
|
||||
return self
|
||||
|
||||
|
||||
class BadTransformerWithoutMixin(BaseEstimator):
|
||||
def fit(self, X, y=None):
|
||||
X = self._validate_data(X)
|
||||
return self
|
||||
|
||||
def transform(self, X):
|
||||
X = check_array(X)
|
||||
return X
|
||||
|
||||
|
||||
class NotInvariantPredict(BaseEstimator):
|
||||
def fit(self, X, y):
|
||||
# Convert data
|
||||
X, y = self._validate_data(
|
||||
X, y,
|
||||
accept_sparse=("csr", "csc"),
|
||||
multi_output=True,
|
||||
y_numeric=True)
|
||||
return self
|
||||
|
||||
def predict(self, X):
|
||||
# return 1 if X has more than one element else return 0
|
||||
X = check_array(X)
|
||||
if X.shape[0] > 1:
|
||||
return np.ones(X.shape[0])
|
||||
return np.zeros(X.shape[0])
|
||||
|
||||
|
||||
class LargeSparseNotSupportedClassifier(BaseEstimator):
|
||||
def fit(self, X, y):
|
||||
X, y = self._validate_data(
|
||||
X, y,
|
||||
accept_sparse=("csr", "csc", "coo"),
|
||||
accept_large_sparse=True,
|
||||
multi_output=True,
|
||||
y_numeric=True)
|
||||
if sp.issparse(X):
|
||||
if X.getformat() == "coo":
|
||||
if X.row.dtype == "int64" or X.col.dtype == "int64":
|
||||
raise ValueError(
|
||||
"Estimator doesn't support 64-bit indices")
|
||||
elif X.getformat() in ["csc", "csr"]:
|
||||
assert "int64" not in (X.indices.dtype, X.indptr.dtype),\
|
||||
"Estimator doesn't support 64-bit indices"
|
||||
|
||||
return self
|
||||
|
||||
|
||||
class SparseTransformer(BaseEstimator):
|
||||
def fit(self, X, y=None):
|
||||
self.X_shape_ = self._validate_data(X).shape
|
||||
return self
|
||||
|
||||
def fit_transform(self, X, y=None):
|
||||
return self.fit(X, y).transform(X)
|
||||
|
||||
def transform(self, X):
|
||||
X = check_array(X)
|
||||
if X.shape[1] != self.X_shape_[1]:
|
||||
raise ValueError('Bad number of features')
|
||||
return sp.csr_matrix(X)
|
||||
|
||||
|
||||
class EstimatorInconsistentForPandas(BaseEstimator):
|
||||
def fit(self, X, y):
|
||||
try:
|
||||
from pandas import DataFrame
|
||||
if isinstance(X, DataFrame):
|
||||
self.value_ = X.iloc[0, 0]
|
||||
else:
|
||||
X = check_array(X)
|
||||
self.value_ = X[1, 0]
|
||||
return self
|
||||
|
||||
except ImportError:
|
||||
X = check_array(X)
|
||||
self.value_ = X[1, 0]
|
||||
return self
|
||||
|
||||
def predict(self, X):
|
||||
X = check_array(X)
|
||||
return np.array([self.value_] * X.shape[0])
|
||||
|
||||
|
||||
class UntaggedBinaryClassifier(SGDClassifier):
|
||||
# Toy classifier that only supports binary classification, will fail tests.
|
||||
def fit(self, X, y, coef_init=None, intercept_init=None,
|
||||
sample_weight=None):
|
||||
super().fit(X, y, coef_init, intercept_init, sample_weight)
|
||||
if len(self.classes_) > 2:
|
||||
raise ValueError('Only 2 classes are supported')
|
||||
return self
|
||||
|
||||
def partial_fit(self, X, y, classes=None, sample_weight=None):
|
||||
super().partial_fit(X=X, y=y, classes=classes,
|
||||
sample_weight=sample_weight)
|
||||
if len(self.classes_) > 2:
|
||||
raise ValueError('Only 2 classes are supported')
|
||||
return self
|
||||
|
||||
|
||||
class TaggedBinaryClassifier(UntaggedBinaryClassifier):
|
||||
# Toy classifier that only supports binary classification.
|
||||
def _more_tags(self):
|
||||
return {'binary_only': True}
|
||||
|
||||
|
||||
class RequiresPositiveYRegressor(LinearRegression):
|
||||
|
||||
def fit(self, X, y):
|
||||
X, y = self._validate_data(X, y, multi_output=True)
|
||||
if (y <= 0).any():
|
||||
raise ValueError('negative y values not supported!')
|
||||
return super().fit(X, y)
|
||||
|
||||
def _more_tags(self):
|
||||
return {"requires_positive_y": True}
|
||||
|
||||
|
def test_not_an_array_array_function():
    if np_version < parse_version('1.17'):
        raise SkipTest("array_function protocol not supported in numpy <1.17")
    not_array = _NotAnArray(np.ones(10))
    msg = "Don't want to call array_function sum!"
    assert_raises_regex(TypeError, msg, np.sum, not_array)
    # always returns True
    assert np.may_share_memory(not_array, None)


def test_check_fit_score_takes_y_works_on_deprecated_fit():
    # Tests that check_fit_score_takes_y works on a class with
    # a deprecated fit method

    class TestEstimatorWithDeprecatedFitMethod(BaseEstimator):
        @deprecated("Deprecated for the purpose of testing "
                    "check_fit_score_takes_y")
        def fit(self, X, y):
            return self

    check_fit_score_takes_y("test", TestEstimatorWithDeprecatedFitMethod())


@ignore_warnings("Passing a class is depr", category=FutureWarning)  # 0.24
def test_check_estimator():
    # tests that the estimator actually fails on "bad" estimators.
    # not a complete test of all checks, which are very extensive.

    # check that we have a set_params and can clone
    msg = "it does not implement a 'get_params' method"
    assert_raises_regex(TypeError, msg, check_estimator, object)
    msg = "object has no attribute '_get_tags'"
    assert_raises_regex(AttributeError, msg, check_estimator, object())
    # check that values returned by get_params match set_params
    msg = "get_params result does not match what was passed to set_params"
    assert_raises_regex(AssertionError, msg, check_estimator,
                        ModifiesValueInsteadOfRaisingError())
    assert_warns(UserWarning, check_estimator, RaisesErrorInSetParams())
    assert_raises_regex(AssertionError, msg, check_estimator,
                        ModifiesAnotherValue())
    # check that we have a fit method
    msg = "object has no attribute 'fit'"
    assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator)
    assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator())
    # check that fit does input validation
    msg = "ValueError not raised"
    assert_raises_regex(AssertionError, msg, check_estimator,
                        BaseBadClassifier)
    assert_raises_regex(AssertionError, msg, check_estimator,
                        BaseBadClassifier())
    # check that sample_weights in fit accepts pandas.Series type
    try:
        from pandas import Series  # noqa
        msg = ("Estimator NoSampleWeightPandasSeriesType raises error if "
               "'sample_weight' parameter is of type pandas.Series")
        assert_raises_regex(
            ValueError, msg, check_estimator, NoSampleWeightPandasSeriesType)
    except ImportError:
        pass
    # check that predict does input validation (doesn't accept dicts in input)
    msg = "Estimator doesn't check for NaN and inf in predict"
    assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict)
    assert_raises_regex(AssertionError, msg, check_estimator,
                        NoCheckinPredict())
    # check that estimator state does not change
    # at transform/predict/predict_proba time
    msg = 'Estimator changes __dict__ during predict'
    assert_raises_regex(AssertionError, msg, check_estimator, ChangesDict)
    # check that `fit` only changes attributes that
    # are private (start with an _ or end with a _).
    msg = ('Estimator ChangesWrongAttribute should not change or mutate '
           'the parameter wrong_attribute from 0 to 1 during fit.')
    assert_raises_regex(AssertionError, msg,
                        check_estimator, ChangesWrongAttribute)
    check_estimator(ChangesUnderscoreAttribute)
    # check that `fit` doesn't add any public attribute
    msg = (r'Estimator adds public attribute\(s\) during the fit method.'
           ' Estimators are only allowed to add private attributes'
           ' either started with _ or ended'
           ' with _ but wrong_attribute added')
    assert_raises_regex(AssertionError, msg,
                        check_estimator, SetsWrongAttribute)
    # check for invariant method
    name = NotInvariantPredict.__name__
    method = 'predict'
    msg = ("{method} of {name} is not invariant when applied "
           "to a subset.").format(method=method, name=name)
    assert_raises_regex(AssertionError, msg,
                        check_estimator, NotInvariantPredict)
    # check for sparse matrix input handling
    name = NoSparseClassifier.__name__
    msg = "Estimator %s doesn't seem to fail gracefully on sparse data" % name
    # the check for sparse input handling prints to the stdout,
    # instead of raising an error, so as not to remove the original traceback.
    # that means we need to jump through some hoops to catch it.
    old_stdout = sys.stdout
    string_buffer = StringIO()
    sys.stdout = string_buffer
    try:
        check_estimator(NoSparseClassifier)
    except:
        pass
    finally:
        sys.stdout = old_stdout
    assert msg in string_buffer.getvalue()

    # Large indices test on bad estimator
    msg = ('Estimator LargeSparseNotSupportedClassifier doesn\'t seem to '
           r'support \S{3}_64 matrix, and is not failing gracefully.*')
    assert_raises_regex(AssertionError, msg, check_estimator,
                        LargeSparseNotSupportedClassifier)

    # does error on binary_only untagged estimator
    msg = 'Only 2 classes are supported'
    assert_raises_regex(ValueError, msg, check_estimator,
                        UntaggedBinaryClassifier)

    # non-regression test for estimators transforming to sparse data
    check_estimator(SparseTransformer())

    # doesn't error on actual estimator
    check_estimator(LogisticRegression)
    check_estimator(LogisticRegression(C=0.01))
    check_estimator(MultiTaskElasticNet)
    check_estimator(MultiTaskElasticNet())

    # doesn't error on binary_only tagged estimator
    check_estimator(TaggedBinaryClassifier)

    # Check regressor with requires_positive_y estimator tag
    msg = 'negative y values not supported!'
    assert_raises_regex(ValueError, msg, check_estimator,
                        RequiresPositiveYRegressor)

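# NOTE: usage sketch, not part of the upstream file.  In third-party code the
# usual entry point is to call check_estimator on an *instance* of the
# estimator under test, e.g.
#
#     from sklearn.utils.estimator_checks import check_estimator
#     check_estimator(LogisticRegression(C=0.01))
#
# Passing the class itself (as some calls above still do) is deprecated,
# hence the ignore_warnings decorator, and support for it is removed in 0.24.
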
def test_check_outlier_corruption():
    # should raise AssertionError
    decision = np.array([0., 1., 1.5, 2.])
    assert_raises(AssertionError, check_outlier_corruption, 1, 2, decision)
    # should pass
    decision = np.array([0., 1., 1., 2.])
    check_outlier_corruption(1, 2, decision)

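# NOTE: explanatory comment, not from the upstream file.
# check_outlier_corruption accepts a mismatch between the requested and the
# observed number of outliers only when the decision values around the
# threshold are tied.  Above, [0., 1., 1.5, 2.] has no tie in that region, so
# predicting 1 outlier instead of 2 must fail, whereas [0., 1., 1., 2.] is
# tied at 1.0 and the same discrepancy is accepted.
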
def test_check_estimator_transformer_no_mixin():
    # check that TransformerMixin is not required for transformer tests to run
    assert_raises_regex(AttributeError, '.*fit_transform.*',
                        check_estimator, BadTransformerWithoutMixin())


def test_check_estimator_clones():
    # check that check_estimator doesn't modify the estimator it receives
    from sklearn.datasets import load_iris
    iris = load_iris()

    for Estimator in [GaussianMixture, LinearRegression,
                      RandomForestClassifier, NMF, SGDClassifier,
                      MiniBatchKMeans]:
        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
            _set_checking_parameters(est)
            set_random_state(est)
            # without fitting
            old_hash = joblib.hash(est)
            check_estimator(est)
        assert old_hash == joblib.hash(est)

        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
            _set_checking_parameters(est)
            set_random_state(est)
            # with fitting
            est.fit(iris.data + 10, iris.target)
            old_hash = joblib.hash(est)
            check_estimator(est)
        assert old_hash == joblib.hash(est)

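# NOTE: explanatory comment, not from the upstream file.  The two rounds above
# verify that check_estimator works on internal clones and never mutates the
# object it is given -- once for a freshly constructed estimator and once for
# one that was already fitted -- using joblib.hash of the whole object state
# as the equality criterion.
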
def test_check_estimators_unfitted():
    # check that a ValueError/AttributeError is raised when calling predict
    # on an unfitted estimator
    msg = "NotFittedError not raised by predict"
    assert_raises_regex(AssertionError, msg, check_estimators_unfitted,
                        "estimator", NoSparseClassifier())

    # check that CorrectNotFittedError inherits from either ValueError
    # or AttributeError
    check_estimators_unfitted("estimator", CorrectNotFittedErrorClassifier())


def test_check_no_attributes_set_in_init():
    class NonConformantEstimatorPrivateSet(BaseEstimator):
        def __init__(self):
            self.you_should_not_set_this_ = None

    class NonConformantEstimatorNoParamSet(BaseEstimator):
        def __init__(self, you_should_set_this_=None):
            pass

    assert_raises_regex(AssertionError,
                        "Estimator estimator_name should not set any"
                        " attribute apart from parameters during init."
                        r" Found attributes \['you_should_not_set_this_'\].",
                        check_no_attributes_set_in_init,
                        'estimator_name',
                        NonConformantEstimatorPrivateSet())
    assert_raises_regex(AssertionError,
                        "Estimator estimator_name should store all "
                        "parameters as an attribute during init. "
                        "Did not find attributes "
                        r"\['you_should_set_this_'\].",
                        check_no_attributes_set_in_init,
                        'estimator_name',
                        NonConformantEstimatorNoParamSet())

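# NOTE: illustrative sketch, not part of the upstream file.  A conformant
# __init__ stores each constructor argument verbatim and creates nothing else;
# fitted attributes (trailing underscore) only appear in fit.  For example:
#
#     class ConformantEstimator(BaseEstimator):
#         def __init__(self, alpha=1.0):
#             self.alpha = alpha
#
# check_no_attributes_set_in_init('estimator_name', ConformantEstimator())
# would pass for such a class.
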
def test_check_estimator_pairwise():
    # check that check_estimator() works on estimator with _pairwise
    # kernel or metric

    # test precomputed kernel
    est = SVC(kernel='precomputed')
    check_estimator(est)

    # test precomputed metric
    est = KNeighborsRegressor(metric='precomputed')
    check_estimator(est)


def test_check_classifier_data_not_an_array():
    assert_raises_regex(AssertionError,
                        'Not equal to tolerance',
                        check_classifier_data_not_an_array,
                        'estimator_name',
                        EstimatorInconsistentForPandas())


def test_check_regressor_data_not_an_array():
    assert_raises_regex(AssertionError,
                        'Not equal to tolerance',
                        check_regressor_data_not_an_array,
                        'estimator_name',
                        EstimatorInconsistentForPandas())


@ignore_warnings("Passing a class is depr", category=FutureWarning)  # 0.24
def test_check_estimator_required_parameters_skip():
    # TODO: remove whole test in 0.24 since passing classes to
    # check_estimator() isn't supported anymore
    class MyEstimator(BaseEstimator):
        _required_parameters = ["special_parameter"]

        def __init__(self, special_parameter):
            self.special_parameter = special_parameter

    assert_raises_regex(SkipTest, r"Can't instantiate estimator MyEstimator "
                                  r"which requires parameters "
                                  r"\['special_parameter'\]",
                        check_estimator, MyEstimator)


def run_tests_without_pytest():
    """Runs the tests in this file without using pytest.
    """
    main_module = sys.modules['__main__']
    test_functions = [getattr(main_module, name) for name in dir(main_module)
                      if name.startswith('test_')]
    test_cases = [unittest.FunctionTestCase(fn) for fn in test_functions]
    suite = unittest.TestSuite()
    suite.addTests(test_cases)
    runner = unittest.TextTestRunner()
    runner.run(suite)

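# NOTE: usage comment, inferred from the __main__ block at the end of this
# file.  Running `python test_estimator_checks.py` directly executes the test
# functions above through unittest.FunctionTestCase, so the estimator checks
# remain usable when pytest is not installed.
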
def test_check_class_weight_balanced_linear_classifier():
    # check that ill-computed balanced weights raise an exception
    assert_raises_regex(AssertionError,
                        "Classifier estimator_name is not computing"
                        " class_weight=balanced properly.",
                        check_class_weight_balanced_linear_classifier,
                        'estimator_name',
                        BadBalancedWeightsClassifier)


def test_all_estimators_all_public():
    # all_estimators should not fail when pytest is not installed and should
    # return only public estimators
    estimators = all_estimators()
    for est in estimators:
        assert not est.__class__.__name__.startswith("_")


if __name__ == '__main__':
    # This module is run as a script to check that we have no dependency on
    # pytest for estimator checks.
    run_tests_without_pytest()