Uploaded Test files
This commit is contained in:
parent
f584ad9d97
commit
2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
307
venv/Lib/site-packages/sklearn/linear_model/_huber.py
Normal file
307
venv/Lib/site-packages/sklearn/linear_model/_huber.py
Normal file
|
@ -0,0 +1,307 @@
|
|||
# Authors: Manoj Kumar mks542@nyu.edu
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numpy as np
|
||||
|
||||
from scipy import optimize
|
||||
|
||||
from ..base import BaseEstimator, RegressorMixin
|
||||
from ._base import LinearModel
|
||||
from ..utils import axis0_safe_slice
|
||||
from ..utils.validation import _check_sample_weight
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..utils.extmath import safe_sparse_dot
|
||||
from ..utils.optimize import _check_optimize_result
|
||||
|
||||
|
||||
def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):
    """Return the Huber loss and its gradient.

    Parameters
    ----------
    w : ndarray, shape (n_features + 1,) or (n_features + 2,)
        Parameter vector.
        w[:n_features] gives the coefficients,
        w[-1] gives the scale factor and, if the intercept is fit, w[-2]
        gives the intercept. Whether an intercept is fit is inferred from
        the length of ``w``: it is fit iff ``len(w) == n_features + 2``.

    X : ndarray, shape (n_samples, n_features)
        Input data.

    y : ndarray, shape (n_samples,)
        Target vector.

    epsilon : float
        Robustness of the Huber estimator.

    alpha : float
        Regularization parameter.

    sample_weight : ndarray, shape (n_samples,), optional
        Weight assigned to each sample. When ``None`` every sample gets a
        weight of one.

    Returns
    -------
    loss : float
        Huber loss.

    gradient : ndarray, shape (len(w))
        Returns the derivative of the Huber loss with respect to each
        coefficient, intercept and the scale as a vector.
    """
    _, n_features = X.shape
    fit_intercept = (n_features + 2 == w.shape[0])
    if fit_intercept:
        intercept = w[-2]
    sigma = w[-1]
    w = w[:n_features]

    # The signature advertises ``sample_weight=None`` but everything below
    # indexes and sums the weights, so fall back to unit weights.
    if sample_weight is None:
        sample_weight = np.ones(X.shape[0])

    # Despite the name this is the total sample weight, not a row count.
    n_samples = np.sum(sample_weight)

    # Calculate the values where |(y - X'w - c) / sigma| > epsilon.
    # The values above this threshold are outliers.
    linear_loss = y - safe_sparse_dot(X, w)
    if fit_intercept:
        linear_loss -= intercept
    abs_linear_loss = np.abs(linear_loss)
    outliers_mask = abs_linear_loss > epsilon * sigma

    # Calculate the linear loss due to the outliers.
    # This is equal to (2 * M * |(y - X'w - c) / sigma| - M**2) * sigma
    outliers = abs_linear_loss[outliers_mask]
    num_outliers = np.count_nonzero(outliers_mask)
    n_non_outliers = X.shape[0] - num_outliers

    # n_sw_outliers includes the weight given to the outliers while
    # num_outliers is just the number of outliers.
    outliers_sw = sample_weight[outliers_mask]
    n_sw_outliers = np.sum(outliers_sw)
    outlier_loss = (2. * epsilon * np.sum(outliers_sw * outliers) -
                    sigma * n_sw_outliers * epsilon ** 2)

    # Calculate the quadratic loss due to the non-outliers.
    # This is equal to |(y - X'w - c)**2 / sigma**2| * sigma
    non_outliers = linear_loss[~outliers_mask]
    weighted_non_outliers = sample_weight[~outliers_mask] * non_outliers
    weighted_loss = np.dot(weighted_non_outliers.T, non_outliers)
    squared_loss = weighted_loss / sigma

    # The gradient has the same layout as the full parameter vector:
    # coefficients first, then (optionally) the intercept, then the scale.
    if fit_intercept:
        grad = np.zeros(n_features + 2)
    else:
        grad = np.zeros(n_features + 1)

    # Gradient due to the squared loss.
    X_non_outliers = -axis0_safe_slice(X, ~outliers_mask, n_non_outliers)
    grad[:n_features] = (
        2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

    # Gradient due to the linear loss.
    # signed_outliers holds the sign (+1 / -1) of each outlier's residual.
    signed_outliers = np.ones_like(outliers)
    signed_outliers_mask = linear_loss[outliers_mask] < 0
    signed_outliers[signed_outliers_mask] = -1.0
    X_outliers = axis0_safe_slice(X, outliers_mask, num_outliers)
    sw_outliers = outliers_sw * signed_outliers
    grad[:n_features] -= 2. * epsilon * (
        safe_sparse_dot(sw_outliers, X_outliers))

    # Gradient due to the penalty.
    grad[:n_features] += alpha * 2. * w

    # Gradient due to sigma.
    grad[-1] = n_samples
    grad[-1] -= n_sw_outliers * epsilon ** 2
    grad[-1] -= squared_loss / sigma

    # Gradient due to the intercept.
    if fit_intercept:
        grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma
        grad[-2] -= 2. * epsilon * np.sum(sw_outliers)

    loss = n_samples * sigma + squared_loss + outlier_loss
    loss += alpha * np.dot(w, w)
    return loss, grad
|
||||
|
||||
|
||||
class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):
    """Linear regression model that is robust to outliers.

    The Huber Regressor optimizes the squared loss for the samples where
    ``|(y - X'w) / sigma| < epsilon`` and the absolute loss for the samples
    where ``|(y - X'w) / sigma| > epsilon``, where w and sigma are parameters
    to be optimized. The parameter sigma makes sure that if y is scaled up
    or down by a certain factor, one does not need to rescale epsilon to
    achieve the same robustness. Note that this does not take into account
    the fact that the different features of X may be of different scales.

    This makes sure that the loss function is not heavily influenced by the
    outliers while not completely ignoring their effect.

    Read more in the :ref:`User Guide <huber_regression>`

    .. versionadded:: 0.18

    Parameters
    ----------
    epsilon : float, greater than or equal to 1.0, default 1.35
        The parameter epsilon controls the number of samples that should be
        classified as outliers. The smaller the epsilon, the more robust it is
        to outliers. ``fit`` raises ``ValueError`` for values below 1.0.

    max_iter : int, default 100
        Maximum number of iterations that
        ``scipy.optimize.minimize(method="L-BFGS-B")`` should run for.

    alpha : float, default 0.0001
        Regularization parameter.

    warm_start : bool, default False
        This is useful if the stored attributes of a previously used model
        has to be reused. If set to False, then the coefficients will
        be rewritten for every call to fit.
        See :term:`the Glossary <warm_start>`.

    fit_intercept : bool, default True
        Whether or not to fit the intercept. This can be set to False
        if the data is already centered around the origin.

    tol : float, default 1e-5
        The iteration will stop when
        ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``
        where proj g_i is the i-th component of the projected gradient.

    Attributes
    ----------
    coef_ : array, shape (n_features,)
        Features got by optimizing the Huber loss.

    intercept_ : float
        Bias.

    scale_ : float
        The value by which ``|y - X'w - c|`` is scaled down.

    n_iter_ : int
        Number of iterations that
        ``scipy.optimize.minimize(method="L-BFGS-B")`` has run for.

        .. versionchanged:: 0.20

            In SciPy <= 1.0.0 the number of lbfgs iterations may exceed
            ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.

    outliers_ : array, shape (n_samples,)
        A boolean mask which is set to True where the samples are identified
        as outliers.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import HuberRegressor, LinearRegression
    >>> from sklearn.datasets import make_regression
    >>> rng = np.random.RandomState(0)
    >>> X, y, coef = make_regression(
    ...     n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)
    >>> X[:4] = rng.uniform(10, 20, (4, 2))
    >>> y[:4] = rng.uniform(10, 20, 4)
    >>> huber = HuberRegressor().fit(X, y)
    >>> huber.score(X, y)
    -7.284...
    >>> huber.predict(X[:1,])
    array([806.7200...])
    >>> linear = LinearRegression().fit(X, y)
    >>> print("True coefficients:", coef)
    True coefficients: [20.4923...  34.1698...]
    >>> print("Huber coefficients:", huber.coef_)
    Huber coefficients: [17.7906... 31.0106...]
    >>> print("Linear Regression coefficients:", linear.coef_)
    Linear Regression coefficients: [-1.9221...  7.0226...]

    References
    ----------
    .. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics
           Concomitant scale estimates, pg 172
    .. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression.
           https://statweb.stanford.edu/~owen/reports/hhu.pdf
    """
    @_deprecate_positional_args
    def __init__(self, *, epsilon=1.35, max_iter=100, alpha=0.0001,
                 warm_start=False, fit_intercept=True, tol=1e-05):
        # Hyper-parameters are stored verbatim; validation happens in fit.
        self.epsilon = epsilon
        self.max_iter = max_iter
        self.alpha = alpha
        self.warm_start = warm_start
        self.fit_intercept = fit_intercept
        self.tol = tol

    def fit(self, X, y, sample_weight=None):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape (n_samples,)
            Target vector relative to X.

        sample_weight : array-like, shape (n_samples,)
            Weight given to each sample.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If ``epsilon`` is smaller than 1.0, or if the L-BFGS-B solver
            terminates with status code 2.
        """
        X, y = self._validate_data(
            X, y, copy=False, accept_sparse=['csr'], y_numeric=True,
            dtype=[np.float64, np.float32])

        sample_weight = _check_sample_weight(sample_weight, X)

        if self.epsilon < 1.0:
            raise ValueError(
                "epsilon should be greater than or equal to 1.0, got %f"
                % self.epsilon)

        # The loss function decides whether an intercept is being fit from
        # the length of the parameter vector (n_features + 2 means "yes"),
        # so the layout must match ``self.fit_intercept`` in every branch.
        if self.warm_start and hasattr(self, 'coef_'):
            if self.fit_intercept:
                parameters = np.concatenate(
                    (self.coef_, [self.intercept_, self.scale_]))
            else:
                # Do not append the stored intercept here: doing so would
                # make the solver optimise an intercept that is discarded
                # afterwards.
                parameters = np.concatenate((self.coef_, [self.scale_]))
        else:
            if self.fit_intercept:
                parameters = np.zeros(X.shape[1] + 2)
            else:
                parameters = np.zeros(X.shape[1] + 1)
            # Make sure to initialize the scale parameter to a strictly
            # positive value:
            parameters[-1] = 1

        # Sigma or the scale factor should be non-negative.
        # Setting it to be zero might cause undefined bounds hence we set it
        # to a value close to zero.
        bounds = np.tile([-np.inf, np.inf], (parameters.shape[0], 1))
        bounds[-1][0] = np.finfo(np.float64).eps * 10

        opt_res = optimize.minimize(
            _huber_loss_and_gradient, parameters, method="L-BFGS-B", jac=True,
            args=(X, y, self.epsilon, self.alpha, sample_weight),
            options={"maxiter": self.max_iter, "gtol": self.tol, "iprint": -1},
            bounds=bounds)

        parameters = opt_res.x

        # Solver status 2 is treated as a hard convergence failure.
        if opt_res.status == 2:
            raise ValueError("HuberRegressor convergence failed:"
                             " l-BFGS-b solver terminated with %s"
                             % opt_res.message)
        self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)

        # Unpack the optimised vector: [coefficients..., (intercept), scale].
        self.scale_ = parameters[-1]
        if self.fit_intercept:
            self.intercept_ = parameters[-2]
        else:
            self.intercept_ = 0.0
        self.coef_ = parameters[:X.shape[1]]

        # Flag as outliers the samples whose absolute residual exceeds
        # scale_ * epsilon, the same threshold used inside the loss.
        residual = np.abs(
            y - safe_sparse_dot(X, self.coef_) - self.intercept_)
        self.outliers_ = residual > self.scale_ * self.epsilon
        return self
|
Loading…
Add table
Add a link
Reference in a new issue