Uploaded Test files

This commit is contained in:
Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions

View file

@ -0,0 +1,530 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Fabian Pedregosa <fabian.pedregosa@inria.fr>
#
# License: BSD 3 clause
import pytest
import numpy as np
from scipy import sparse
from scipy import linalg
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_allclose
from sklearn.utils.fixes import parse_version
from sklearn.linear_model import LinearRegression
from sklearn.linear_model._base import _preprocess_data
from sklearn.linear_model._base import _rescale_data
from sklearn.linear_model._base import make_dataset
from sklearn.utils import check_random_state
from sklearn.datasets import make_sparse_uncorrelated
from sklearn.datasets import make_regression
from sklearn.datasets import load_iris
# Module-level seeded random state; several tests below draw from it directly,
# so the values each test sees depend on the order in which tests consume it.
rng = np.random.RandomState(0)
# Relative tolerance passed to assert_allclose in test_fused_types_make_dataset.
rtol = 1e-6
def test_linear_regression():
    """Fit LinearRegression on tiny datasets and check coef_, intercept_ and predict."""
    # Each case: (features, targets, expected coefficients, expected predictions).
    # The first is a simple dataset, the second a degenerate one-sample input.
    cases = (
        ([[1], [2]], [1, 2], [1], [1, 2]),
        ([[1]], [0], [0], [0]),
    )
    for features, targets, expected_coef, expected_pred in cases:
        model = LinearRegression()
        model.fit(features, targets)
        assert_array_almost_equal(model.coef_, expected_coef)
        assert_array_almost_equal(model.intercept_, [0])
        assert_array_almost_equal(model.predict(features), expected_pred)
def test_linear_regression_sample_weights():
    """Compare weighted LinearRegression with the closed-form weighted least squares."""
    # TODO: loop over sparse data as well
    local_rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    shapes = ((6, 5),)
    for n_samples, n_features in shapes:
        y = local_rng.randn(n_samples)
        X = local_rng.randn(n_samples, n_features)
        sample_weight = 1.0 + local_rng.rand(n_samples)
        # Diagonal weight matrix W used by the closed form below.
        weight_matrix = np.diag(sample_weight)

        for intercept in (True, False):
            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            fitted_coef = reg.coef_
            fitted_intercept = reg.intercept_

            # sanity checks
            assert reg.coef_.shape == (X.shape[1],)
            assert reg.score(X, y) > 0.5

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            if intercept:
                # Prepend a column of ones so theta[0] plays the intercept role.
                ones_column = np.ones(shape=(n_samples, 1))
                design = np.concatenate((ones_column, X), axis=1)
            else:
                design = X
            theta = linalg.solve(design.T.dot(weight_matrix).dot(design),
                                 design.T.dot(weight_matrix).dot(y))

            if intercept:
                assert_array_almost_equal(fitted_coef, theta[1:])
                assert_almost_equal(fitted_intercept, theta[0])
            else:
                assert_array_almost_equal(fitted_coef, theta)
def test_raises_value_error_if_sample_weights_greater_than_1d():
    """Check that 1-D array and scalar sample weights are accepted by fit.

    NOTE(review): despite the name, this block only exercises the accepted
    weight forms; no ValueError case is asserted here.
    """
    # Sample weights must be either scalar or 1D
    for n_samples, n_features in ((2, 3), (3, 2)):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        array_weights = rng.randn(n_samples) ** 2 + 1

        reg = LinearRegression()

        # make sure the "OK" sample weights actually work
        for weights in (array_weights, 1., 2.):
            reg.fit(X, y, weights)
def test_fit_intercept():
    """Check that coef_ shape does not depend on fit_intercept."""
    # Test assertions on betas shape.
    X2 = np.array([[0.38349978, 0.61650022],
                   [0.58853682, 0.41146318]])
    X3 = np.array([[0.27677969, 0.70693172, 0.01628859],
                   [0.08385139, 0.20692515, 0.70922346]])
    y = np.array([1, 1])

    def fit_both(features):
        # Returns (model without intercept, model with intercept).
        no_intercept = LinearRegression(fit_intercept=False).fit(features, y)
        with_intercept = LinearRegression().fit(features, y)
        return no_intercept, with_intercept

    plain_2, intercept_2 = fit_both(X2)
    plain_3, intercept_3 = fit_both(X3)

    assert intercept_2.coef_.shape == plain_2.coef_.shape
    assert intercept_3.coef_.shape == plain_3.coef_.shape
    assert plain_2.coef_.ndim == plain_3.coef_.ndim
def test_linear_regression_sparse(random_state=0):
    """Check that LinearRegression also works with sparse input.

    Parameters
    ----------
    random_state : seed or RandomState passed to check_random_state.
    """
    # Test that linear regression also works with sparse data
    generator = check_random_state(random_state)
    n = 100

    for _ in range(10):
        X = sparse.eye(n, n)
        beta = generator.rand(n)
        # With an identity design matrix the targets are the coefficients.
        y = X * beta[:, np.newaxis]

        ols = LinearRegression()
        ols.fit(X, y.ravel())
        assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)

        residual = ols.predict(X) - y.ravel()
        assert_array_almost_equal(residual, 0)
@pytest.mark.parametrize('normalize', [True, False])
@pytest.mark.parametrize('fit_intercept', [True, False])
def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
    """Check that dense and CSR inputs give the same intercept_ and coef_."""
    # Test that linear regression agrees between sparse and dense
    local_rng = check_random_state(0)
    n_samples, n_features = 200, 2

    X = local_rng.randn(n_samples, n_features)
    # Zero out small/negative entries so the CSR version is actually sparse.
    X[X < 0.1] = 0.
    y = local_rng.rand(n_samples)
    Xcsr = sparse.csr_matrix(X)

    dense_model = LinearRegression(normalize=normalize,
                                   fit_intercept=fit_intercept)
    sparse_model = LinearRegression(normalize=normalize,
                                    fit_intercept=fit_intercept)
    dense_model.fit(X, y)
    sparse_model.fit(Xcsr, y)

    assert dense_model.intercept_ == pytest.approx(sparse_model.intercept_)
    assert_allclose(dense_model.coef_, sparse_model.coef_)
def test_linear_regression_multiple_outcome(random_state=0):
    """Check a two-column target yields the same predictions as a single target.

    Parameters
    ----------
    random_state : forwarded to make_regression.
    """
    # Test multiple-outcome linear regressions
    X, y = make_regression(random_state=random_state)
    n_features = X.shape[1]
    # Duplicate the single target into two identical output columns.
    Y = np.vstack((y, y)).T

    reg = LinearRegression()
    reg.fit(X, Y)
    assert reg.coef_.shape == (2, n_features)
    multi_pred = reg.predict(X)

    reg.fit(X, y)
    single_pred = reg.predict(X)
    stacked_single = np.vstack((single_pred, single_pred)).T
    assert_array_almost_equal(stacked_single, multi_pred, decimal=3)
def test_linear_regression_sparse_multiple_outcome(random_state=0):
    """Check multi-output fitting on COO sparse data matches single-output fits.

    Parameters
    ----------
    random_state : seed or RandomState passed to check_random_state.
    """
    # Test multiple-outcome linear regressions with sparse data
    generator = check_random_state(random_state)
    dense_X, y = make_sparse_uncorrelated(random_state=generator)
    X = sparse.coo_matrix(dense_X)
    n_features = X.shape[1]
    # Duplicate the single target into two identical output columns.
    Y = np.vstack((y, y)).T

    ols = LinearRegression()
    ols.fit(X, Y)
    assert ols.coef_.shape == (2, n_features)
    multi_pred = ols.predict(X)

    ols.fit(X, y.ravel())
    single_pred = ols.predict(X)
    stacked_single = np.vstack((single_pred, single_pred)).T
    assert_array_almost_equal(stacked_single, multi_pred, decimal=3)
def test_linear_regression_pd_sparse_dataframe_warning():
    """Check the sparse-column warning for partially and fully sparse DataFrames.

    A UserWarning is expected when only some DataFrame columns are sparse,
    and no warning at all when every column is sparse. Skipped when pandas
    is missing or older than 0.24.
    """
    # Local import: this module does not import ``warnings`` at the top.
    import warnings

    pd = pytest.importorskip('pandas')
    # restrict the pd versions < '0.24.0' as they have a bug in is_sparse func
    if parse_version(pd.__version__) < parse_version('0.24.0'):
        pytest.skip("pandas 0.24+ required.")

    # Warning is raised only when some of the columns is sparse
    df = pd.DataFrame({'0': np.random.randn(10)})
    for col in range(1, 4):
        arr = np.random.randn(10)
        arr[:8] = 0
        # all columns but the first column is sparse; the loop starts at 1,
        # so every column added here is converted.
        df[str(col)] = pd.arrays.SparseArray(arr, fill_value=0)

    msg = "pandas.DataFrame with sparse columns found."
    with pytest.warns(UserWarning, match=msg):
        reg = LinearRegression()
        reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])

    # does not warn when the whole dataframe is sparse
    df['0'] = pd.arrays.SparseArray(df['0'], fill_value=0)
    assert hasattr(df, "sparse")

    # ``pytest.warns(None)`` is deprecated; turning every warning into an
    # error keeps the same "no warning of any kind" requirement.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
def test_preprocess_data():
    """Check _preprocess_data outputs on dense data for three option combinations."""
    n_samples = 200
    n_features = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    expected_X_mean = np.mean(X, axis=0)
    expected_X_norm = np.std(X, axis=0) * np.sqrt(X.shape[0])
    expected_y_mean = np.mean(y, axis=0)

    def check(fit_intercept, normalize, mean_X, mean_y, norm_X, X_out, y_out):
        # Run _preprocess_data once and compare all five returned values.
        Xt, yt, X_mean, y_mean, X_norm = _preprocess_data(
            X, y, fit_intercept=fit_intercept, normalize=normalize)
        assert_array_almost_equal(X_mean, mean_X)
        assert_array_almost_equal(y_mean, mean_y)
        assert_array_almost_equal(X_norm, norm_X)
        assert_array_almost_equal(Xt, X_out)
        assert_array_almost_equal(yt, y_out)

    # No intercept, no normalization: data comes back unchanged.
    check(False, False,
          np.zeros(n_features), 0, np.ones(n_features), X, y)
    # Intercept only: data is centered, norms stay at one.
    check(True, False,
          expected_X_mean, expected_y_mean, np.ones(n_features),
          X - expected_X_mean, y - expected_y_mean)
    # Intercept and normalization: centered X is divided by the norm.
    check(True, True,
          expected_X_mean, expected_y_mean, expected_X_norm,
          (X - expected_X_mean) / expected_X_norm, y - expected_y_mean)
def test_preprocess_data_multioutput():
    """Check y centering in _preprocess_data for a two-column target."""
    n_samples = 200
    n_features = 3
    n_outputs = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_outputs)
    expected_y_mean = np.mean(y, axis=0)

    # Same checks for the dense array and its CSC counterpart.
    for data in (X, sparse.csc_matrix(X)):
        _, yt, _, y_mean, _ = _preprocess_data(data, y, fit_intercept=False,
                                               normalize=False)
        assert_array_almost_equal(y_mean, np.zeros(n_outputs))
        assert_array_almost_equal(yt, y)

        # With an intercept, y is centered whether or not X is normalized.
        for normalize in (False, True):
            _, yt, _, y_mean, _ = _preprocess_data(data, y,
                                                   fit_intercept=True,
                                                   normalize=normalize)
            assert_array_almost_equal(y_mean, expected_y_mean)
            assert_array_almost_equal(yt, y - y_mean)
def test_preprocess_data_weighted():
    """Check _preprocess_data with sample_weight, with and without normalize."""
    n_samples = 200
    n_features = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    sample_weight = rng.rand(n_samples)
    expected_X_mean = np.average(X, axis=0, weights=sample_weight)
    expected_y_mean = np.average(y, axis=0, weights=sample_weight)

    # XXX: if normalize=True, should we expect a weighted standard deviation?
    # Currently not weighted, but calculated with respect to weighted mean
    squared_dev = (X - expected_X_mean) ** 2
    expected_X_norm = (np.sqrt(X.shape[0]) *
                       np.mean(squared_dev, axis=0) ** .5)

    for normalize in (False, True):
        Xt, yt, X_mean, y_mean, X_norm = _preprocess_data(
            X, y, fit_intercept=True, normalize=normalize,
            sample_weight=sample_weight)

        if normalize:
            norm_ref = expected_X_norm
            Xt_ref = (X - expected_X_mean) / expected_X_norm
        else:
            norm_ref = np.ones(n_features)
            Xt_ref = X - expected_X_mean

        assert_array_almost_equal(X_mean, expected_X_mean)
        assert_array_almost_equal(y_mean, expected_y_mean)
        assert_array_almost_equal(X_norm, norm_ref)
        assert_array_almost_equal(Xt, Xt_ref)
        assert_array_almost_equal(yt, y - expected_y_mean)
def test_sparse_preprocess_data_with_return_mean():
    """Check _preprocess_data on sparse (LIL) input with return_mean=True.

    The assertions below expect that sparse X is never centered (Xt is
    compared with the un-centered dense copy), while the returned means and
    norms match those of the dense data.
    """
    n_samples = 200
    n_features = 2
    # Fixed integer seed so the sparse fixture is reproducible without
    # consuming values from the module-level ``rng``.
    X = sparse.rand(n_samples, n_features, density=.5, random_state=0)
    X = X.tolil()
    y = rng.rand(n_samples)
    XA = X.toarray()
    expected_X_norm = np.std(XA, axis=0) * np.sqrt(X.shape[0])

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=False, normalize=False,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.zeros(n_features))
    assert_array_almost_equal(y_mean, 0)
    assert_array_almost_equal(X_norm, np.ones(n_features))
    # ``.toarray()`` replaces the deprecated ``.A`` shorthand.
    assert_array_almost_equal(Xt.toarray(), XA)
    assert_array_almost_equal(yt, y)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=False,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt.toarray(), XA)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=True,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_norm, expected_X_norm)
    assert_array_almost_equal(Xt.toarray(), XA / expected_X_norm)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))
def test_csr_preprocess_data():
    """Check that _preprocess_data returns CSR output for CSR input."""
    # Test output format of _preprocess_data, when input is csr
    X, y = make_regression()
    # Zero out most entries before converting to CSR.
    X[X < 2.5] = 0.0
    csr_input = sparse.csr_matrix(X)
    csr_output, y, _, _, _ = _preprocess_data(csr_input, y, True)
    assert csr_output.getformat() == 'csr'
@pytest.mark.parametrize('is_sparse', (True, False))
@pytest.mark.parametrize('to_copy', (True, False))
def test_preprocess_copy_data_no_checks(is_sparse, to_copy):
    """Check that the copy flag controls memory sharing when check_input=False."""
    X, y = make_regression()
    X[X < 2.5] = 0.0

    if is_sparse:
        X = sparse.csr_matrix(X)

    X_, y_, _, _, _ = _preprocess_data(X, y, True,
                                       copy=to_copy, check_input=False)

    # For sparse input compare the underlying data buffers; for dense input
    # compare the arrays themselves.
    if is_sparse:
        shares_memory = np.may_share_memory(X_.data, X.data)
    else:
        shares_memory = np.may_share_memory(X_, X)

    if to_copy:
        assert not shares_memory
    else:
        assert shares_memory
def test_dtype_preprocess_data():
    """Check that _preprocess_data outputs take the dtype of X.

    Covers float32/float64 inputs, mixed X/y dtypes, that the inputs keep
    their dtype, and that float32 and float64 results agree.
    """
    n_samples = 200
    n_features = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)

    X_32 = np.asarray(X, dtype=np.float32)
    y_32 = np.asarray(y, dtype=np.float32)
    X_64 = np.asarray(X, dtype=np.float64)
    y_64 = np.asarray(y, dtype=np.float64)

    for fit_intercept in [True, False]:
        for normalize in [True, False]:
            options = dict(fit_intercept=fit_intercept, normalize=normalize,
                           return_mean=True)

            # Each result is the tuple (Xt, yt, X_mean, y_mean, X_norm).
            out_32 = _preprocess_data(X_32, y_32, **options)
            out_64 = _preprocess_data(X_64, y_64, **options)
            out_3264 = _preprocess_data(X_32, y_64, **options)
            out_6432 = _preprocess_data(X_64, y_32, **options)

            # All five outputs are expected to carry the dtype of X.
            expectations = (
                (out_32, np.float32),
                (out_64, np.float64),
                (out_3264, np.float32),
                (out_6432, np.float64),
            )
            for outputs, expected_dtype in expectations:
                for value in outputs:
                    assert value.dtype == expected_dtype

            # The inputs themselves must keep their dtype.
            assert X_32.dtype == np.float32
            assert y_32.dtype == np.float32
            assert X_64.dtype == np.float64
            assert y_64.dtype == np.float64

            # Single and double precision results must agree numerically.
            for value_32, value_64 in zip(out_32, out_64):
                assert_array_almost_equal(value_32, value_64)
@pytest.mark.parametrize('n_targets', [None, 2])
def test_rescale_data_dense(n_targets):
    """Check _rescale_data scales X and y rows by sqrt(sample_weight)."""
    n_samples = 200
    n_features = 2

    sample_weight = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    single_target = n_targets is None
    if single_target:
        y = rng.rand(n_samples)
    else:
        y = rng.rand(n_samples, n_targets)

    rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)

    expected_X = X * np.sqrt(sample_weight)[:, np.newaxis]
    if single_target:
        expected_y = y * np.sqrt(sample_weight)
    else:
        # 2-D targets need the weights broadcast down the rows.
        expected_y = y * np.sqrt(sample_weight)[:, np.newaxis]

    assert_array_almost_equal(rescaled_X, expected_X)
    assert_array_almost_equal(rescaled_y, expected_y)
def test_fused_types_make_dataset():
    """Check make_dataset keeps float32/float64 dtypes for dense and CSR input."""
    iris = load_iris()

    X_32 = iris.data.astype(np.float32)
    y_32 = iris.target.astype(np.float32)
    X_csr_32 = sparse.csr_matrix(X_32)
    sample_weight_32 = np.arange(y_32.size, dtype=np.float32)

    X_64 = iris.data.astype(np.float64)
    y_64 = iris.target.astype(np.float64)
    X_csr_64 = sparse.csr_matrix(X_64)
    sample_weight_64 = np.arange(y_64.size, dtype=np.float64)

    # array
    dataset_32, _ = make_dataset(X_32, y_32, sample_weight_32)
    dataset_64, _ = make_dataset(X_64, y_64, sample_weight_64)
    # _next_py() returns a 4-tuple whose first item is itself a 3-tuple;
    # only the first element of that inner tuple and the target are used.
    (xi_data_32, _, _), yi_32, _, _ = dataset_32._next_py()
    (xi_data_64, _, _), yi_64, _, _ = dataset_64._next_py()

    assert xi_data_32.dtype == np.float32
    assert xi_data_64.dtype == np.float64
    assert_allclose(yi_64, yi_32, rtol=rtol)

    # csr
    datasetcsr_32, _ = make_dataset(X_csr_32, y_32, sample_weight_32)
    datasetcsr_64, _ = make_dataset(X_csr_64, y_64, sample_weight_64)
    (xicsr_data_32, _, _), yicsr_32, _, _ = datasetcsr_32._next_py()
    (xicsr_data_64, _, _), yicsr_64, _, _ = datasetcsr_64._next_py()

    assert xicsr_data_32.dtype == np.float32
    assert xicsr_data_64.dtype == np.float64

    assert_allclose(xicsr_data_64, xicsr_data_32, rtol=rtol)
    assert_allclose(yicsr_64, yicsr_32, rtol=rtol)

    # Dense and CSR datasets must yield the same first sample.
    assert_array_equal(xi_data_32, xicsr_data_32)
    assert_array_equal(xi_data_64, xicsr_data_64)
    assert_array_equal(yi_32, yicsr_32)
    assert_array_equal(yi_64, yicsr_64)

View file

@ -0,0 +1,274 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Fabian Pedregosa <fabian.pedregosa@inria.fr>
#
# License: BSD 3 clause
from math import log
import numpy as np
from scipy.linalg import pinvh
import pytest
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_less
from sklearn.utils._testing import assert_raise_message
from sklearn.utils import check_random_state
from sklearn.linear_model import BayesianRidge, ARDRegression
from sklearn.linear_model import Ridge
from sklearn import datasets
from sklearn.utils.extmath import fast_logdet
# Diabetes dataset loaded once at import time; tests below read
# diabetes.data and diabetes.target from it.
diabetes = datasets.load_diabetes()
def test_n_iter():
    """Check value of n_iter."""
    features = np.array([[1], [2], [6], [8], [10]])
    targets = np.array([1, 2, 6, 8, 10])
    expected_message = "n_iter should be greater than or equal to 1."
    # n_iter=0 must be rejected when fit is called.
    estimator = BayesianRidge(n_iter=0)
    assert_raise_message(ValueError, expected_message,
                         estimator.fit, features, targets)
def test_bayesian_ridge_scores():
    """Check scores attribute shape"""
    estimator = BayesianRidge(compute_score=True)
    estimator.fit(diabetes.data, diabetes.target)

    # One score per iteration plus one extra entry.
    expected_shape = (estimator.n_iter_ + 1,)
    assert estimator.scores_.shape == expected_shape
def test_bayesian_ridge_score_values():
    """Check value of score on toy example.

    Compute log marginal likelihood with equation (36) in Sparse Bayesian
    Learning and the Relevance Vector Machine (Tipping, 2001):

    - 0.5 * (log |Id/alpha + X.X^T/lambda| +
             y^T.(Id/alpha + X.X^T/lambda)^-1.y + n * log(2 * pi))
    + lambda_1 * log(lambda) - lambda_2 * lambda
    + alpha_1 * log(alpha) - alpha_2 * alpha

    and check equality with the score computed during training.
    """
    X, y = diabetes.data, diabetes.target
    n_samples = X.shape[0]

    # check with initial values of alpha and lambda (see code for the values)
    eps = np.finfo(np.float64).eps
    alpha_ = 1. / (np.var(y) + eps)
    lambda_ = 1.

    # value of the parameters of the Gamma hyperpriors
    alpha_1 = alpha_2 = 0.1
    lambda_1 = lambda_2 = 0.1

    # compute score using formula of docstring
    score = lambda_1 * log(lambda_) - lambda_2 * lambda_
    score += alpha_1 * log(alpha_) - alpha_2 * alpha_
    M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T)
    M_inv = pinvh(M)
    # Log-determinant, quadratic form in y and the constant term.
    bracket = (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
               n_samples * log(2 * np.pi))
    score += - 0.5 * bracket

    # compute score with BayesianRidge
    estimator = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2,
                              lambda_1=lambda_1, lambda_2=lambda_2,
                              n_iter=1, fit_intercept=False,
                              compute_score=True)
    estimator.fit(X, y)

    assert_almost_equal(estimator.scores_[0], score, decimal=9)
def test_bayesian_ridge_parameter():
    """Check lambda_ and alpha_ by comparing BayesianRidge with an equivalent Ridge."""
    # Test correctness of lambda_ and alpha_ parameters (GitHub issue #8224)
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T

    # A Ridge regression model using an alpha value equal to the ratio of
    # lambda_ and alpha_ from the Bayesian Ridge model must be identical
    bayes = BayesianRidge(compute_score=True).fit(X, y)
    equivalent_alpha = bayes.lambda_ / bayes.alpha_
    ridge = Ridge(alpha=equivalent_alpha).fit(X, y)

    assert_array_almost_equal(ridge.coef_, bayes.coef_)
    assert_almost_equal(ridge.intercept_, bayes.intercept_)
def test_bayesian_sample_weights():
    """Check sample_weight support by comparing BayesianRidge with a weighted Ridge."""
    # Test correctness of the sample_weights method
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T
    w = np.array([4, 3, 3, 1, 1, 2, 3]).T

    # A Ridge regression model using an alpha value equal to the ratio of
    # lambda_ and alpha_ from the Bayesian Ridge model must be identical
    bayes = BayesianRidge(compute_score=True).fit(X, y, sample_weight=w)
    equivalent_alpha = bayes.lambda_ / bayes.alpha_
    ridge = Ridge(alpha=equivalent_alpha).fit(X, y, sample_weight=w)

    assert_array_almost_equal(ridge.coef_, bayes.coef_)
    assert_almost_equal(ridge.intercept_, bayes.intercept_)
def test_toy_bayesian_ridge_object():
    """Check BayesianRidge roughly learns the identity on a toy dataset."""
    # Test BayesianRidge on toy
    train_X = np.array([[1], [2], [6], [8], [10]])
    train_y = np.array([1, 2, 6, 8, 10])
    estimator = BayesianRidge(compute_score=True)
    estimator.fit(train_X, train_y)

    # Check that the model could approximately learn the identity function
    queries = [[1], [3], [4]]
    predictions = estimator.predict(queries)
    assert_array_almost_equal(predictions, [1, 3, 4], 2)
def test_bayesian_initial_params():
    """Check BayesianRidge reaches R2 close to 1 with explicit initial values."""
    # Test BayesianRidge with initial values (alpha_init, lambda_init)
    grid = np.linspace(0, 4, 5)
    X = np.vander(grid, 4)
    y = np.array([0., 1., 0., -1., 0.])  # y = (x^3 - 6x^2 + 8x) / 3

    # In this case, starting from the default initial values will increase
    # the bias of the fitted curve. So, lambda_init should be small.
    reg = BayesianRidge(alpha_init=1., lambda_init=1e-3)
    reg.fit(X, y)

    # Check the R2 score nearly equals to one.
    assert_almost_equal(reg.score(X, y), 1.)
def test_prediction_bayesian_ridge_ard_with_constant_input():
    """Check both Bayesian regressors predict a constant target exactly."""
    # Test BayesianRidge and ARDRegression predictions for edge case of
    # constant target vectors
    n_samples, n_features = 4, 5
    generator = check_random_state(42)
    constant_value = generator.rand()
    X = generator.random_sample((n_samples, n_features))

    target_dtype = np.array(constant_value).dtype
    y = np.full(n_samples, constant_value, dtype=target_dtype)
    expected = np.full(n_samples, constant_value, dtype=target_dtype)

    for estimator in [BayesianRidge(), ARDRegression()]:
        estimator.fit(X, y)
        assert_array_almost_equal(estimator.predict(X), expected)
def test_std_bayesian_ridge_ard_with_constant_input():
    """Check the predictive std of both regressors is small for a constant target."""
    # Test BayesianRidge and ARDRegression standard dev. for edge case of
    # constant target vector
    # The standard dev. should be relatively small (< 0.01 is tested here)
    n_samples, n_features = 10, 5
    generator = check_random_state(42)
    constant_value = generator.rand()
    X = generator.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value,
                dtype=np.array(constant_value).dtype)
    std_upper_bound = 0.01

    for estimator in [BayesianRidge(), ARDRegression()]:
        estimator.fit(X, y)
        _, y_std = estimator.predict(X, return_std=True)
        assert_array_less(y_std, std_upper_bound)
def test_update_of_sigma_in_ard():
    """Check ARDRegression.sigma_ after the final iteration when all coefs are pruned."""
    # Checks that `sigma_` is updated correctly after the last iteration
    # of the ARDRegression algorithm. See issue #10128.
    features = np.array([[1, 0],
                         [0, 0]])
    targets = np.array([0, 0])
    estimator = ARDRegression(n_iter=1)
    estimator.fit(features, targets)

    # With the inputs above, ARDRegression prunes both of the two coefficients
    # in the first iteration. Hence, the expected shape of `sigma_` is (0, 0).
    assert estimator.sigma_.shape == (0, 0)

    # Ensure that no error is thrown at prediction stage
    estimator.predict(features, return_std=True)
def test_toy_ard_object():
    """Check ARDRegression roughly learns the identity on a toy dataset."""
    # Test ARDRegression on a toy dataset
    train_X = np.array([[1], [2], [3]])
    train_y = np.array([1, 2, 3])
    estimator = ARDRegression(compute_score=True)
    estimator.fit(train_X, train_y)

    # Check that the model could approximately learn the identity function
    queries = [[1], [3], [4]]
    predictions = estimator.predict(queries)
    assert_array_almost_equal(predictions, [1, 3, 4], 2)
@pytest.mark.parametrize('seed', range(100))
@pytest.mark.parametrize('n_samples, n_features', ((10, 100), (100, 10)))
def test_ard_accuracy_on_easy_problem(seed, n_samples, n_features):
    """Check ARD recovers a coefficient of 1 when y is a column of X.

    NOTE(review): ``n_samples`` and ``n_features`` are parametrized but not
    used below; the data shape is fixed at (250, 3). Confirm whether that
    is intended before wiring them in, since the tight tolerance may not
    hold for other shapes.
    """
    # Check that ARD converges with reasonable accuracy on an easy problem
    # (Github issue #14055)
    generator = np.random.RandomState(seed=seed)
    X = generator.normal(size=(250, 3))
    # The target is exactly the second feature.
    y = X[:, 1]

    model = ARDRegression()
    model.fit(X, y)

    coef_gap = np.abs(1 - model.coef_[1])
    assert coef_gap < 1e-10
def test_return_std():
    """Check the predictive std of both regressors tracks the injected noise level."""
    # Test return_std option for both Bayesian regressors
    d = 5
    n_train = 50
    n_test = 10

    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    def f(X):
        # Noise-free linear target.
        return np.dot(X, w) + b

    def f_noise(X, noise_mult):
        # Linear target plus Gaussian noise scaled by noise_mult.
        return f(X) + np.random.randn(X.shape[0]) * noise_mult

    # Draws come from the global numpy random state, in this order.
    X = np.random.random((n_train, d))
    X_test = np.random.random((n_test, d))

    # The comparison precision (decimal) grows as the noise shrinks.
    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        y = f_noise(X, noise_mult)

        for model in (BayesianRidge(), ARDRegression()):
            model.fit(X, y)
            _, y_std = model.predict(X_test, return_std=True)
            assert_array_almost_equal(y_std, noise_mult, decimal=decimal)
@pytest.mark.parametrize('seed', range(10))
def test_update_sigma(seed):
    """Check ARDRegression's two sigma-update helpers return the same matrix."""
    # make sure the two update_sigma() helpers are equivalent. The woodbury
    # formula is used when n_samples < n_features, and the other one is used
    # otherwise.
    generator = np.random.RandomState(seed)

    # set n_samples == n_features to avoid instability issues when inverting
    # the matrices. Using the woodbury formula would be unstable when
    # n_samples > n_features
    n_samples = n_features = 10
    X = generator.randn(n_samples, n_features)
    alpha = 1
    lmbda = np.arange(1, n_features + 1)
    # Keep every feature.
    keep_lambda = np.array([True] * n_features)

    reg = ARDRegression()
    helper_args = (X, alpha, lmbda, keep_lambda)
    sigma = reg._update_sigma(*helper_args)
    sigma_woodbury = reg._update_sigma_woodbury(*helper_args)

    np.testing.assert_allclose(sigma, sigma_woodbury)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,211 @@
# Authors: Manoj Kumar mks542@nyu.edu
# License: BSD 3 clause
import numpy as np
from scipy import optimize, sparse
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.datasets import make_regression
from sklearn.linear_model import (
HuberRegressor, LinearRegression, SGDRegressor, Ridge)
from sklearn.linear_model._huber import _huber_loss_and_gradient
def make_regression_with_outliers(n_samples=50, n_features=20):
    """Build a regression dataset in which about 10% of the rows of X are noise.

    Parameters
    ----------
    n_samples : number of samples passed to make_regression.
    n_features : number of features passed to make_regression.

    Returns
    -------
    X, y : the feature matrix with some rows overwritten, and the targets.
    """
    noise_rng = np.random.RandomState(0)
    X, y = make_regression(
        n_samples=n_samples, n_features=n_features,
        random_state=0, noise=0.05)

    # Replace 10% of the sample with noise. Row indices are drawn with
    # randint, so the same row may be picked more than once.
    n_outliers = int(0.1 * n_samples)
    outlier_rows = noise_rng.randint(0, n_samples, n_outliers)
    outlier_values = noise_rng.normal(0, 1, (n_outliers, X.shape[1]))
    X[outlier_rows, :] = 2.0 * outlier_values
    return X, y
def test_huber_equals_lr_for_high_epsilon():
    """Check HuberRegressor matches LinearRegression when epsilon is large."""
    # Test that HuberRegressor matches LinearRegression for large epsilon
    X, y = make_regression_with_outliers()

    reference = LinearRegression()
    reference.fit(X, y)

    robust = HuberRegressor(epsilon=1e3, alpha=0.0)
    robust.fit(X, y)

    assert_almost_equal(robust.coef_, reference.coef_, 3)
    assert_almost_equal(robust.intercept_, reference.intercept_, 2)
def test_huber_max_iter():
    """Check n_iter_ equals max_iter when max_iter is 1."""
    features, targets = make_regression_with_outliers()
    model = HuberRegressor(max_iter=1)
    model.fit(features, targets)
    assert model.n_iter_ == model.max_iter
def test_huber_gradient():
    """Check the analytic gradient of _huber_loss_and_gradient numerically."""
    # Test that the gradient calculated by _huber_loss_and_gradient is correct
    local_rng = np.random.RandomState(1)
    X, y = make_regression_with_outliers()
    sample_weight = local_rng.randint(1, 3, (y.shape[0]))

    # Extra positional arguments forwarded to _huber_loss_and_gradient.
    extra_args = (X, y, 0.01, 0.1, sample_weight)

    def loss_only(x, *args):
        return _huber_loss_and_gradient(x, *args)[0]

    def grad_only(x, *args):
        return _huber_loss_and_gradient(x, *args)[1]

    # Check for both fit_intercept and otherwise.
    weight_sizes = [X.shape[1] + 1, X.shape[1] + 2]

    # Check using optimize.check_grad that the gradients are equal.
    for _ in range(5):
        for n_features in weight_sizes:
            w = local_rng.randn(n_features)
            # The last entry is forced to be non-negative.
            w[-1] = np.abs(w[-1])
            grad_same = optimize.check_grad(
                loss_only, grad_only, w, *extra_args)
            assert_almost_equal(grad_same, 1e-6, 4)
def test_huber_sample_weights():
    """Check sample_weight handling in HuberRegressor for dense and sparse X."""
    # Test sample_weights implementation in HuberRegressor
    X, y = make_regression_with_outliers()
    huber = HuberRegressor()
    huber.fit(X, y)
    ref_coef = huber.coef_
    ref_intercept = huber.intercept_

    # Rescale coefs before comparing with assert_array_almost_equal to make
    # sure that the number of decimal places used is somewhat insensitive to
    # the amplitude of the coefficients and therefore to the scale of the
    # data and the regularization parameter
    scale = max(np.mean(np.abs(huber.coef_)),
                np.mean(np.abs(huber.intercept_)))

    def assert_scaled_close(actual, reference):
        assert_array_almost_equal(actual / scale, reference / scale)

    # Unit weights must reproduce the unweighted fit.
    huber.fit(X, y, sample_weight=np.ones(y.shape[0]))
    assert_scaled_close(huber.coef_, ref_coef)
    assert_scaled_close(huber.intercept_, ref_intercept)

    # Repeating rows must match giving those rows a larger weight:
    # row 1 appears three times in total, row 3 twice.
    X, y = make_regression_with_outliers(n_samples=5, n_features=20)
    X_new = np.vstack((X, np.vstack((X[1], X[1], X[3]))))
    y_new = np.concatenate((y, [y[1]], [y[1]], [y[3]]))
    huber.fit(X_new, y_new)
    ref_coef = huber.coef_
    ref_intercept = huber.intercept_

    sample_weight = np.ones(X.shape[0])
    sample_weight[1] = 3
    sample_weight[3] = 2
    huber.fit(X, y, sample_weight=sample_weight)
    assert_scaled_close(huber.coef_, ref_coef)
    assert_scaled_close(huber.intercept_, ref_intercept)

    # Test sparse implementation with sample weights.
    X_csr = sparse.csr_matrix(X)
    huber_sparse = HuberRegressor()
    huber_sparse.fit(X_csr, y, sample_weight=sample_weight)
    assert_scaled_close(huber_sparse.coef_, ref_coef)
def test_huber_sparse():
    """Check HuberRegressor gives the same coef_ and outliers_ for dense and CSR X."""
    X, y = make_regression_with_outliers()

    dense_model = HuberRegressor(alpha=0.1)
    dense_model.fit(X, y)

    sparse_model = HuberRegressor(alpha=0.1)
    sparse_model.fit(sparse.csr_matrix(X), y)

    assert_array_almost_equal(sparse_model.coef_, dense_model.coef_)
    assert_array_equal(dense_model.outliers_, sparse_model.outliers_)
def test_huber_scaling_invariant():
    """Check the outlier mask does not change when y, or X and y, are rescaled."""
    # Test that outliers filtering is scaling independent.
    X, y = make_regression_with_outliers()
    huber = HuberRegressor(fit_intercept=False, alpha=0.0, max_iter=100)

    huber.fit(X, y)
    baseline_mask = huber.outliers_
    # Not every sample may be flagged as an outlier.
    assert not np.all(baseline_mask)

    huber.fit(X, 2. * y)
    scaled_y_mask = huber.outliers_
    assert_array_equal(scaled_y_mask, baseline_mask)

    huber.fit(2. * X, 2. * y)
    scaled_both_mask = huber.outliers_
    assert_array_equal(scaled_both_mask, baseline_mask)
def test_huber_and_sgd_same_results():
    """Check HuberRegressor and SGDRegressor (huber loss) reach similar coefficients."""
    # Test they should converge to same coefficients for same parameters
    X, y = make_regression_with_outliers(n_samples=10, n_features=2)
    epsilon = 1.35

    # Fit once to find out the scale parameter. Scale down X and y by scale
    # so that the scale parameter is optimized to 1.0
    huber = HuberRegressor(fit_intercept=False, alpha=0.0, max_iter=100,
                           epsilon=epsilon)
    huber.fit(X, y)
    fitted_scale = huber.scale_
    X_scale = X / fitted_scale
    y_scale = y / fitted_scale
    huber.fit(X_scale, y_scale)
    assert_almost_equal(huber.scale_, 1.0, 3)

    sgdreg = SGDRegressor(
        alpha=0.0, loss="huber", shuffle=True, random_state=0, max_iter=10000,
        fit_intercept=False, epsilon=epsilon, tol=None)
    sgdreg.fit(X_scale, y_scale)
    assert_array_almost_equal(huber.coef_, sgdreg.coef_, 1)
def test_huber_warm_start():
    """Check a warm-started refit takes zero iterations and barely moves coef_."""
    X, y = make_regression_with_outliers()
    model = HuberRegressor(
        alpha=1.0, max_iter=10000, warm_start=True, tol=1e-1)

    model.fit(X, y)
    first_fit_coef = model.coef_.copy()
    # The second fit starts from the solution of the first.
    model.fit(X, y)

    # SciPy performs the tol check after doing the coef updates, so
    # these would be almost same but not equal.
    assert_array_almost_equal(model.coef_, first_fit_coef, 1)

    assert model.n_iter_ == 0
def test_huber_better_r2_score():
    """Check Huber scores better than Ridge on inliers and worse on outliers."""
    # Test that huber returns a better r2 score than non-outliers
    X, y = make_regression_with_outliers()
    huber = HuberRegressor(alpha=0.01)
    huber.fit(X, y)

    # Samples whose absolute residual is below epsilon * scale_ are inliers.
    residuals = np.dot(X, huber.coef_) + huber.intercept_ - y
    inliers = np.abs(residuals) < huber.epsilon * huber.scale_
    outliers = ~inliers

    huber_score = huber.score(X[inliers], y[inliers])
    huber_outlier_score = huber.score(X[outliers], y[outliers])

    # The Ridge regressor should be influenced by the outliers and hence
    # give a worse score on the non-outliers as compared to the huber
    # regressor.
    ridge = Ridge(alpha=0.01)
    ridge.fit(X, y)
    ridge_score = ridge.score(X[inliers], y[inliers])
    ridge_outlier_score = ridge.score(X[outliers], y[outliers])
    assert huber_score > ridge_score

    # The huber model should also fit poorly on the outliers.
    assert ridge_outlier_score > huber_outlier_score
def test_huber_bool():
    """Check that HuberRegressor does not crash on boolean data."""
    X, y = make_regression(
        n_samples=200, n_features=2, noise=4.0, random_state=0)
    X_bool = X > 0
    regressor = HuberRegressor()
    regressor.fit(X_bool, y)

View file

@ -0,0 +1,775 @@
import warnings
import numpy as np
import pytest
from scipy import linalg
from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raises
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_warns
from sklearn.utils._testing import TempMemmap
from sklearn.utils.fixes import np_version, parse_version
from sklearn.exceptions import ConvergenceWarning
from sklearn import linear_model, datasets
from sklearn.linear_model._least_angle import _lars_path_residues
from sklearn.linear_model import LassoLarsIC, lars_path
from sklearn.linear_model import Lars, LassoLars
# TODO: use another dataset that has multiple drops
# Module-level data used by the tests below: the diabetes design matrix and
# target, the Gram matrix X.T @ X and the product X.T @ y (passed to
# lars_path through its Gram / Xy arguments), and the number of samples.
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target
G = np.dot(X.T, X)
Xy = np.dot(X.T, y)
n_samples = y.size
def test_simple():
    """Check the LARS principle on the diabetes data, with verbose output.

    Along the ``method='lar'`` path the largest absolute covariances
    between the features and the residual must stay tied: at step ``i``
    exactly ``i + 1`` of them lie within ``eps`` of the maximum, capped at
    the number of features.
    """
    from contextlib import redirect_stdout
    from io import StringIO

    # verbose=10 exercises the verbose code path; redirect_stdout swallows
    # whatever is printed and restores sys.stdout even if lars_path raises.
    with redirect_stdout(StringIO()):
        _, _, coef_path_ = linear_model.lars_path(
            X, y, method='lar', verbose=10)

    eps = 1e-3
    for i, coef_ in enumerate(coef_path_.T):
        res = y - np.dot(X, coef_)
        cov = np.dot(X.T, res)
        C = np.max(abs(cov))
        ocur = len(cov[C - eps < abs(cov)])
        if i < X.shape[1]:
            assert ocur == i + 1
        else:
            # no more than max_pred variables can go into the active set
            assert ocur == X.shape[1]
def test_simple_precomputed():
    """Check the tied-covariance property with a precomputed Gram matrix."""
    _, _, coef_path_ = linear_model.lars_path(X, y, Gram=G, method='lar')

    n_columns = X.shape[1]
    tolerance = 1e-3
    for step, step_coef in enumerate(coef_path_.T):
        residual = y - np.dot(X, step_coef)
        covariance = np.dot(X.T, residual)
        largest = np.max(abs(covariance))
        n_tied = len(covariance[largest - tolerance < abs(covariance)])
        # no more than max_pred variables can go into the active set
        expected_tied = step + 1 if step < n_columns else n_columns
        assert n_tied == expected_tied
def _assert_same_lars_path_result(output1, output2):
    """Assert that two result tuples have equal length and close items.

    Corresponding items are compared pairwise with ``assert_allclose``.
    """
    n_items = len(output1)
    assert n_items == len(output2)
    for item1, item2 in zip(output1, output2):
        assert_allclose(item1, item2)
@pytest.mark.parametrize('method', ['lar', 'lasso'])
@pytest.mark.parametrize('return_path', [True, False])
def test_lars_path_gram_equivalent(method, return_path):
    """Check that ``lars_path_gram`` matches ``lars_path`` given a Gram."""
    shared_kwargs = dict(Gram=G, method=method, return_path=return_path)
    from_sufficient_stats = linear_model.lars_path_gram(
        Xy=Xy, n_samples=n_samples, **shared_kwargs)
    from_data = linear_model.lars_path(X, y, **shared_kwargs)
    _assert_same_lars_path_result(from_sufficient_stats, from_data)
def test_x_none_gram_none_raises_value_error():
    """Check that ``lars_path`` raises when both X and Gram are None."""
    Xy = np.dot(X.T, y)
    with pytest.raises(ValueError):
        linear_model.lars_path(None, y, Gram=None, Xy=Xy)
def test_all_precomputed():
    """Check ``lars_path`` with precomputed Gram and Xy for both methods."""
    gram = np.dot(X.T, X)
    xty = np.dot(X.T, y)
    for method in ('lar', 'lasso'):
        reference = linear_model.lars_path(X, y, method=method)
        precomputed = linear_model.lars_path(
            X, y, Gram=gram, Xy=xty, method=method)
        for expected, got in zip(reference, precomputed):
            assert_array_almost_equal(expected, got)
@pytest.mark.filterwarnings('ignore: `rcond` parameter will change')
# numpy deprecation
def test_lars_lstsq():
    """Check that LassoLars(alpha=0) reaches the least-squares solution."""
    X1 = 3 * X  # use un-normalized dataset
    clf = linear_model.LassoLars(alpha=0.)
    clf.fit(X1, y)

    # Pass rcond explicitly to avoid the FutureWarning about its default
    # value change when numpy >= 1.14.
    if np_version >= parse_version('1.14'):
        rcond = None
    else:
        rcond = -1
    lstsq_result = np.linalg.lstsq(X1, y, rcond=rcond)
    coef_lstsq = lstsq_result[0]
    assert_array_almost_equal(clf.coef_, coef_lstsq)
@pytest.mark.filterwarnings('ignore:`rcond` parameter will change')
# numpy deprecation
def test_lasso_gives_lstsq_solution():
    """Check that the lasso path of ``lars_path`` ends at least squares."""
    _, _, coef_path_ = linear_model.lars_path(X, y, method='lasso')
    end_of_path = coef_path_[:, -1]
    lstsq_result = np.linalg.lstsq(X, y)
    coef_lstsq = lstsq_result[0]
    assert_array_almost_equal(coef_lstsq, end_of_path)
def test_collinearity():
    """Check that ``lars_path`` is robust to collinearity in the input."""
    # The first two columns of this design are identical.
    X = np.array([[3., 3., 1.],
                  [2., 2., 0.],
                  [1., 1., 0]])
    y = np.array([1., 0., 0])
    rng = np.random.RandomState(0)

    quiet_lars_path = ignore_warnings(linear_model.lars_path)
    _, _, coef_path_ = quiet_lars_path(X, y, alpha_min=0.01)
    assert not np.isnan(coef_path_).any()
    residual = np.dot(X, coef_path_[:, -1]) - y
    squared_error = (residual ** 2).sum()
    assert squared_error < 1.  # just make sure it's bounded

    # Second case: random design with an all-zero target.
    n_samples = 10
    X = rng.rand(n_samples, 5)
    y = np.zeros(n_samples)
    lasso_kwargs = dict(
        Gram='auto', copy_X=False, copy_Gram=False, alpha_min=0.,
        method='lasso', verbose=0, max_iter=500)
    _, _, coef_path_ = linear_model.lars_path(X, y, **lasso_kwargs)
    assert_array_almost_equal(coef_path_, np.zeros_like(coef_path_))
def test_no_path():
    """Check that ``return_path=False`` returns the end of the full path."""
    full_alphas, _, full_coef_path = linear_model.lars_path(
        X, y, method='lar')
    last_alpha, _, last_coef = linear_model.lars_path(
        X, y, method='lar', return_path=False)

    assert_array_almost_equal(last_coef, full_coef_path[:, -1])
    assert last_alpha == full_alphas[-1]
def test_no_path_precomputed():
    """Check ``return_path=False`` when a precomputed Gram is given."""
    gram_kwargs = dict(method='lar', Gram=G)
    full_alphas, _, full_coef_path = linear_model.lars_path(
        X, y, **gram_kwargs)
    last_alpha, _, last_coef = linear_model.lars_path(
        X, y, return_path=False, **gram_kwargs)

    assert_array_almost_equal(last_coef, full_coef_path[:, -1])
    assert last_alpha == full_alphas[-1]
def test_no_path_all_precomputed():
    """Check ``return_path=False`` when both Gram and Xy are precomputed."""
    X, y = 3 * diabetes.data, diabetes.target
    G = np.dot(X.T, X)
    Xy = np.dot(X.T, y)
    lasso_kwargs = dict(method='lasso', Xy=Xy, Gram=G, alpha_min=0.9)

    full_alphas, _, full_coef_path = linear_model.lars_path(
        X, y, **lasso_kwargs)
    last_alpha, _, last_coef = linear_model.lars_path(
        X, y, return_path=False, **lasso_kwargs)

    assert_array_almost_equal(last_coef, full_coef_path[:, -1])
    assert last_alpha == full_alphas[-1]
@pytest.mark.parametrize(
    'classifier',
    [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC])
def test_lars_precompute(classifier):
    """Check that different ``precompute`` values give the same coefs.

    The reference fit uses an explicit Gram matrix; it is compared against
    ``True``, ``False``, ``'auto'`` and ``None``.
    """
    gram = np.dot(X.T, X)
    reference = classifier(precompute=gram)
    output_1 = ignore_warnings(reference.fit)(X, y).coef_

    for precompute in (True, False, 'auto', None):
        candidate = classifier(precompute=precompute)
        output_2 = candidate.fit(X, y).coef_
        assert_array_almost_equal(output_1, output_2, decimal=8)
def test_singular_matrix():
    """Check ``lars_path`` when the input is a singular matrix."""
    X1 = np.array([[1, 1.], [1., 1.]])
    y1 = np.array([1, 1])
    expected_path = [[0, 0], [1, 0]]
    _, _, coef_path = linear_model.lars_path(X1, y1)
    assert_array_almost_equal(coef_path.T, expected_path)
def test_rank_deficient_design():
    """Compare LassoLars and coordinate-descent Lasso on deficient input.

    Consistency test: on each design the LassoLars objective value must
    not exceed the coordinate-descent one (up to a 1e-8 relative slack).
    """
    y = [5, 0, 5]
    designs = (
        [[5, 0],
         [0, 5],
         [10, 10]],
        [[10, 10, 0],
         [1e-32, 0, 0],
         [0, 0, 1]],
    )

    def lasso_objective(design, coef):
        # Lasso objective for alpha=.1 and the 3 samples of these designs.
        return ((1. / (2. * 3.)) * linalg.norm(y - np.dot(design, coef)) ** 2
                + .1 * linalg.norm(coef, 1))

    for X in designs:
        # To be able to use the coefs to compute the objective function,
        # we need to turn off normalization
        lars = linear_model.LassoLars(.1, normalize=False)
        coef_lars_ = lars.fit(X, y).coef_
        obj_lars = lasso_objective(X, coef_lars_)

        coord_descent = linear_model.Lasso(.1, tol=1e-6, normalize=False)
        coef_cd_ = coord_descent.fit(X, y).coef_
        obj_cd = lasso_objective(X, coef_cd_)

        assert obj_lars < obj_cd * (1. + 1e-8)
def test_lasso_lars_vs_lasso_cd():
    """Check that LassoLars and coordinate-descent Lasso agree.

    The comparison is done along the lasso path on rescaled data, for the
    estimator classes over a range of alphas, and along the path again
    with ``normalize=True``.
    """
    def assert_path_matches_cd(data, lasso_cd):
        # Refit the coordinate-descent model at every non-zero alpha of
        # the LARS lasso path and compare the coefficients.
        alphas, _, lasso_path = linear_model.lars_path(
            data, y, method='lasso')
        for path_coef, path_alpha in zip(lasso_path.T, alphas):
            if path_alpha == 0:
                continue
            lasso_cd.alpha = path_alpha
            lasso_cd.fit(data, y)
            error = linalg.norm(path_coef - lasso_cd.coef_)
            assert error < 0.01

    X_scaled = 3 * diabetes.data
    assert_path_matches_cd(
        X_scaled, linear_model.Lasso(fit_intercept=False, tol=1e-8))

    # similar test, with the estimator classes
    for alpha in np.linspace(1e-2, 1 - 1e-2, 20):
        lars_model = linear_model.LassoLars(alpha=alpha, normalize=False)
        lars_model.fit(X_scaled, y)
        cd_model = linear_model.Lasso(
            alpha=alpha, tol=1e-8, normalize=False)
        cd_model.fit(X_scaled, y)
        err = linalg.norm(lars_model.coef_ - cd_model.coef_)
        assert err < 1e-3

    # same test, with normalized data
    assert_path_matches_cd(
        diabetes.data,
        linear_model.Lasso(fit_intercept=False, normalize=True, tol=1e-8))
def test_lasso_lars_vs_lasso_cd_early_stopping():
    """Check LassoLars against coordinate descent under early stopping.

    ``alpha_min`` values stop the path before, in the middle of, and in
    the last part of the path; the test runs once without and once with
    normalization on the coordinate-descent side.
    """
    alphas_min = [10, 0.9, 1e-4]
    cd_configurations = (
        dict(fit_intercept=False, tol=1e-8),
        # same test, with normalization
        dict(normalize=True, tol=1e-8),
    )
    for cd_kwargs in cd_configurations:
        for alpha_min in alphas_min:
            alphas, _, lasso_path = linear_model.lars_path(
                X, y, method='lasso', alpha_min=alpha_min)
            lasso_cd = linear_model.Lasso(**cd_kwargs)
            lasso_cd.alpha = alphas[-1]
            lasso_cd.fit(X, y)
            error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
            assert error < 0.01
def test_lasso_lars_path_length():
    """Check that the path length of LassoLars is right.

    A model whose alpha equals the third alpha of the full path must
    reproduce the first three alphas of that path.
    """
    full_model = linear_model.LassoLars()
    full_model.fit(X, y)
    full_alphas = full_model.alphas_

    truncated_model = linear_model.LassoLars(alpha=full_alphas[2])
    truncated_model.fit(X, y)
    assert_array_almost_equal(full_alphas[:3], truncated_model.alphas_)

    # Also check that the sequence of alphas is always decreasing
    assert np.all(np.diff(full_model.alphas_) < 0)
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    """Check lasso LARS on a very ill-conditioned design.

    The LARS path must not blow up and must stay somewhat close (one
    decimal) to the solution given by ``lasso_path`` on the same alphas.
    """
    rng = np.random.RandomState(42)

    # Generate data: a k-sparse weight vector over m features, n samples.
    n, m = 70, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    shuffled = np.arange(0, m)
    rng.shuffle(shuffled)
    supp = shuffled[:k]
    signs = np.sign(rng.randn(k, 1))
    magnitudes = rng.rand(k, 1) + 1
    w[supp] = signs * magnitudes
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    lars_alphas, _, lars_coef = linear_model.lars_path(
        X, y, method='lasso')
    _, lasso_coef2, _ = linear_model.lasso_path(
        X, y, alphas=lars_alphas, tol=1e-6, fit_intercept=False)

    assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
def test_lasso_lars_vs_lasso_cd_ill_conditioned2():
    """Check LARS against coordinate descent far along an ill path.

    The design forces LARS to go far in the path to converge. It used to
    be the case that Lars had to use the drop-for-good strategy for this,
    but this is no longer the case with the equality_tolerance checks.
    """
    X = [[1e20, 1e20, 0],
         [-1e-32, 0, 0],
         [1, 1, 1]]
    y = [10, 10, 1]
    alpha = .0001

    def objective_function(coef):
        # Lasso objective: scaled squared residual plus L1 penalty.
        squared_loss = linalg.norm(y - np.dot(X, coef)) ** 2
        return (1. / (2. * len(X)) * squared_loss
                + alpha * linalg.norm(coef, 1))

    lars = linear_model.LassoLars(alpha=alpha, normalize=False)
    assert_warns(ConvergenceWarning, lars.fit, X, y)
    lars_obj = objective_function(lars.coef_)

    coord_descent = linear_model.Lasso(
        alpha=alpha, tol=1e-4, normalize=False)
    cd_coef_ = coord_descent.fit(X, y).coef_
    cd_obj = objective_function(cd_coef_)

    assert lars_obj < cd_obj * (1. + 1e-8)
def test_lars_add_features():
    """Check that Lars yields finite coefs on a 5x5 Hilbert matrix.

    Assures that at least some features get added if necessary
    (test for 6d2b4c).
    """
    n = 5
    row_offsets = np.arange(n)[:, np.newaxis]
    column_numbers = np.arange(1, n + 1)
    # Hilbert matrix
    H = 1. / (column_numbers + row_offsets)
    target = np.arange(n)
    clf = linear_model.Lars(fit_intercept=False).fit(H, target)
    assert np.all(np.isfinite(clf.coef_))
def test_lars_n_nonzero_coefs(verbose=False):
    """Check that Lars stops at the requested number of non-zero coefs."""
    n_requested = 6
    lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose)
    lars.fit(X, y)
    nonzero_indices = lars.coef_.nonzero()[0]
    assert len(nonzero_indices) == n_requested
    # The path should be of length 6 + 1 in a Lars going down to 6
    # non-zero coefs
    assert len(lars.alphas_) == 7
@ignore_warnings
def test_multitarget():
    """Check that estimators handle a multidimensional y correctly.

    Each estimator is fitted on the two-column target, then on every
    column separately; the per-target attributes and predictions of the
    joint fit must match the single-target ones.
    """
    Y = np.vstack([y, y ** 2]).T
    n_targets = Y.shape[1]
    estimators = [
        linear_model.LassoLars(),
        linear_model.Lars(),
        # regression test for gh-1615
        linear_model.LassoLars(fit_intercept=False),
        linear_model.Lars(fit_intercept=False),
    ]
    compared_attributes = ('alphas_', 'active_', 'coef_', 'coef_path_')

    for estimator in estimators:
        estimator.fit(X, Y)
        Y_pred = estimator.predict(X)
        joint_fit = {name: getattr(estimator, name)
                     for name in compared_attributes}
        for k in range(n_targets):
            estimator.fit(X, Y[:, k])
            y_pred = estimator.predict(X)
            for name in compared_attributes:
                assert_array_almost_equal(
                    joint_fit[name][k], getattr(estimator, name))
            assert_array_almost_equal(Y_pred[:, k], y_pred)
def test_lars_cv():
    """Check how the alpha picked by LassoLarsCV varies with sample count.

    The estimator is refitted on the first 400, 200 and 100 samples and
    the selected alpha must strictly increase from one fit to the next.
    This property is not actually guaranteed in general and is just a
    property of the given dataset, with the given steps chosen.
    """
    previous_alpha = 0
    lars_cv = linear_model.LassoLarsCV()
    for length in (400, 200, 100):
        X_subset = diabetes.data[:length]
        y_subset = diabetes.target[:length]
        lars_cv.fit(X_subset, y_subset)
        np.testing.assert_array_less(previous_alpha, lars_cv.alpha_)
        previous_alpha = lars_cv.alpha_
    assert not hasattr(lars_cv, 'n_nonzero_coefs')
def test_lars_cv_max_iter(recwarn):
    """Check that LassoLarsCV with a small ``max_iter`` emits no warning.

    The fit runs with numpy divide/invalid floating point errors raised as
    exceptions, on the diabetes data extended with two identical random
    columns.
    """
    warnings.simplefilter('always')
    with np.errstate(divide='raise', invalid='raise'):
        y = diabetes.target
        rng = np.random.RandomState(42)
        x = rng.randn(len(y))
        X = np.c_[diabetes.data, x, x]  # add correlated features
        lars_cv = linear_model.LassoLarsCV(max_iter=5, cv=5)
        lars_cv.fit(X, y)
    # Check that there is no warning in general and no ConvergenceWarning
    # in particular.
    # Materialize the string representation of the warning to get a more
    # informative error message in case of AssertionError.
    recorded_warnings = [str(w) for w in recwarn]
    assert recorded_warnings == []
def test_lasso_lars_ic():
    """Check LassoLarsIC with the 'bic' and 'aic' criteria.

    After adding 5 random features to the diabetes data:
    - alpha_bic > alpha_aic
    - n_nonzero_bic < n_nonzero_aic
    - BIC selects only original (good) features
    An unknown criterion must raise ``ValueError`` at fit time.
    """
    lars_bic = linear_model.LassoLarsIC('bic')
    lars_aic = linear_model.LassoLarsIC('aic')
    rng = np.random.RandomState(42)
    n_original_features = diabetes.data.shape[1]
    bad_features = rng.randn(diabetes.data.shape[0], 5)
    X = np.c_[diabetes.data, bad_features]  # add 5 bad features

    lars_bic.fit(X, y)
    lars_aic.fit(X, y)
    nonzero_bic = np.where(lars_bic.coef_)[0]
    nonzero_aic = np.where(lars_aic.coef_)[0]
    assert lars_bic.alpha_ > lars_aic.alpha_
    assert len(nonzero_bic) < len(nonzero_aic)
    assert np.max(nonzero_bic) < n_original_features

    # test error on unknown IC
    lars_broken = linear_model.LassoLarsIC('<unknown>')
    with pytest.raises(ValueError):
        lars_broken.fit(X, y)
def test_lars_path_readonly_data():
    """Check ``_lars_path_residues`` on memory-mapped fold data.

    When using automated memory mapping on large input, the fold data is
    in read-only mode. This is a non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/4597
    """
    splitted_data = train_test_split(X, y, random_state=42)
    with TempMemmap(splitted_data) as memmapped:
        X_train, X_test, y_train, y_test = memmapped
        # The following should not fail despite copy=False
        _lars_path_residues(X_train, y_train, X_test, y_test, copy=False)
def test_lars_path_positive_constraint():
    """Check the ``positive`` parameter of ``lars_path`` on diabetes data.

    ``method='lar'`` must reject ``positive=True``; with ``method='lasso'``
    some coefficients are negative when ``positive=False`` and none are
    when ``positive=True``.
    """
    err_msg = "Positive constraint not supported for 'lar' coding method."
    with pytest.raises(ValueError, match=err_msg):
        linear_model.lars_path(diabetes['data'], diabetes['target'],
                               method='lar', positive=True)

    lasso_kwargs = dict(return_path=True, method='lasso')

    _, _, unconstrained = linear_model.lars_path(
        X, y, positive=False, **lasso_kwargs)
    assert unconstrained.min() < 0

    _, _, constrained = linear_model.lars_path(
        X, y, positive=True, **lasso_kwargs)
    assert constrained.min() >= 0
# Parameters for testing the positive option on the estimator classes:
# options applied to every estimator, plus per-estimator extras keyed by
# class name.
# NOTE(review): test_estimatorclasses_positive_constraint defines local
# variables with these same names and values, so these module-level copies
# look unused -- confirm before removing.
default_parameter = {'fit_intercept': False}
estimator_parameter_map = {'LassoLars': {'alpha': 0.1},
                           'LassoLarsCV': {},
                           'LassoLarsIC': {}}
def test_estimatorclasses_positive_constraint():
    """Check that the estimator classes pass on the ``positive`` option.

    For each class, the fit with ``positive=False`` must yield at least one
    negative coefficient and the fit with ``positive=True`` none.
    """
    default_parameter = {'fit_intercept': False}
    estimator_parameter_map = {'LassoLars': {'alpha': 0.1},
                               'LassoLarsCV': {},
                               'LassoLarsIC': {}}
    for estname, extra_params in estimator_parameter_map.items():
        params = {**default_parameter, **extra_params}
        estimator_class = getattr(linear_model, estname)

        unconstrained = estimator_class(positive=False, **params)
        unconstrained.fit(X, y)
        assert unconstrained.coef_.min() < 0

        constrained = estimator_class(positive=True, **params)
        constrained.fit(X, y)
        assert min(constrained.coef_) >= 0
def test_lasso_lars_vs_lasso_cd_positive():
    """Check LassoLars against coordinate descent with ``positive=True``.

    This mirrors ``test_lasso_lars_vs_lasso_cd`` with the positive option
    added; the alpha range of the estimator comparison had to be adapted
    (see the comment in the body).
    """
    # not normalized data
    X_scaled = 3 * diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(
        X_scaled, y, method='lasso', positive=True)
    lasso_cd = linear_model.Lasso(
        fit_intercept=False, tol=1e-8, positive=True)
    for path_coef, path_alpha in zip(lasso_path.T, alphas):
        if path_alpha == 0:
            continue
        lasso_cd.alpha = path_alpha
        lasso_cd.fit(X_scaled, y)
        error = linalg.norm(path_coef - lasso_cd.coef_)
        assert error < 0.01

    # The range of alphas chosen for coefficient comparison here is
    # restricted as compared with the above test without the positive
    # option. This is due to the circumstance that the Lars-Lasso algorithm
    # does not converge to the least-squares-solution for small alphas, see
    # 'Least Angle Regression' by Efron et al 2004. The coefficients are
    # typically in congruence up to the smallest alpha reached by the
    # Lars-Lasso algorithm and start to diverge thereafter. See
    # https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff
    for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
        lars_model = linear_model.LassoLars(
            fit_intercept=False, alpha=alpha, normalize=False,
            positive=True)
        lars_model.fit(X_scaled, y)
        cd_model = linear_model.Lasso(
            fit_intercept=False, alpha=alpha, tol=1e-8, normalize=False,
            positive=True)
        cd_model.fit(X_scaled, y)
        err = linalg.norm(lars_model.coef_ - cd_model.coef_)
        assert err < 1e-3

    # normalized data
    X_raw = diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(
        X_raw, y, method='lasso', positive=True)
    lasso_cd = linear_model.Lasso(
        fit_intercept=False, normalize=True, tol=1e-8, positive=True)
    # don't include alpha=0
    for path_coef, path_alpha in zip(lasso_path.T[:-1], alphas[:-1]):
        lasso_cd.alpha = path_alpha
        lasso_cd.fit(X_raw, y)
        error = linalg.norm(path_coef - lasso_cd.coef_)
        assert error < 0.01
def test_lasso_lars_vs_R_implementation():
    """Compare LassoLars coefficient paths with the R ``lars`` library.

    The data comes from bug report 7778 and two scenarios are checked:

    1) fit_intercept=False and normalize=False
    2) fit_intercept=True and normalize=True
    """
    # Let's generate the data used in the bug report 7778
    y = np.array([-6.45006793, -3.51251449, -8.52445396, 6.12277822,
                  -19.42109366])
    x = np.array([[0.47299829, 0, 0, 0, 0],
                  [0.08239882, 0.85784863, 0, 0, 0],
                  [0.30114139, -0.07501577, 0.80895216, 0, 0],
                  [-0.01460346, -0.1015233, 0.0407278, 0.80338378, 0],
                  [-0.69363927, 0.06754067, 0.18064514, -0.0803561,
                   0.40427291]])
    X = x.T

    # ---------------------------------------------------------------------
    # Scenario 1: fit_intercept=False and normalize=False
    #
    # The R result was obtained using the following code:
    #
    # library(lars)
    # model_lasso_lars = lars(X, t(y), type="lasso", intercept=FALSE,
    #                         trace=TRUE, normalize=FALSE)
    # r = t(model_lasso_lars$beta)
    # ---------------------------------------------------------------------
    r = np.array([[0, 0, 0, 0, 0, -79.810362809499026, -83.528788732782829,
                   -83.777653739190711, -83.784156932888934,
                   -84.033390591756657],
                  [0, 0, 0, 0, -0.476624256777266, 0, 0, 0, 0,
                   0.025219751009936],
                  [0, -3.577397088285891, -4.702795355871871,
                   -7.016748621359461, -7.614898471899412, -0.336938391359179,
                   0, 0, 0.001213370600853, 0.048162321585148],
                  [0, 0, 0, 2.231558436628169, 2.723267514525966,
                   2.811549786389614, 2.813766976061531, 2.817462468949557,
                   2.817368178703816, 2.816221090636795],
                  [0, 0, -1.218422599914637, -3.457726183014808,
                   -4.021304522060710, -45.827461592423745,
                   -47.776608869312305,
                   -47.911561610746404, -47.914845922736234,
                   -48.039562334265717]])

    model_lasso_lars = linear_model.LassoLars(
        alpha=0, fit_intercept=False, normalize=False)
    model_lasso_lars.fit(X, y)
    skl_betas = model_lasso_lars.coef_path_
    assert_array_almost_equal(r, skl_betas, decimal=12)

    # ---------------------------------------------------------------------
    # Scenario 2: fit_intercept=True and normalize=True
    #
    # Note: When normalize is equal to True, R returns the coefficients in
    # their original units, that is, they are rescaled back, whereas
    # sklearn does not do that, therefore, we need to do this step before
    # comparing their results.
    #
    # The R result was obtained using the following code:
    #
    # library(lars)
    # model_lasso_lars2 = lars(X, t(y), type="lasso", intercept=TRUE,
    #                          trace=TRUE, normalize=TRUE)
    # r2 = t(model_lasso_lars2$beta)
    # ---------------------------------------------------------------------
    r2 = np.array([[0, 0, 0, 0, 0],
                   [0, 0, 0, 8.371887668009453, 19.463768371044026],
                   [0, 0, 0, 0, 9.901611055290553],
                   [0, 7.495923132833733, 9.245133544334507,
                    17.389369207545062, 26.971656815643499],
                   [0, 0, -1.569380717440311, -5.924804108067312,
                    -7.996385265061972]])

    model_lasso_lars2 = linear_model.LassoLars(alpha=0, normalize=True)
    model_lasso_lars2.fit(X, y)
    skl_betas2 = model_lasso_lars2.coef_path_

    # Let's rescale back the coefficients returned by sklearn before
    # comparing against the R result (read the note above)
    centered = X - np.mean(X, axis=0)
    column_norms = np.sqrt(np.sum(centered ** 2, axis=0))
    skl_betas2 /= column_norms[:, np.newaxis]

    assert_array_almost_equal(r2, skl_betas2, decimal=12)
@pytest.mark.parametrize('copy_X', [True, False])
def test_lasso_lars_copyX_behaviour(copy_X):
    """
    Test that the ``copy_X`` constructor argument is not being overridden
    (it was until at least version 0.21): X must be left unchanged by the
    fit exactly when ``copy_X`` is True.
    """
    lasso_lars = LassoLarsIC(copy_X=copy_X, precompute=False)
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    X_before_fit = X.copy()
    y = X[:, 2]
    lasso_lars.fit(X, y)
    X_unchanged = np.array_equal(X, X_before_fit)
    assert copy_X == X_unchanged
@pytest.mark.parametrize('copy_X', [True, False])
def test_lasso_lars_fit_copyX_behaviour(copy_X):
    """
    Test that the ``copy_X`` argument given to ``.fit`` overrides the
    default ``__init__`` value: X must be left unchanged by the fit exactly
    when ``copy_X`` is True.
    """
    lasso_lars = LassoLarsIC(precompute=False)
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    X_before_fit = X.copy()
    y = X[:, 2]
    lasso_lars.fit(X, y, copy_X=copy_X)
    X_unchanged = np.array_equal(X, X_before_fit)
    assert copy_X == X_unchanged
@pytest.mark.parametrize('est', (LassoLars(alpha=1e-3), Lars()))
def test_lars_with_jitter(est):
    """Check that a small amount of jitter helps stability.

    Uses the example provided in issue #2746.
    """
    X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0],
                  [0.0, -1.0, 0.0, 0.0, 0.0]])
    y = [-2.5, -2.5]
    expected_coef = [0, 2.5, 0, 2.5, 0]

    # set to fit_intercept to False since target is constant and we want
    # check the value of coef. coef would be all zeros otherwise.
    est.set_params(fit_intercept=False)
    est_jitter = clone(est).set_params(jitter=10e-8, random_state=0)

    est.fit(X, y)
    est_jitter.fit(X, y)

    mean_squared_gap = np.mean((est.coef_ - est_jitter.coef_)**2)
    assert mean_squared_gap > .1
    np.testing.assert_allclose(est_jitter.coef_, expected_coef, rtol=1e-3)
def test_X_none_gram_not_none():
    """Check the error raised for ``X=None`` with a non-None Gram."""
    expected_message = "X cannot be None if Gram is not None"
    with pytest.raises(ValueError, match=expected_message):
        lars_path(X=None, y=[1], Gram='not None')
def test_copy_X_with_auto_gram():
    """Check that ``copy_X=True`` with ``Gram='auto'`` leaves X untouched.

    Non-regression test for #17789.
    """
    rng = np.random.RandomState(42)
    X = rng.rand(6, 6)
    y = rng.rand(6)
    X_snapshot = X.copy()

    path_kwargs = dict(Gram='auto', copy_X=True, method='lasso')
    linear_model.lars_path(X, y, **path_kwargs)

    # X did not change
    assert_allclose(X, X_snapshot)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,232 @@
# Author: Vlad Niculae
# License: BSD 3 clause
import numpy as np
from sklearn.utils._testing import assert_raises
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_warns
from sklearn.utils._testing import ignore_warnings
from sklearn.linear_model import (orthogonal_mp, orthogonal_mp_gram,
OrthogonalMatchingPursuit,
OrthogonalMatchingPursuitCV,
LinearRegression)
from sklearn.utils import check_random_state
from sklearn.datasets import make_sparse_coded_signal
# Problem dimensions used by the tests below.
n_samples, n_features, n_nonzero_coefs, n_targets = 25, 35, 5, 3
# NOTE: the keyword names of make_sparse_coded_signal do not line up with
# this module's names: n_samples receives n_targets and n_features receives
# n_samples.
y, X, gamma = make_sparse_coded_signal(n_samples=n_targets,
                                       n_components=n_features,
                                       n_features=n_samples,
                                       n_nonzero_coefs=n_nonzero_coefs,
                                       random_state=0)
# Make X not of norm 1 for testing
X *= 10
y *= 10
# Gram matrix and X.T @ y product, passed to orthogonal_mp_gram below.
G, Xy = np.dot(X.T, X), np.dot(X.T, y)
# this makes X (n_samples, n_features)
# and y (n_samples, 3)
def test_correct_shapes():
    """Check the shape of ``orthogonal_mp`` output for 1-D and 2-D y."""
    single_target_coef = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)
    assert single_target_coef.shape == (n_features,)

    multi_target_coef = orthogonal_mp(X, y, n_nonzero_coefs=5)
    assert multi_target_coef.shape == (n_features, 3)
def test_correct_shapes_gram():
    """Check the shape of ``orthogonal_mp_gram`` output for 1-D/2-D Xy."""
    single_target_coef = orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5)
    assert single_target_coef.shape == (n_features,)

    multi_target_coef = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5)
    assert multi_target_coef.shape == (n_features, 3)
def test_n_nonzero_coefs():
    """Check that at most ``n_nonzero_coefs`` coefficients are non-zero."""
    first_target = y[:, 0]

    coef = orthogonal_mp(X, first_target, n_nonzero_coefs=5)
    assert np.count_nonzero(coef) <= 5

    coef_precomputed = orthogonal_mp(
        X, first_target, n_nonzero_coefs=5, precompute=True)
    assert np.count_nonzero(coef_precomputed) <= 5
def test_tol():
    """Check that the squared residual does not exceed ``tol``."""
    tol = 0.5
    first_target = y[:, 0]
    coef_plain = orthogonal_mp(X, first_target, tol=tol)
    coef_gram = orthogonal_mp(X, first_target, tol=tol, precompute=True)

    for coef in (coef_plain, coef_gram):
        squared_residual = np.sum((first_target - np.dot(X, coef)) ** 2)
        assert squared_residual <= tol
def test_with_without_gram():
    """Check that ``precompute=True`` does not change the solution."""
    without_gram = orthogonal_mp(X, y, n_nonzero_coefs=5)
    with_gram = orthogonal_mp(X, y, n_nonzero_coefs=5, precompute=True)
    assert_array_almost_equal(without_gram, with_gram)
def test_with_without_gram_tol():
    """Check that ``precompute=True`` gives the same result under ``tol``."""
    without_gram = orthogonal_mp(X, y, tol=1.)
    with_gram = orthogonal_mp(X, y, tol=1., precompute=True)
    assert_array_almost_equal(without_gram, with_gram)
def test_unreachable_accuracy():
    """Check that ``tol=0`` behaves like selecting all ``n_features``.

    With ``precompute=True`` the ``tol=0`` call is also expected to emit a
    RuntimeWarning.
    """
    with_zero_tol = orthogonal_mp(X, y, tol=0)
    with_all_atoms = orthogonal_mp(X, y, n_nonzero_coefs=n_features)
    assert_array_almost_equal(with_zero_tol, with_all_atoms)

    gram_zero_tol = assert_warns(
        RuntimeWarning, orthogonal_mp, X, y, tol=0, precompute=True)
    gram_all_atoms = orthogonal_mp(
        X, y, precompute=True, n_nonzero_coefs=n_features)
    assert_array_almost_equal(gram_zero_tol, gram_all_atoms)
def test_bad_input():
    """Check that invalid ``tol`` / ``n_nonzero_coefs`` raise ValueError."""
    invalid_kwargs = (
        dict(tol=-1),
        dict(n_nonzero_coefs=-1),
        dict(n_nonzero_coefs=n_features + 1),
    )
    for kwargs in invalid_kwargs:
        assert_raises(ValueError, orthogonal_mp, X, y, **kwargs)
    for kwargs in invalid_kwargs:
        assert_raises(ValueError, orthogonal_mp_gram, G, Xy, **kwargs)
def test_perfect_signal_recovery():
    """Check that both solvers recover the support and values of gamma."""
    true_code = gamma[:, 0]
    idx, = true_code.nonzero()
    recovered = {
        'data': orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5),
        'gram': orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5),
    }
    assert_array_equal(idx, np.flatnonzero(recovered['data']))
    assert_array_equal(idx, np.flatnonzero(recovered['gram']))
    assert_array_almost_equal(true_code, recovered['data'], decimal=2)
    assert_array_almost_equal(true_code, recovered['gram'], decimal=2)
def test_orthogonal_mp_gram_readonly():
    """Check ``orthogonal_mp_gram`` on read-only inputs without copies.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/5956
    """
    true_code = gamma[:, 0]
    idx, = true_code.nonzero()

    G_readonly = G.copy()
    Xy_readonly = Xy.copy()
    for frozen in (G_readonly, Xy_readonly):
        frozen.setflags(write=False)

    gamma_gram = orthogonal_mp_gram(
        G_readonly, Xy_readonly[:, 0], n_nonzero_coefs=5,
        copy_Gram=False, copy_Xy=False)

    assert_array_equal(idx, np.flatnonzero(gamma_gram))
    assert_array_almost_equal(true_code, gamma_gram, decimal=2)
def test_estimator():
    """Check shapes and sparsity of OrthogonalMatchingPursuit fits.

    Covers single- and multi-target fits with the default settings, then
    with ``normalize=False`` both with and without an intercept.
    """
    single_target = y[:, 0]
    max_nonzero_multi = n_targets * n_nonzero_coefs

    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    omp.fit(X, single_target)
    assert omp.coef_.shape == (n_features,)
    assert omp.intercept_.shape == ()
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs

    omp.fit(X, y)
    assert omp.coef_.shape == (n_targets, n_features)
    assert omp.intercept_.shape == (n_targets,)
    assert np.count_nonzero(omp.coef_) <= max_nonzero_multi

    # The first-target coefs of the multi-target fit must match a
    # single-target fit done without normalization.
    coef_normalized = omp.coef_[0].copy()
    omp.set_params(fit_intercept=True, normalize=False)
    omp.fit(X, single_target)
    assert_array_almost_equal(coef_normalized, omp.coef_)

    omp.set_params(fit_intercept=False, normalize=False)
    omp.fit(X, single_target)
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
    assert omp.coef_.shape == (n_features,)
    assert omp.intercept_ == 0

    omp.fit(X, y)
    assert omp.coef_.shape == (n_targets, n_features)
    assert omp.intercept_ == 0
    assert np.count_nonzero(omp.coef_) <= max_nonzero_multi
def test_identical_regressors():
    """Check that two identical columns trigger a RuntimeWarning."""
    X_duplicated = X.copy()
    X_duplicated[:, 1] = X_duplicated[:, 0]

    code = np.zeros(n_features)
    code[0] = code[1] = 1.
    target = np.dot(X_duplicated, code)

    assert_warns(RuntimeWarning, orthogonal_mp, X_duplicated, target, 2)
def test_swapped_regressors():
    """Check the recovered support when atoms get swapped internally.

    X[:, 21] should be selected first, then X[:, 0] selected second,
    which will take X[:, 21]'s place in case the algorithm does column
    swapping for optimization (which is the case at the moment).
    """
    expected_support = [0, 21]
    code = np.zeros(n_features)
    code[21] = 1.0
    code[0] = 0.5
    new_y = np.dot(X, code)
    new_Xy = np.dot(X.T, new_y)

    gamma_hat = orthogonal_mp(X, new_y, n_nonzero_coefs=2)
    gamma_hat_gram = orthogonal_mp_gram(G, new_Xy, n_nonzero_coefs=2)

    assert_array_equal(np.flatnonzero(gamma_hat), expected_support)
    assert_array_equal(np.flatnonzero(gamma_hat_gram), expected_support)
def test_no_atoms():
    """Check that an all-zero target yields all-zero coefficients.

    Both the data-based solver and the Gram-based solver are exercised;
    warnings emitted while solving are ignored.
    """
    y_empty = np.zeros_like(y)
    Xy_empty = np.dot(X.T, y_empty)
    gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty,
                                                 n_nonzero_coefs=1)
    # The Gram variant must go through orthogonal_mp_gram: calling
    # orthogonal_mp(G, Xy_empty) would treat the Gram matrix as a design
    # matrix and leave the Gram code path untested.
    gamma_empty_gram = ignore_warnings(orthogonal_mp_gram)(
        G, Xy_empty, n_nonzero_coefs=1)
    assert np.all(gamma_empty == 0)
    assert np.all(gamma_empty_gram == 0)
def test_omp_path():
    """Check that ``return_path=True`` ends at the ``return_path=False`` fit.

    Verified for both ``orthogonal_mp`` and ``orthogonal_mp_gram``.
    """
    expected_shape = (n_features, n_targets, 5)
    solver_calls = (
        (orthogonal_mp, (X, y)),
        (orthogonal_mp_gram, (G, Xy)),
    )
    for solver, data in solver_calls:
        path = solver(*data, n_nonzero_coefs=5, return_path=True)
        last = solver(*data, n_nonzero_coefs=5, return_path=False)
        assert path.shape == expected_shape
        assert_array_almost_equal(path[:, :, -1], last)
def test_omp_return_path_prop_with_gram():
    """Check ``return_path`` consistency when ``precompute=True``."""
    gram_kwargs = dict(n_nonzero_coefs=5, precompute=True)
    path = orthogonal_mp(X, y, return_path=True, **gram_kwargs)
    last = orthogonal_mp(X, y, return_path=False, **gram_kwargs)
    assert path.shape == (n_features, n_targets, 5)
    assert_array_almost_equal(path[:, :, -1], last)
def test_omp_cv():
    """Check that OrthogonalMatchingPursuitCV finds the true sparsity.

    The cross-validated model must select ``n_nonzero_coefs`` atoms,
    recover the true code, and match a plain OrthogonalMatchingPursuit
    fitted with the selected number of atoms.
    """
    y_ = y[:, 0]
    gamma_ = gamma[:, 0]
    shared_params = dict(normalize=True, fit_intercept=False)

    ompcv = OrthogonalMatchingPursuitCV(max_iter=10, **shared_params)
    ompcv.fit(X, y_)
    assert ompcv.n_nonzero_coefs_ == n_nonzero_coefs
    assert_array_almost_equal(ompcv.coef_, gamma_)

    omp = OrthogonalMatchingPursuit(
        n_nonzero_coefs=ompcv.n_nonzero_coefs_, **shared_params)
    omp.fit(X, y_)
    assert_array_almost_equal(ompcv.coef_, omp.coef_)
def test_omp_reaches_least_squares():
    # Sanity check on small random data: when OMP may use every feature,
    # its coefficients must match ordinary least squares.
    rng = check_random_state(0)
    n_samples, n_features, n_targets = 10, 8, 3
    # Draw X before Y so the random stream is consumed in the same order.
    X = rng.randn(n_samples, n_features)
    Y = rng.randn(n_samples, n_targets)

    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_features)
    lstsq = LinearRegression()
    for model in (omp, lstsq):
        model.fit(X, Y)
    assert_array_almost_equal(omp.coef_, lstsq.coef_)

View file

@ -0,0 +1,281 @@
import numpy as np
import scipy.sparse as sp
import pytest
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_raises
from sklearn.base import ClassifierMixin
from sklearn.utils import check_random_state
from sklearn.datasets import load_iris
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import PassiveAggressiveRegressor
# Module-level fixtures: iris samples shuffled with a fixed seed (12), so
# every test in this module works on the same sample ordering.
iris = load_iris()
random_state = check_random_state(12)
indices = np.arange(iris.data.shape[0])
random_state.shuffle(indices)
X = iris.data[indices]
y = iris.target[indices]
# CSR copy of X; the tests loop over both the dense and the sparse input.
X_csr = sp.csr_matrix(X)
class MyPassiveAggressive(ClassifierMixin):
    """Plain NumPy passive-aggressive learner used as a test reference.

    ``loss`` selects the update rule: "hinge" and "squared_hinge" use the
    margin loss ``max(1 - y * p, 0)``; any other value uses the
    epsilon-insensitive loss ``max(|p - y| - epsilon, 0)``.
    """

    def __init__(self, C=1.0, epsilon=0.01, loss="hinge",
                 fit_intercept=True, n_iter=1, random_state=None):
        # ``random_state`` is accepted but neither stored nor used.
        self.n_iter = n_iter
        self.fit_intercept = fit_intercept
        self.loss = loss
        self.epsilon = epsilon
        self.C = C

    def fit(self, X, y):
        """Run ``n_iter`` sequential passes over the rows of ``X``.

        Sets ``self.w`` (one weight per column of ``X``) and ``self.b``.
        """
        n_rows, n_cols = X.shape
        self.w = np.zeros(n_cols, dtype=np.float64)
        self.b = 0.0

        margin_based = self.loss in ("hinge", "squared_hinge")

        for _ in range(self.n_iter):
            for row in range(n_rows):
                sample = X[row]
                target = y[row]
                score = self.project(sample)

                if margin_based:
                    loss = max(1 - target * score, 0)
                else:
                    loss = max(np.abs(score - target) - self.epsilon, 0)

                sqnorm = np.dot(sample, sample)

                # Un-squared losses cap the step at C; squared losses
                # instead add 1 / (2C) to the denominator.
                if self.loss in ("hinge", "epsilon_insensitive"):
                    step = min(self.C, loss / sqnorm)
                elif self.loss in ("squared_hinge",
                                   "squared_epsilon_insensitive"):
                    step = loss / (sqnorm + 1.0 / (2 * self.C))

                # The direction is the label for margin losses and the sign
                # of the residual otherwise.
                if margin_based:
                    direction = target
                else:
                    direction = np.sign(target - score)
                step *= direction

                self.w += step * sample
                if self.fit_intercept:
                    self.b += step

    def project(self, X):
        """Return the linear score ``X . w + b``."""
        return np.dot(X, self.w) + self.b
def test_classifier_accuracy():
    # Fit on dense and sparse iris for every fit_intercept / average
    # combination and require a training accuracy above 0.79.
    averaging_attrs = ('_average_coef', '_average_intercept',
                       '_standard_intercept', '_standard_coef')
    for data in (X, X_csr):
        for fit_intercept in (True, False):
            for average in (False, True):
                clf = PassiveAggressiveClassifier(
                    C=1.0, max_iter=30, fit_intercept=fit_intercept,
                    random_state=1, average=average, tol=None)
                clf.fit(data, y)
                assert clf.score(data, y) > 0.79
                if not average:
                    continue
                # Averaging must expose both the averaged and the standard
                # parameter sets.
                for attr in averaging_attrs:
                    assert hasattr(clf, attr)
def test_classifier_partial_fit():
    # Thirty partial_fit calls on dense and sparse iris must reach a
    # training accuracy above 0.79, with and without averaging.
    averaging_attrs = ('_average_coef', '_average_intercept',
                       '_standard_intercept', '_standard_coef')
    classes = np.unique(y)
    for data in (X, X_csr):
        for average in (False, True):
            clf = PassiveAggressiveClassifier(random_state=0,
                                              average=average,
                                              max_iter=5)
            for _ in range(30):
                clf.partial_fit(data, y, classes)
            assert clf.score(data, y) > 0.79
            if average:
                for attr in averaging_attrs:
                    assert hasattr(clf, attr)
def test_classifier_refit():
    # A fitted classifier can be refitted on other labels and features.
    clf = PassiveAggressiveClassifier(max_iter=5)
    clf.fit(X, y)
    assert_array_equal(clf.classes_, np.unique(y))

    # Second fit: string labels and one feature column fewer.
    string_labels = iris.target_names[y]
    clf.fit(X[:, :-1], string_labels)
    assert_array_equal(clf.classes_, iris.target_names)
@pytest.mark.parametrize('loss', ("hinge", "squared_hinge"))
def test_classifier_correctness(loss):
    # Binarise the labels (class 1 vs. rest) and compare the estimator's
    # coefficients with the reference implementation to two decimals.
    y_bin = y.copy()
    y_bin[y != 1] = -1

    reference = MyPassiveAggressive(loss=loss, n_iter=2)
    reference.fit(X, y_bin)

    for data in (X, X_csr):
        clf = PassiveAggressiveClassifier(loss=loss, max_iter=2,
                                          shuffle=False, tol=None)
        clf.fit(data, y_bin)
        assert_array_almost_equal(reference.w, clf.coef_.ravel(),
                                  decimal=2)
def test_classifier_undefined_methods():
    # Accessing any of these attribute names must raise AttributeError.
    clf = PassiveAggressiveClassifier(max_iter=100)
    missing = ("predict_proba", "predict_log_proba", "transform")
    for meth in missing:
        assert_raises(AttributeError, getattr, clf, meth)
def test_class_weights():
    # Down-weighting class 1 must flip the prediction for a probe point.
    X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                   [1.0, 1.0], [1.0, 0.0]])
    y2 = [1, 1, 1, -1, -1]
    probe = [[0.2, -1.0]]

    unweighted = PassiveAggressiveClassifier(C=0.1, max_iter=100,
                                             class_weight=None,
                                             random_state=100)
    unweighted.fit(X2, y2)
    assert_array_equal(unweighted.predict(probe), np.array([1]))

    # Give class 1 a very small weight: the hyperplane should rotate
    # clockwise and the prediction for the probe point should change.
    weighted = PassiveAggressiveClassifier(C=0.1, max_iter=100,
                                           class_weight={1: 0.001},
                                           random_state=100)
    weighted.fit(X2, y2)
    assert_array_equal(weighted.predict(probe), np.array([-1]))
def test_partial_fit_weight_class_balanced():
    # partial_fit must reject class_weight='balanced' with a ValueError.
    balanced_clf = PassiveAggressiveClassifier(class_weight="balanced",
                                               max_iter=100)
    all_classes = np.unique(y)
    assert_raises(ValueError, balanced_clf.partial_fit, X, y,
                  classes=all_classes)
def test_equal_class_weight():
    # On already balanced data, "balanced" and explicit equal weights must
    # give (almost) the same coefficients as no class weighting.
    X2 = [[1, 0], [1, 0], [0, 1], [0, 1]]
    y2 = [0, 0, 1, 1]
    shared = dict(C=0.1, max_iter=1000, tol=None)

    # Fit order is kept: unweighted, then balanced, then explicit weights.
    clf = PassiveAggressiveClassifier(class_weight=None, **shared)
    clf.fit(X2, y2)

    clf_balanced = PassiveAggressiveClassifier(class_weight="balanced",
                                               **shared)
    clf_balanced.fit(X2, y2)

    clf_weighted = PassiveAggressiveClassifier(
        class_weight={0: 0.5, 1: 0.5}, **shared)
    clf_weighted.fit(X2, y2)

    # Equal only up to some epsilon because of the learning rate schedule.
    for other in (clf_weighted, clf_balanced):
        assert_almost_equal(clf.coef_, other.coef_, decimal=2)
def test_wrong_class_weight_label():
    # A class_weight key (0) that is not among the labels (1, -1) must
    # make fit raise ValueError.
    samples = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                        [1.0, 1.0], [1.0, 0.0]])
    labels = [1, 1, 1, -1, -1]

    clf = PassiveAggressiveClassifier(class_weight={0: 0.5}, max_iter=100)
    assert_raises(ValueError, clf.fit, samples, labels)
def test_wrong_class_weight_format():
    # A list or an unknown string as class_weight must make fit raise
    # ValueError.
    samples = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                        [1.0, 1.0], [1.0, 0.0]])
    labels = [1, 1, 1, -1, -1]

    for bad_weight in ([0.5], "the larch"):
        clf = PassiveAggressiveClassifier(class_weight=bad_weight,
                                          max_iter=100)
        assert_raises(ValueError, clf.fit, samples, labels)
def test_regressor_mse():
    # Regress on binarised labels (class 1 vs. rest) and require a training
    # mean squared error below 1.7 for every configuration.
    averaging_attrs = ('_average_coef', '_average_intercept',
                       '_standard_intercept', '_standard_coef')
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        for fit_intercept in (True, False):
            for average in (False, True):
                reg = PassiveAggressiveRegressor(
                    C=1.0, fit_intercept=fit_intercept,
                    random_state=0, average=average, max_iter=5)
                reg.fit(data, y_bin)
                residual = reg.predict(data) - y_bin
                assert np.mean(residual ** 2) < 1.7
                if not average:
                    continue
                for attr in averaging_attrs:
                    assert hasattr(reg, attr)
def test_regressor_partial_fit():
    # Fifty partial_fit calls on binarised labels must bring the training
    # mean squared error below 1.7, with and without averaging.
    averaging_attrs = ('_average_coef', '_average_intercept',
                       '_standard_intercept', '_standard_coef')
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        for average in (False, True):
            reg = PassiveAggressiveRegressor(random_state=0,
                                             average=average, max_iter=100)
            for _ in range(50):
                reg.partial_fit(data, y_bin)
            residual = reg.predict(data) - y_bin
            assert np.mean(residual ** 2) < 1.7
            if average:
                for attr in averaging_attrs:
                    assert hasattr(reg, attr)
@pytest.mark.parametrize(
    'loss',
    ("epsilon_insensitive", "squared_epsilon_insensitive"))
def test_regressor_correctness(loss):
    # Compare the regressor's coefficients with the reference
    # implementation (two passes, no shuffling) to two decimals.
    y_bin = y.copy()
    y_bin[y != 1] = -1

    reference = MyPassiveAggressive(loss=loss, n_iter=2)
    reference.fit(X, y_bin)

    for data in (X, X_csr):
        reg = PassiveAggressiveRegressor(tol=None, loss=loss, max_iter=2,
                                         shuffle=False)
        reg.fit(data, y_bin)
        assert_array_almost_equal(reference.w, reg.coef_.ravel(),
                                  decimal=2)
def test_regressor_undefined_methods():
    # Accessing ``transform`` on the regressor must raise AttributeError.
    reg = PassiveAggressiveRegressor(max_iter=100)
    missing = ("transform",)
    for meth in missing:
        assert_raises(AttributeError, getattr, reg, meth)
# TODO: remove in 0.25
@pytest.mark.parametrize('klass', [PassiveAggressiveClassifier,
                                   PassiveAggressiveRegressor])
def test_passive_aggressive_deprecated_attr(klass):
    # Reading each of these public attributes on a fitted averaging
    # estimator must emit a FutureWarning that names the attribute.
    est = klass(average=True)
    est.fit(X, y)

    deprecated = ('average_coef_', 'average_intercept_',
                  'standard_coef_', 'standard_intercept_')
    for att in deprecated:
        expected = "Attribute {} was deprecated".format(att)
        with pytest.warns(FutureWarning, match=expected):
            getattr(est, att)

View file

@ -0,0 +1,69 @@
import numpy as np
import scipy.sparse as sp
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raises
from sklearn.utils import check_random_state
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
# Module-level fixtures: iris samples shuffled with a fixed seed (12), so
# every test in this module works on the same sample ordering.
iris = load_iris()
random_state = check_random_state(12)
indices = np.arange(iris.data.shape[0])
random_state.shuffle(indices)
X = iris.data[indices]
y = iris.target[indices]
# CSR copy of X with its column indices sorted in place.
X_csr = sp.csr_matrix(X)
X_csr.sort_indices()
class MyPerceptron:
    """Minimal NumPy perceptron used as a reference implementation."""

    def __init__(self, n_iter=1):
        self.n_iter = n_iter

    def fit(self, X, y):
        """Run ``n_iter`` sequential passes, updating on each mistake.

        Sets ``self.w`` (one weight per column of ``X``) and ``self.b``.
        """
        n_rows, n_cols = X.shape
        self.w = np.zeros(n_cols, dtype=np.float64)
        self.b = 0.0

        for _ in range(self.n_iter):
            for row in range(n_rows):
                sample = X[row]
                if self.predict(sample)[0] == y[row]:
                    continue
                # Misclassified: move the weights and bias towards the label.
                self.w += y[row] * sample
                self.b += y[row]

    def project(self, X):
        """Return the linear score ``X . w + b``."""
        return np.dot(X, self.w) + self.b

    def predict(self, X):
        """Return the sign of the score for each row of ``X``."""
        rows = np.atleast_2d(X)
        return np.sign(self.project(rows))
def test_perceptron_accuracy():
    # Training accuracy on dense and sparse iris must exceed 0.7.
    for data in (X, X_csr):
        clf = Perceptron(max_iter=100, tol=None, shuffle=False)
        clf.fit(data, y)
        accuracy = clf.score(data, y)
        assert accuracy > 0.7
def test_perceptron_correctness():
    # After two unshuffled passes on binarised labels (class 1 vs. rest)
    # the estimator must reproduce the reference weights.
    y_bin = y.copy()
    y_bin[y != 1] = -1

    reference = MyPerceptron(n_iter=2)
    reference.fit(X, y_bin)

    estimator = Perceptron(max_iter=2, shuffle=False, tol=None)
    estimator.fit(X, y_bin)

    assert_array_almost_equal(reference.w, estimator.coef_.ravel())
def test_undefined_methods():
    # Accessing the probability methods must raise AttributeError.
    clf = Perceptron(max_iter=100)
    missing = ("predict_proba", "predict_log_proba")
    for meth in missing:
        assert_raises(AttributeError, getattr, clf, meth)

View file

@ -0,0 +1,515 @@
import numpy as np
from scipy import sparse
from numpy.testing import assert_array_almost_equal
from numpy.testing import assert_array_equal
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_warns
from sklearn.utils._testing import assert_raises_regexp
from sklearn.utils._testing import assert_raises
from sklearn.utils._testing import assert_allclose
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.linear_model._ransac import _dynamic_max_trials
from sklearn.exceptions import ConvergenceWarning
# Module-level fixtures shared by the tests below.
# Generate the coordinates of the line y = 0.2 * x + 20 for x in [-200, 200).
X = np.arange(-200, 200)
y = 0.2 * X + 20
data = np.column_stack([X, y])
# Corrupt a seeded random subset of the points: each selected point is
# shifted by 50 plus a uniform offset in [0, 10) along both coordinates.
# ``outliers`` holds the (unique, sorted) indices of the corrupted points.
rng = np.random.RandomState(1000)
outliers = np.unique(rng.randint(len(X), size=200))
data[outliers, :] += 50 + rng.rand(len(outliers), 2) * 10
# X becomes a single-column 2-D array; y stays 1-D.
X = data[:, 0][:, np.newaxis]
y = data[:, 1]
def test_ransac_inliers_outliers():
    # RANSAC fitted on the corrupted line must mark exactly the injected
    # outliers as non-inliers.
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5, random_state=0)
    ransac_estimator.fit(X, y)

    # Reference mask: everything is an inlier except the corrupted indices.
    found_mask = ransac_estimator.inlier_mask_
    expected_mask = np.ones_like(found_mask).astype(np.bool_)
    expected_mask[outliers] = False

    assert_array_equal(found_mask, expected_mask)
def test_ransac_is_data_valid():
    # The callback must receive subsets of min_samples (2) rows; because it
    # always rejects them, fit must raise ValueError.
    def is_data_valid(X, y):
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    local_rng = np.random.RandomState(0)
    X = local_rng.rand(10, 2)
    y = local_rng.rand(10, 1)

    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5,
                                       is_data_valid=is_data_valid,
                                       random_state=0)
    assert_raises(ValueError, ransac_estimator.fit, X, y)
def test_ransac_is_model_valid():
    # The callback must receive subsets of min_samples (2) rows; because it
    # always rejects the model, fit must raise ValueError.
    def is_model_valid(estimator, X, y):
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5,
                                       is_model_valid=is_model_valid,
                                       random_state=0)
    assert_raises(ValueError, ransac_estimator.fit, X, y)
def test_ransac_max_trials():
    # max_trials=0 leaves no room for any trial, so fit must raise.
    base_estimator = LinearRegression()
    no_trial_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                         residual_threshold=5, max_trials=0,
                                         random_state=0)
    assert_raises(ValueError, no_trial_estimator.fit, X, y)

    # Upper bound on the trial count at a stop probability of 1 - 1e-9;
    # a looser value such as 1 - 1e-2 could still be exceeded by chance.
    # The third argument, 2, is min_samples (X.shape[1] + 1).
    n_inliers = len(X) - len(outliers)
    max_trials = _dynamic_max_trials(n_inliers, X.shape[0], 2, 1 - 1e-9)

    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2)
    for seed in range(50):
        ransac_estimator.set_params(min_samples=2, random_state=seed)
        ransac_estimator.fit(X, y)
        assert ransac_estimator.n_trials_ < max_trials + 1
def test_ransac_stop_n_inliers():
    # With stop_n_inliers=2 the search must stop after the first trial.
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5,
                                       stop_n_inliers=2, random_state=0)
    ransac_estimator.fit(X, y)

    assert ransac_estimator.n_trials_ == 1
def test_ransac_stop_score():
    # With stop_score=0 the search must stop after the first trial.
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5, stop_score=0,
                                       random_state=0)
    ransac_estimator.fit(X, y)

    assert ransac_estimator.n_trials_ == 1
def test_ransac_score():
    # All-zero targets except two corrupted leading samples: the score must
    # be exactly 1 on the clean samples and below 1 on the corrupted ones.
    X = np.arange(100)[:, None]
    y = np.zeros((100, ))
    y[:2] = (1, 100)

    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=0.5,
                                       random_state=0)
    ransac_estimator.fit(X, y)

    assert ransac_estimator.score(X[2:], y[2:]) == 1
    assert ransac_estimator.score(X[:2], y[:2]) < 1
def test_ransac_predict():
    # All-zero targets except two corrupted leading samples: predictions
    # must be zero for every sample.
    X = np.arange(100)[:, None]
    y = np.zeros((100, ))
    y[:2] = (1, 100)

    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=0.5,
                                       random_state=0)
    ransac_estimator.fit(X, y)

    assert_array_equal(ransac_estimator.predict(X), np.zeros(100))
def test_ransac_resid_thresh_no_inliers():
    # With residual_threshold=0.0 no sample counts as an inlier, so fit must
    # raise ValueError with a message and record five no-inlier skips.
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=0.0,
                                       random_state=0, max_trials=5)

    expected_message = "RANSAC could not find a valid consensus set"
    assert_raises_regexp(ValueError, expected_message,
                         ransac_estimator.fit, X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 5
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 0
def test_ransac_no_valid_data():
    # A data validator that rejects everything must make fit raise and be
    # counted as five invalid-data skips.
    def is_data_valid(X, y):
        return False

    ransac_estimator = RANSACRegressor(LinearRegression(),
                                       is_data_valid=is_data_valid,
                                       max_trials=5)

    expected_message = "RANSAC could not find a valid consensus set"
    assert_raises_regexp(ValueError, expected_message,
                         ransac_estimator.fit, X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 5
    assert ransac_estimator.n_skips_invalid_model_ == 0
def test_ransac_no_valid_model():
    # A model validator that rejects everything must make fit raise and be
    # counted as five invalid-model skips.
    def is_model_valid(estimator, X, y):
        return False

    ransac_estimator = RANSACRegressor(LinearRegression(),
                                       is_model_valid=is_model_valid,
                                       max_trials=5)

    expected_message = "RANSAC could not find a valid consensus set"
    assert_raises_regexp(ValueError, expected_message,
                         ransac_estimator.fit, X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 5
def test_ransac_exceed_max_skips():
    # With max_skips=3 and a validator that rejects everything, fit must
    # raise the max_skips error after four invalid-data skips.
    def is_data_valid(X, y):
        return False

    ransac_estimator = RANSACRegressor(LinearRegression(),
                                       is_data_valid=is_data_valid,
                                       max_trials=5,
                                       max_skips=3)

    expected_message = "RANSAC skipped more iterations than `max_skips`"
    assert_raises_regexp(ValueError, expected_message,
                         ransac_estimator.fit, X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 4
    assert ransac_estimator.n_skips_invalid_model_ == 0
def test_ransac_warn_exceed_max_skips():
    # The validator accepts only its first call. fit must then emit a
    # ConvergenceWarning and record four invalid-data skips.
    global cause_skip
    cause_skip = False

    def is_data_valid(X, y):
        # True on the first call only; the module-level flag remembers that
        # a subset has already been accepted.
        global cause_skip
        already_accepted = cause_skip
        cause_skip = True
        return not already_accepted

    ransac_estimator = RANSACRegressor(LinearRegression(),
                                       is_data_valid=is_data_valid,
                                       max_skips=3,
                                       max_trials=5)

    assert_warns(ConvergenceWarning, ransac_estimator.fit, X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 4
    assert ransac_estimator.n_skips_invalid_model_ == 0
def test_ransac_sparse_coo():
    # Fitting on a COO matrix must recover the expected inlier mask.
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5, random_state=0)
    ransac_estimator.fit(sparse.coo_matrix(X), y)

    found_mask = ransac_estimator.inlier_mask_
    expected_mask = np.ones_like(found_mask).astype(np.bool_)
    expected_mask[outliers] = False

    assert_array_equal(found_mask, expected_mask)
def test_ransac_sparse_csr():
    # Fitting on a CSR matrix must recover the expected inlier mask.
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5, random_state=0)
    ransac_estimator.fit(sparse.csr_matrix(X), y)

    found_mask = ransac_estimator.inlier_mask_
    expected_mask = np.ones_like(found_mask).astype(np.bool_)
    expected_mask[outliers] = False

    assert_array_equal(found_mask, expected_mask)
def test_ransac_sparse_csc():
    # Fitting on a CSC matrix must recover the expected inlier mask.
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5, random_state=0)
    ransac_estimator.fit(sparse.csc_matrix(X), y)

    found_mask = ransac_estimator.inlier_mask_
    expected_mask = np.ones_like(found_mask).astype(np.bool_)
    expected_mask[outliers] = False

    assert_array_equal(found_mask, expected_mask)
def test_ransac_none_estimator():
    # Passing None as base estimator must give the same predictions as an
    # explicit LinearRegression.
    shared = dict(min_samples=2, residual_threshold=5, random_state=0)
    ransac_estimator = RANSACRegressor(LinearRegression(), **shared)
    ransac_none_estimator = RANSACRegressor(None, **shared)

    ransac_estimator.fit(X, y)
    ransac_none_estimator.fit(X, y)

    assert_array_almost_equal(ransac_estimator.predict(X),
                              ransac_none_estimator.predict(X))
def test_ransac_min_n_samples():
    # Equivalent spellings of min_samples must give identical predictions;
    # invalid values must make fit raise ValueError.
    base_estimator = LinearRegression()

    def build(**overrides):
        # Every estimator shares the threshold and seed; only min_samples
        # varies (or is left at its default).
        return RANSACRegressor(base_estimator, residual_threshold=5,
                               random_state=0, **overrides)

    reference = build(min_samples=2)
    as_fraction = build(min_samples=2. / X.shape[0])
    as_float = build(min_samples=2.0)
    as_default = build()

    for estimator in (reference, as_fraction, as_float, as_default):
        estimator.fit(X, y)

    for equivalent in (as_fraction, as_float, as_default):
        assert_array_almost_equal(reference.predict(X),
                                  equivalent.predict(X))

    # Negative, non-integral above one, and larger than the sample count.
    for invalid in (-1, 5.2, X.shape[0] + 1):
        assert_raises(ValueError, build(min_samples=invalid).fit, X, y)
def test_ransac_multi_dimensional_targets():
    # Fitting on a three-column target (y stacked three times) must recover
    # the expected inlier mask.
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5, random_state=0)

    stacked_target = np.column_stack([y, y, y])
    ransac_estimator.fit(X, stacked_target)

    found_mask = ransac_estimator.inlier_mask_
    expected_mask = np.ones_like(found_mask).astype(np.bool_)
    expected_mask[outliers] = False

    assert_array_equal(found_mask, expected_mask)
def test_ransac_residual_loss():
    # Custom callable losses and the "squared_loss" string must all lead to
    # the same predictions as the default loss on this data.
    def loss_multi1(y_true, y_pred):
        # Per-sample sum of absolute errors over the target columns.
        return np.sum(np.abs(y_true - y_pred), axis=1)

    def loss_multi2(y_true, y_pred):
        # Per-sample sum of squared errors over the target columns.
        return np.sum((y_true - y_pred) ** 2, axis=1)

    def loss_mono(y_true, y_pred):
        # Per-sample absolute error for a single target.
        return np.abs(y_true - y_pred)

    yyy = np.column_stack([y, y, y])

    base_estimator = LinearRegression()
    ransac_estimator0 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0)
    ransac_estimator1 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        loss=loss_multi1)
    ransac_estimator2 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        loss=loss_multi2)

    # multi-dimensional
    ransac_estimator0.fit(X, yyy)
    ransac_estimator1.fit(X, yyy)
    ransac_estimator2.fit(X, yyy)
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator1.predict(X))
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator2.predict(X))

    # one-dimensional
    ransac_estimator0.fit(X, y)
    ransac_estimator2.loss = loss_mono
    ransac_estimator2.fit(X, y)
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator2.predict(X))

    ransac_estimator3 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        loss="squared_loss")
    ransac_estimator3.fit(X, y)
    # Check the estimator that was just fitted; the previous assertion
    # re-checked ransac_estimator2 and left the string loss unverified.
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator3.predict(X))
def test_ransac_default_residual_threshold():
    # Without an explicit residual_threshold the fit must still recover the
    # expected inlier mask.
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       random_state=0)
    ransac_estimator.fit(X, y)

    # Reference mask: everything is an inlier except the corrupted indices.
    found_mask = ransac_estimator.inlier_mask_
    expected_mask = np.ones_like(found_mask).astype(np.bool_)
    expected_mask[outliers] = False

    assert_array_equal(found_mask, expected_mask)
def test_ransac_dynamic_max_trials():
    # Numbers hand-calculated and confirmed on page 119 (Table 4.3) in
    # Hartley, R.~I. and Zisserman, A., 2004,
    # Multiple View Geometry in Computer Vision, Second Edition,
    # Cambridge University Press, ISBN: 0521540518

    # (n_inliers, n_samples, min_samples, probability) -> expected trials;
    # e is the outlier ratio.
    table = [
        ((100, 100, 2, 0.99), 1),     # e = 0%, min_samples = 2
        ((95, 100, 2, 0.99), 2),      # e = 5%, min_samples = 2
        ((90, 100, 2, 0.99), 3),      # e = 10%, min_samples = 2
        ((70, 100, 2, 0.99), 7),      # e = 30%, min_samples = 2
        ((50, 100, 2, 0.99), 17),     # e = 50%, min_samples = 2
        ((95, 100, 8, 0.99), 5),      # e = 5%, min_samples = 8
        ((90, 100, 8, 0.99), 9),      # e = 10%, min_samples = 8
        ((70, 100, 8, 0.99), 78),     # e = 30%, min_samples = 8
        ((50, 100, 8, 0.99), 1177),   # e = 50%, min_samples = 8
    ]
    for arguments, expected_trials in table:
        assert _dynamic_max_trials(*arguments) == expected_trials

    # Degenerate probabilities: 0 needs no trial, 1 needs infinitely many.
    assert _dynamic_max_trials(1, 100, 10, 0) == 0
    assert _dynamic_max_trials(1, 100, 10, 1) == float('inf')

    # A stop_probability outside [0, 1] must make fit raise ValueError.
    base_estimator = LinearRegression()
    for invalid_probability in (-0.1, 1.1):
        ransac_estimator = RANSACRegressor(
            base_estimator, min_samples=2,
            stop_probability=invalid_probability)
        assert_raises(ValueError, ransac_estimator.fit, X, y)
def test_ransac_fit_sample_weight():
    ransac_estimator = RANSACRegressor(random_state=0)
    n_samples = y.shape[0]
    weights = np.ones(n_samples)
    ransac_estimator.fit(X, y, weights)

    # sanity check: one mask entry per sample
    found_mask = ransac_estimator.inlier_mask_
    assert found_mask.shape[0] == n_samples

    # check that the mask is correct
    expected_mask = np.ones_like(found_mask).astype(np.bool_)
    expected_mask[outliers] = False
    assert_array_equal(found_mask, expected_mask)

    # check that fit(X) = fit([X1, X2, X3], sample_weight=[n1, n2, n3])
    # where X = X1 repeated n1 times, X2 repeated n2 times and so forth.
    # The draws below are kept in this exact order.
    random_state = check_random_state(0)
    X_ = random_state.randint(0, 200, [10, 1])
    y_ = (0.2 * X_ + 2).flatten()
    sample_weight = random_state.randint(0, 10, 10)
    outlier_X = random_state.randint(0, 1000, [1, 1])
    outlier_weight = random_state.randint(0, 10, 1)
    outlier_y = random_state.randint(-1000, 0, 1)

    # Expanded data set: every row repeated as often as its weight says.
    repeated_X = np.repeat(X_, sample_weight, axis=0)
    repeated_outlier_X = np.repeat(outlier_X, outlier_weight, axis=0)
    X_flat = np.append(repeated_X, repeated_outlier_X, axis=0)

    repeated_y = np.repeat(y_, sample_weight, axis=0)
    repeated_outlier_y = np.repeat(outlier_y, outlier_weight, axis=0)
    y_flat = np.append(repeated_y, repeated_outlier_y, axis=0).flatten()

    ransac_estimator.fit(X_flat, y_flat)
    ref_coef_ = ransac_estimator.estimator_.coef_

    # Weighted data set: each distinct row once, with its weight.
    sample_weight = np.append(sample_weight, outlier_weight)
    X_ = np.append(X_, outlier_X, axis=0)
    y_ = np.append(y_, outlier_y)
    ransac_estimator.fit(X_, y_, sample_weight)

    assert_allclose(ransac_estimator.estimator_.coef_, ref_coef_)

    # check that an error is raised if base_estimator.fit doesn't support
    # sample_weight
    unweighted_estimator = RANSACRegressor(OrthogonalMatchingPursuit())
    assert_raises(ValueError, unweighted_estimator.fit, X, y, weights)
def test_ransac_final_model_fit_sample_weight():
    # The final RANSAC model must equal a LinearRegression fitted on the
    # inliers with the matching sample weights.
    X, y = make_regression(n_samples=1000, random_state=10)
    rng = check_random_state(42)
    raw_weight = rng.randint(1, 4, size=y.shape[0])
    sample_weight = raw_weight / raw_weight.sum()

    ransac = RANSACRegressor(base_estimator=LinearRegression(),
                             random_state=0)
    ransac.fit(X, y, sample_weight=sample_weight)

    inliers = ransac.inlier_mask_
    final_model = LinearRegression()
    final_model.fit(X[inliers], y[inliers],
                    sample_weight=sample_weight[inliers])

    assert_allclose(ransac.estimator_.coef_, final_model.coef_)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,848 @@
# Authors: Danny Sullivan <dbsullivan23@gmail.com>
# Tom Dupre la Tour <tom.dupre-la-tour@m4x.org>
#
# License: BSD 3 clause
import math
import pytest
import numpy as np
import scipy.sparse as sp
from scipy.special import logsumexp
from sklearn.linear_model._sag import get_auto_step_size
from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.linear_model._base import make_dataset
from sklearn.linear_model._logistic import _multinomial_loss_grad
from sklearn.utils.extmath import row_norms
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_raise_message
from sklearn.utils import compute_class_weight
from sklearn.utils import check_random_state
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
from sklearn.datasets import make_blobs, load_iris, make_classification
from sklearn.base import clone
# Load the iris dataset once, at import time.
iris = load_iris()
# this is used for sag classification
def log_dloss(p, y):
    """Return the derivative of the log loss w.r.t. the prediction ``p``.

    For ``|p * y| > 18`` a cheaper approximation is returned instead of
    the full expression.
    """
    margin = p * y
    if margin > 18.0:
        # approximately equal and saves the computation of the log
        derivative = math.exp(-margin) * -y
    elif margin < -18.0:
        derivative = -y
    else:
        derivative = -y / (math.exp(margin) + 1.0)
    return derivative
def log_loss(p, y):
    """Return the mean of ``log(1 + exp(-y * p))``."""
    negative_margin = -y * p
    per_sample = np.log(1. + np.exp(negative_margin))
    return np.mean(per_sample)
# this is used for sag regression
def squared_dloss(p, y):
    """Return the residual ``p - y``."""
    residual = p - y
    return residual
def squared_loss(p, y):
    """Return the mean of ``0.5 * (p - y) ** 2``."""
    residual = p - y
    return np.mean(0.5 * residual * residual)
# function for measuring the log loss
def get_pobj(w, alpha, myX, myy, loss):
    """Return ``loss(myX . w, myy) + alpha * ||w||^2 / 2``.

    ``w`` is flattened first; ``loss`` is called as
    ``loss(predictions, targets)``.
    """
    flat_w = w.ravel()
    objective = loss(np.dot(myX, flat_w), myy)
    objective += alpha * flat_w.dot(flat_w) / 2.
    return objective
def sag(X, y, step_size, alpha, n_iter=1, dloss=None, sparse=False,
        sample_weight=None, fit_intercept=True, saga=False):
    """Dense reference implementation of SAG / SAGA for the tests.

    Parameters
    ----------
    X, y : indexable
        Samples (``X[idx]`` is one row) and targets.
    step_size : float
        Step size applied to every update.
    alpha : float
        Penalty factor; ``alpha * weights`` is added to each gradient.
    n_iter : int
        Number of epochs of ``n_samples`` random draws each.
    dloss : callable
        ``dloss(prediction, target)`` returning the loss derivative. It
        must be supplied; the ``None`` default is not callable.
    sparse : bool
        When true, intercept updates are scaled by a fixed decay of .01.
    sample_weight : indexable or None
        Optional per-sample factor applied to the loss derivative.
    fit_intercept : bool
        Whether to update the intercept.
    saga : bool
        Whether to apply the additional SAGA correction steps.

    Returns
    -------
    weights, intercept
    """
    n_samples, n_features = X.shape[0], X.shape[1]

    weights = np.zeros(X.shape[1])
    sum_gradient = np.zeros(X.shape[1])
    gradient_memory = np.zeros((n_samples, n_features))

    intercept = 0.0
    intercept_sum_gradient = 0.0
    intercept_gradient_memory = np.zeros(n_samples)

    # Fixed seed: the sequence of sampled indices is deterministic.
    rng = np.random.RandomState(77)
    decay = 1.0
    seen = set()

    # sparse data has a fixed decay of .01
    if sparse:
        decay = .01

    for epoch in range(n_iter):
        for k in range(n_samples):
            # Draw a scalar rather than a 1-element array: converting an
            # array to int is deprecated in NumPy, and rand() consumes the
            # same random stream as rand(1).
            idx = int(rng.rand() * n_samples)
            entry = X[idx]
            seen.add(idx)
            p = np.dot(entry, weights) + intercept
            gradient = dloss(p, y[idx])
            if sample_weight is not None:
                gradient *= sample_weight[idx]
            update = entry * gradient + alpha * weights
            # Replace this sample's stored gradient in the running sum.
            gradient_correction = update - gradient_memory[idx]
            sum_gradient += gradient_correction
            gradient_memory[idx] = update
            if saga:
                weights -= (gradient_correction *
                            step_size * (1 - 1. / len(seen)))

            if fit_intercept:
                gradient_correction = (gradient -
                                       intercept_gradient_memory[idx])
                intercept_gradient_memory[idx] = gradient
                intercept_sum_gradient += gradient_correction
                gradient_correction *= step_size * (1. - 1. / len(seen))
                if saga:
                    intercept -= (step_size * intercept_sum_gradient /
                                  len(seen) * decay) + gradient_correction
                else:
                    intercept -= (step_size * intercept_sum_gradient /
                                  len(seen) * decay)

            # Average over the samples seen so far, not over n_samples.
            weights -= step_size * sum_gradient / len(seen)

    return weights, intercept
def sag_sparse(X, y, step_size, alpha, n_iter=1,
               dloss=None, sample_weight=None, sparse=False,
               fit_intercept=True, saga=False, random_state=0):
    """Reference SAG / SAGA with scaled weights and deferred updates.

    The weights are stored divided by a running scale ``wscale`` (which is
    multiplied by ``1 - alpha * step_size`` at every step), and the
    per-feature gradient steps are applied later from the cumulative sums
    in ``c_sum`` using ``last_updated``.

    Parameters
    ----------
    X, y : indexable
        Samples (``X[idx]`` is one row) and targets.
    step_size : float
        Step size applied to every update.
    alpha : float
        Penalty factor entering the weight scale.
    n_iter : int
        Number of epochs of ``n_samples`` random draws each.
    dloss : callable
        ``dloss(prediction, target)`` returning the loss derivative. It
        must be supplied; the ``None`` default is not callable.
    sample_weight : indexable or None
        Optional per-sample factor applied to the loss derivative.
    sparse : bool
        When true, intercept updates are scaled by a fixed decay of .01.
    fit_intercept : bool
        Whether to update the intercept.
    saga : bool
        Whether to apply the additional SAGA correction steps.
    random_state : int, RandomState instance or None
        Passed to ``check_random_state`` to draw the sample indices.

    Returns
    -------
    weights, intercept

    Raises
    ------
    ZeroDivisionError
        If ``step_size * alpha == 1``, which would make ``wscale`` zero.
    """
    if step_size * alpha == 1.:
        raise ZeroDivisionError("Sparse sag does not handle the case "
                                "step_size * alpha == 1")
    n_samples, n_features = X.shape[0], X.shape[1]

    weights = np.zeros(n_features)
    sum_gradient = np.zeros(n_features)
    # Builtin int: the ``np.int`` alias has been removed from NumPy.
    last_updated = np.zeros(n_features, dtype=int)
    gradient_memory = np.zeros(n_samples)
    rng = check_random_state(random_state)
    intercept = 0.0
    intercept_sum_gradient = 0.0
    wscale = 1.0
    decay = 1.0
    seen = set()

    c_sum = np.zeros(n_iter * n_samples)

    # sparse data has a fixed decay of .01
    if sparse:
        decay = .01

    counter = 0
    for epoch in range(n_iter):
        for k in range(n_samples):
            # Draw a scalar rather than a 1-element array: converting an
            # array to int is deprecated in NumPy, and rand() consumes the
            # same random stream as rand(1).
            idx = int(rng.rand() * n_samples)
            entry = X[idx]
            seen.add(idx)

            # Bring every weight up to date before it is used.
            if counter >= 1:
                for j in range(n_features):
                    if last_updated[j] == 0:
                        weights[j] -= c_sum[counter - 1] * sum_gradient[j]
                    else:
                        weights[j] -= ((c_sum[counter - 1] -
                                        c_sum[last_updated[j] - 1]) *
                                       sum_gradient[j])
                    last_updated[j] = counter

            p = (wscale * np.dot(entry, weights)) + intercept
            gradient = dloss(p, y[idx])

            if sample_weight is not None:
                gradient *= sample_weight[idx]

            update = entry * gradient
            gradient_correction = update - (gradient_memory[idx] * entry)
            sum_gradient += gradient_correction
            if saga:
                for j in range(n_features):
                    weights[j] -= (gradient_correction[j] * step_size *
                                   (1 - 1. / len(seen)) / wscale)

            if fit_intercept:
                gradient_correction = gradient - gradient_memory[idx]
                intercept_sum_gradient += gradient_correction
                gradient_correction *= step_size * (1. - 1. / len(seen))
                if saga:
                    intercept -= ((step_size * intercept_sum_gradient /
                                   len(seen) * decay) +
                                  gradient_correction)
                else:
                    intercept -= (step_size * intercept_sum_gradient /
                                  len(seen) * decay)

            gradient_memory[idx] = gradient

            wscale *= (1.0 - alpha * step_size)
            if counter == 0:
                c_sum[0] = step_size / (wscale * len(seen))
            else:
                c_sum[counter] = (c_sum[counter - 1] +
                                  step_size / (wscale * len(seen)))

            # When the scale gets tiny, apply all pending updates, fold the
            # scale into the weights and restart from wscale = 1.
            if counter >= 1 and wscale < 1e-9:
                for j in range(n_features):
                    if last_updated[j] == 0:
                        weights[j] -= c_sum[counter] * sum_gradient[j]
                    else:
                        weights[j] -= ((c_sum[counter] -
                                        c_sum[last_updated[j] - 1]) *
                                       sum_gradient[j])
                    last_updated[j] = counter + 1
                c_sum[counter] = 0
                weights *= wscale
                wscale = 1.0

            counter += 1

    # Apply the updates still pending after the last step, then unscale.
    for j in range(n_features):
        if last_updated[j] == 0:
            weights[j] -= c_sum[counter - 1] * sum_gradient[j]
        else:
            weights[j] -= ((c_sum[counter - 1] -
                            c_sum[last_updated[j] - 1]) *
                           sum_gradient[j])
    weights *= wscale
    return weights, intercept
def get_step_size(X, alpha, fit_intercept, classification=True):
    """Return the step size computed from the largest squared row norm.

    Classification uses ``4 / (L + fit_intercept + 4 * alpha)`` and
    regression ``1 / (L + fit_intercept + alpha)``, where ``L`` is the
    maximum over the rows of ``X`` of the sum of squared entries.
    """
    max_squared_norm = np.max(np.sum(X * X, axis=1))
    if not classification:
        return 1.0 / (max_squared_norm + fit_intercept + alpha)
    return (4.0 / (max_squared_norm +
                   fit_intercept + 4.0 * alpha))
def test_classifier_matching():
    """Check LogisticRegression 'sag'/'saga' against `sag_sparse` and `sag`.

    Both reference helpers of this module are run with the same step size
    and number of iterations as the estimator, and their weights and
    intercepts must agree with the fitted model to 9 decimals.
    """
    n_samples = 20
    alpha = 1.1
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
                      cluster_std=0.1)
    y[y == 0] = -1
    step_size = get_step_size(X, alpha, fit_intercept)

    # SAGA variance w.r.t. stream order is higher, hence more iterations.
    for solver, n_iter in (('sag', 80), ('saga', 300)):
        clf = LogisticRegression(solver=solver, fit_intercept=fit_intercept,
                                 tol=1e-11, C=1. / alpha / n_samples,
                                 max_iter=n_iter, random_state=10,
                                 multi_class='ovr')
        clf.fit(X, y)

        reference_kwargs = dict(n_iter=n_iter, dloss=log_dloss,
                                fit_intercept=fit_intercept,
                                saga=solver == 'saga')
        # Run both references first (sparse-style, then plain), then compare.
        references = [
            reference(X, y, step_size, alpha, **reference_kwargs)
            for reference in (sag_sparse, sag)
        ]
        for ref_weights, ref_intercept in references:
            assert_array_almost_equal(np.atleast_2d(ref_weights),
                                      clf.coef_, decimal=9)
            assert_array_almost_equal(np.atleast_1d(ref_intercept),
                                      clf.intercept_, decimal=9)
def test_regressor_matching():
    """Check Ridge with the 'sag' solver against `sag_sparse` and `sag`."""
    n_samples, n_features = 10, 5
    alpha = 1.
    n_iter = 100
    fit_intercept = True

    # Noise-free linear target drawn from a fixed seed.
    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    y = X.dot(true_w)

    step_size = get_step_size(X, alpha, fit_intercept, classification=False)
    clf = Ridge(fit_intercept=fit_intercept, tol=1e-11, solver='sag',
                alpha=alpha * n_samples, max_iter=n_iter)
    clf.fit(X, y)

    reference_kwargs = dict(n_iter=n_iter, dloss=squared_dloss,
                            fit_intercept=fit_intercept)
    references = [
        reference(X, y, step_size, alpha, **reference_kwargs)
        for reference in (sag_sparse, sag)
    ]
    for ref_weights, ref_intercept in references:
        assert_allclose(ref_weights, clf.coef_)
        assert_allclose(ref_intercept, clf.intercept_)
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
def test_sag_pobj_matches_logistic_regression():
    """Check that the 'sag' objective value matches plain LogisticRegression.

    Three models are fitted (sag on dense input, sag on CSR input and the
    default solver on dense input) and their objective values, as computed
    by ``get_pobj``, must agree pairwise to 4 decimals.
    """
    n_samples = 100
    alpha = 1.0
    max_iter = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
                      cluster_std=0.1)

    common = dict(fit_intercept=False, tol=1e-7, C=1. / alpha / n_samples,
                  max_iter=max_iter, random_state=10, multi_class='ovr')
    sag_on_dense = LogisticRegression(solver='sag', **common)
    sag_on_csr = clone(sag_on_dense)
    default_solver = LogisticRegression(**common)

    sag_on_dense.fit(X, y)
    sag_on_csr.fit(sp.csr_matrix(X), y)
    default_solver.fit(X, y)

    pobj1, pobj2, pobj3 = [
        get_pobj(est.coef_, alpha, X, y, log_loss)
        for est in (sag_on_dense, sag_on_csr, default_solver)
    ]
    for left, right in ((pobj1, pobj2), (pobj2, pobj3), (pobj3, pobj1)):
        assert_array_almost_equal(left, right, decimal=4)
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
def test_sag_pobj_matches_ridge_regression():
    """Check that the 'sag' objective value matches Ridge with 'lsqr'.

    Ridge is fitted with 'sag' on dense input, 'sag' on CSR input and 'lsqr'
    on dense input; the objective values from ``get_pobj`` must agree
    pairwise to 4 decimals.
    """
    n_samples = 100
    n_features = 10
    alpha = 1.0
    n_iter = 100
    fit_intercept = False

    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    y = X.dot(true_w)

    common = dict(fit_intercept=fit_intercept, alpha=alpha, max_iter=n_iter,
                  random_state=42)
    sag_on_dense = Ridge(tol=1e-11, solver='sag', **common)
    sag_on_csr = clone(sag_on_dense)
    lsqr = Ridge(tol=1e-5, solver='lsqr', **common)

    sag_on_dense.fit(X, y)
    sag_on_csr.fit(sp.csr_matrix(X), y)
    lsqr.fit(X, y)

    pobj1, pobj2, pobj3 = [
        get_pobj(est.coef_, alpha, X, y, squared_loss)
        for est in (sag_on_dense, sag_on_csr, lsqr)
    ]
    for left, right in ((pobj1, pobj2), (pobj1, pobj3), (pobj3, pobj2)):
        assert_array_almost_equal(left, right, decimal=4)
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
def test_sag_regressor_computed_correctly():
    """Check the Ridge 'sag' solution against the `sag_sparse` reference.

    The same ``RandomState`` instance is handed to the estimator and to the
    reference runs, so the order of the calls below matters.
    """
    alpha = .1
    n_features = 10
    n_samples = 40
    max_iter = 100
    tol = .000001
    fit_intercept = True

    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    y = np.dot(X, w) + 2.
    step_size = get_step_size(X, alpha, fit_intercept, classification=False)

    dense_clf = Ridge(fit_intercept=fit_intercept, tol=tol, solver='sag',
                      alpha=alpha * n_samples, max_iter=max_iter,
                      random_state=rng)
    csr_clf = clone(dense_clf)
    dense_clf.fit(X, y)
    csr_clf.fit(sp.csr_matrix(X), y)

    reference_kwargs = dict(n_iter=max_iter, dloss=squared_dloss,
                            fit_intercept=fit_intercept, random_state=rng)
    spweights1, spintercept1 = sag_sparse(X, y, step_size, alpha,
                                          **reference_kwargs)
    spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
                                          sparse=True, **reference_kwargs)

    assert_array_almost_equal(dense_clf.coef_.ravel(), spweights1.ravel(),
                              decimal=3)
    assert_almost_equal(dense_clf.intercept_, spintercept1, decimal=1)

    # TODO: uncomment when sparse Ridge with intercept will be fixed (#4710)
    # assert_array_almost_equal(csr_clf.coef_.ravel(),
    #                           spweights2.ravel(),
    #                           decimal=3)
    # assert_almost_equal(csr_clf.intercept_, spintercept2, decimal=1)
def test_get_auto_step_size():
    """Compare ``get_auto_step_size`` with hand-computed step sizes.

    Every combination of SAG/SAGA and with/without intercept is checked for
    the "squared" and "log" losses; an unknown loss name must raise
    ``ValueError``.
    """
    X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
    n_samples = X.shape[0]
    alpha = 1.2
    # the second sample has the largest sum of squares
    max_squared_sum = 4 + 9 + 16
    max_squared_sum_ = row_norms(X, squared=True).max()
    assert_almost_equal(max_squared_sum, max_squared_sum_, decimal=4)

    for saga in [True, False]:
        for fit_intercept in (True, False):
            denom_sqr = max_squared_sum + alpha + int(fit_intercept)
            denom_log = max_squared_sum + 4.0 * alpha + int(fit_intercept)
            if saga:
                L_sqr = denom_sqr
                L_log = denom_log / 4.0
                mun_sqr = min(2 * n_samples * alpha, L_sqr)
                mun_log = min(2 * n_samples * alpha, L_log)
                expected_sqr = 1 / (2 * L_sqr + mun_sqr)
                expected_log = 1 / (2 * L_log + mun_log)
            else:
                expected_sqr = 1.0 / denom_sqr
                expected_log = 4.0 / denom_log

            for loss, expected in (("squared", expected_sqr),
                                   ("log", expected_log)):
                actual = get_auto_step_size(max_squared_sum_, alpha, loss,
                                            fit_intercept,
                                            n_samples=n_samples,
                                            is_saga=saga)
                assert_almost_equal(expected, actual, decimal=4)

    msg = 'Unknown loss function for SAG solver, got wrong instead of'
    assert_raise_message(ValueError, msg, get_auto_step_size,
                         max_squared_sum_, alpha, "wrong", False)
def test_sag_regressor():
    """Check that Ridge with the 'sag' solver fits a 1-D line well.

    The model is fitted on dense and CSR versions of the same data, first
    on a noise-free line (R^2 must exceed 0.99) and then on a noisy one
    (R^2 must exceed 0.5).
    """
    xmin, xmax = -5, 5
    n_samples = 20
    tol = .001
    max_iter = 50
    alpha = 0.1
    rng = np.random.RandomState(0)
    X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)

    # simple linear function without noise
    y = 0.5 * X.ravel()

    clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                 alpha=alpha * n_samples, random_state=rng)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    score1 = clf1.score(X, y)
    score2 = clf2.score(X, y)
    assert score1 > 0.99
    assert score2 > 0.99

    # simple linear function with noise
    y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()

    clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                 alpha=alpha * n_samples)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    score1 = clf1.score(X, y)
    # the sparse-fitted model is scored once (a duplicated call was removed)
    score2 = clf2.score(X, y)
    assert score1 > 0.5
    assert score2 > 0.5
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
def test_sag_classifier_computed_correctly():
    """Check binary LogisticRegression 'sag' against `sag_sparse`.

    The estimator is fitted on dense and CSR input and compared with the
    reference run with ``sparse=False`` and ``sparse=True`` respectively.
    """
    alpha = .1
    n_samples = 50
    n_iter = 50
    tol = .00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)

    # Encode the labels as +1 for the second class and -1 otherwise.
    classes = np.unique(y)
    y = np.where(y != classes[1], -1., 1.)

    dense_clf = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                                   max_iter=n_iter, tol=tol, random_state=77,
                                   fit_intercept=fit_intercept,
                                   multi_class='ovr')
    csr_clf = clone(dense_clf)
    dense_clf.fit(X, y)
    csr_clf.fit(sp.csr_matrix(X), y)

    reference_kwargs = dict(n_iter=n_iter, dloss=log_dloss,
                            fit_intercept=fit_intercept)
    spweights, spintercept = sag_sparse(X, y, step_size, alpha,
                                        **reference_kwargs)
    spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
                                          sparse=True, **reference_kwargs)

    checks = ((dense_clf, spweights, spintercept),
              (csr_clf, spweights2, spintercept2))
    for clf, ref_weights, ref_intercept in checks:
        assert_array_almost_equal(clf.coef_.ravel(), ref_weights.ravel(),
                                  decimal=2)
        assert_almost_equal(clf.intercept_, ref_intercept, decimal=1)
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
def test_sag_multiclass_computed_correctly():
    """Check one-vs-rest LogisticRegression 'sag' against `sag_sparse`.

    For every class a +1/-1 target is built and the reference is run with
    ``sparse=False`` and ``sparse=True``; the stacked results are compared
    with the models fitted on dense and CSR input.
    """
    alpha = .1
    n_samples = 20
    tol = .00001
    max_iter = 40
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    dense_clf = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                                   max_iter=max_iter, tol=tol,
                                   random_state=77,
                                   fit_intercept=fit_intercept,
                                   multi_class='ovr')
    csr_clf = clone(dense_clf)
    dense_clf.fit(X, y)
    csr_clf.fit(sp.csr_matrix(X), y)

    reference_kwargs = dict(dloss=log_dloss, n_iter=max_iter,
                            fit_intercept=fit_intercept)
    coef1, intercept1, coef2, intercept2 = [], [], [], []
    for cl in classes:
        y_encoded = np.where(y != cl, -1., 1.)
        spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
                                              **reference_kwargs)
        spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
                                              sparse=True,
                                              **reference_kwargs)
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1, coef2 = np.vstack(coef1), np.vstack(coef2)
    intercept1, intercept2 = np.array(intercept1), np.array(intercept2)

    for i, _ in enumerate(classes):
        assert_array_almost_equal(dense_clf.coef_[i].ravel(),
                                  coef1[i].ravel(), decimal=2)
        assert_almost_equal(dense_clf.intercept_[i], intercept1[i],
                            decimal=1)
        assert_array_almost_equal(csr_clf.coef_[i].ravel(),
                                  coef2[i].ravel(), decimal=2)
        assert_almost_equal(csr_clf.intercept_[i], intercept2[i], decimal=1)
def test_classifier_results():
    """Check that 'sag' LogisticRegression reproduces the training labels.

    The labels are the sign of a random linear function of ``X``; models
    fitted on dense and CSR input must predict them exactly.
    """
    alpha = .1
    n_features = 20
    n_samples = 10
    tol = .01
    max_iter = 200

    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    y = np.sign(np.dot(X, w))

    dense_clf = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                                   max_iter=max_iter, tol=tol,
                                   random_state=77)
    csr_clf = clone(dense_clf)
    dense_clf.fit(X, y)
    csr_clf.fit(sp.csr_matrix(X), y)

    pred_dense = dense_clf.predict(X)
    pred_csr = csr_clf.predict(X)
    for predicted in (pred_dense, pred_csr):
        assert_almost_equal(predicted, y, decimal=12)
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
def test_binary_classifier_class_weight():
    """Check binary 'sag' LogisticRegression with explicit class weights.

    The class weights are expanded to per-sample weights and passed to the
    `sag_sparse` reference, whose result must match the fitted estimators.
    """
    alpha = .1
    n_samples = 50
    n_iter = 20
    tol = .00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)

    # Encode the labels as +1 for the second class and -1 otherwise.
    classes = np.unique(y)
    y = np.where(y != classes[1], -1., 1.)

    class_weight = {1: .45, -1: .55}
    dense_clf = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                                   max_iter=n_iter, tol=tol, random_state=77,
                                   fit_intercept=fit_intercept,
                                   multi_class='ovr',
                                   class_weight=class_weight)
    csr_clf = clone(dense_clf)
    dense_clf.fit(X, y)
    csr_clf.fit(sp.csr_matrix(X), y)

    # Expand the per-class weights into one weight per sample.
    class_weight_ = compute_class_weight(class_weight, classes=np.unique(y),
                                         y=y)
    sample_weight = class_weight_[LabelEncoder().fit_transform(y)]

    reference_kwargs = dict(n_iter=n_iter, dloss=log_dloss,
                            sample_weight=sample_weight,
                            fit_intercept=fit_intercept)
    spweights, spintercept = sag_sparse(X, y, step_size, alpha,
                                        **reference_kwargs)
    spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
                                          sparse=True, **reference_kwargs)

    checks = ((dense_clf, spweights, spintercept),
              (csr_clf, spweights2, spintercept2))
    for clf, ref_weights, ref_intercept in checks:
        assert_array_almost_equal(clf.coef_.ravel(), ref_weights.ravel(),
                                  decimal=2)
        assert_almost_equal(clf.intercept_, ref_intercept, decimal=1)
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
def test_multiclass_classifier_class_weight():
    """Check one-vs-rest 'sag' LogisticRegression with class weights.

    For every class a +1/-1 target is built and the `sag_sparse` reference
    is run with the per-sample weights derived from ``class_weight``.
    """
    alpha = .1
    n_samples = 20
    tol = .00001
    max_iter = 50
    class_weight = {0: .45, 1: .55, 2: .75}
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    dense_clf = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                                   max_iter=max_iter, tol=tol,
                                   random_state=77,
                                   fit_intercept=fit_intercept,
                                   multi_class='ovr',
                                   class_weight=class_weight)
    csr_clf = clone(dense_clf)
    dense_clf.fit(X, y)
    csr_clf.fit(sp.csr_matrix(X), y)

    # Expand the per-class weights into one weight per sample.
    class_weight_ = compute_class_weight(class_weight, classes=np.unique(y),
                                         y=y)
    sample_weight = class_weight_[LabelEncoder().fit_transform(y)]

    reference_kwargs = dict(n_iter=max_iter, dloss=log_dloss,
                            sample_weight=sample_weight)
    coef1, intercept1, coef2, intercept2 = [], [], [], []
    for cl in classes:
        y_encoded = np.where(y != cl, -1., 1.)
        spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
                                              **reference_kwargs)
        spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
                                              sparse=True,
                                              **reference_kwargs)
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1, coef2 = np.vstack(coef1), np.vstack(coef2)
    intercept1, intercept2 = np.array(intercept1), np.array(intercept2)

    for i, _ in enumerate(classes):
        assert_array_almost_equal(dense_clf.coef_[i].ravel(),
                                  coef1[i].ravel(), decimal=2)
        assert_almost_equal(dense_clf.intercept_[i], intercept1[i],
                            decimal=1)
        assert_array_almost_equal(csr_clf.coef_[i].ravel(),
                                  coef2[i].ravel(), decimal=2)
        assert_almost_equal(csr_clf.intercept_[i], intercept2[i], decimal=1)
def test_classifier_single_class():
    """Check that fitting 'sag' on single-class data raises ValueError."""
    X = [[1, 2], [3, 4]]
    y = [1, 1]
    clf = LogisticRegression(solver='sag')
    expected_message = ("This solver needs samples of at least 2 classes "
                        "in the data")
    assert_raise_message(ValueError, expected_message, clf.fit, X, y)
def test_step_size_alpha_error():
    """Check the ZeroDivisionError raised when step_size * alpha_scaled == 1.

    Both LogisticRegression and Ridge with the 'sag' solver are fitted on
    all-zero features without intercept and must raise with the same message.
    """
    X = [[0, 0], [0, 0]]
    y = [1, -1]
    fit_intercept = False
    alpha = 1.
    msg = ("Current sag implementation does not handle the case"
           " step_size * alpha_scaled == 1")

    estimators = (
        LogisticRegression(solver='sag', C=1. / alpha,
                           fit_intercept=fit_intercept),
        Ridge(fit_intercept=fit_intercept, solver='sag', alpha=alpha),
    )
    for estimator in estimators:
        assert_raise_message(ZeroDivisionError, msg, estimator.fit, X, y)
def test_multinomial_loss():
    """Check that the two multinomial loss/gradient computations agree.

    ``_multinomial_grad_loss_all_samples`` (the SAG-style computation) is
    compared with ``_multinomial_loss_grad`` (the LogisticRegression-style
    computation) on the iris data with random weights.
    """
    X, y = iris.data, iris.target.astype(np.float64)
    n_samples, n_features = X.shape
    n_classes = len(np.unique(y))

    rng = check_random_state(42)
    weights = rng.randn(n_features, n_classes)
    intercept = rng.randn(n_classes)
    sample_weights = rng.randn(n_samples)
    # make the sample weights non-negative, in place
    np.abs(sample_weights, out=sample_weights)

    # compute loss and gradient like in multinomial SAG
    dataset, _ = make_dataset(X, y, sample_weights, random_state=42)
    loss_sag, grad_sag = _multinomial_grad_loss_all_samples(
        dataset, weights, intercept, n_samples, n_features, n_classes)

    # compute loss and gradient like in multinomial LogisticRegression
    Y_bin = LabelBinarizer().fit_transform(y)
    weights_intercept = np.vstack((weights, intercept)).T.ravel()
    loss_lr, grad_lr, _ = _multinomial_loss_grad(weights_intercept, X, Y_bin,
                                                 0.0, sample_weights)
    # drop the intercept column and go back to (n_features, n_classes)
    grad_lr = grad_lr.reshape(n_classes, -1)[:, :-1].T

    # comparison
    assert_array_almost_equal(grad_sag, grad_lr)
    assert_almost_equal(loss_sag, loss_lr)
def test_multinomial_loss_ground_truth():
    """Check the multinomial loss and gradient against hard-coded values.

    The loss and gradient are computed by hand with numpy, compared with
    ``_multinomial_loss_grad`` and then with stored ground-truth numbers.
    """
    # n_samples, n_features, n_classes = 4, 2, 3
    n_classes = 3
    X = np.array([[1.1, 2.2], [2.2, -4.4], [3.3, -2.2], [1.1, 1.1]])
    y = np.array([0, 1, 2, 0])
    Y_bin = LabelBinarizer().fit_transform(y)

    weights = np.array([[0.1, 0.2, 0.3], [1.1, 1.2, -1.3]])
    intercept = np.array([1., 0, -.2])
    sample_weights = np.array([0.8, 1, 1, 0.8])
    weight_column = sample_weights[:, np.newaxis]

    # hand-written log-probabilities, loss and gradient
    prediction = np.dot(X, weights) + intercept
    logsumexp_prediction = logsumexp(prediction, axis=1)
    log_proba = prediction - logsumexp_prediction[:, np.newaxis]
    loss_1 = -(weight_column * log_proba * Y_bin).sum()
    diff = weight_column * (np.exp(log_proba) - Y_bin)
    grad_1 = np.dot(X.T, diff)

    weights_intercept = np.vstack((weights, intercept)).T.ravel()
    loss_2, grad_2, _ = _multinomial_loss_grad(weights_intercept, X, Y_bin,
                                               0.0, sample_weights)
    # drop the intercept column and go back to (n_features, n_classes)
    grad_2 = grad_2.reshape(n_classes, -1)[:, :-1].T

    assert_almost_equal(loss_1, loss_2)
    assert_array_almost_equal(grad_1, grad_2)

    # ground truth
    loss_gt = 11.680360354325961
    grad_gt = np.array([[-0.557487, -1.619151, +2.176638],
                        [-0.903942, +5.258745, -4.354803]])
    assert_almost_equal(loss_1, loss_gt)
    assert_array_almost_equal(grad_1, grad_gt)
@pytest.mark.parametrize("solver", ["sag", "saga"])
def test_sag_classifier_raises_error(solver):
    """Non-regression test: numerical errors in cython sag must be raised.

    Following #13316, the error handling behavior changed in cython sag.
    """
    # Train a classifier on a simple problem
    rng = np.random.RandomState(42)
    X, y = make_classification(random_state=rng)
    clf = LogisticRegression(solver=solver, random_state=rng,
                             warm_start=True)
    clf.fit(X, y)

    # Trigger a numerical error by corrupting the fitted coefficients and
    # fitting again from that state, which warm_start makes possible.
    clf.coef_.fill(np.nan)

    with pytest.raises(ValueError, match="Floating-point under-/overflow"):
        clf.fit(X, y)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,300 @@
import numpy as np
import scipy.sparse as sp
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_warns
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import Lasso, ElasticNet, LassoCV, ElasticNetCV
def test_sparse_coef():
    """Check that the ``sparse_coef_`` property mirrors ``coef_``."""
    coef = [1, 2, 3]
    enet = ElasticNet()
    enet.coef_ = coef

    sparse_coef = enet.sparse_coef_
    assert sp.isspmatrix(sparse_coef)
    assert sparse_coef.toarray().tolist()[0] == coef
def test_normalize_option():
    """Check that ``normalize=True`` gives the same model on dense and sparse X.

    ``clf_dense`` is fitted on a dense array and ``clf_sparse`` on the CSC
    version of the same data, so the comparison really exercises both code
    paths (previously both estimators were fitted on a CSC matrix).
    """
    X_dense = np.array([[-1.], [0.], [1.]])
    X_sparse = sp.csc_matrix(X_dense)
    y = [-1, 0, 1]

    clf_dense = ElasticNet(normalize=True)
    clf_sparse = ElasticNet(normalize=True)
    clf_dense.fit(X_dense, y)
    clf_sparse.fit(X_sparse, y)

    assert_almost_equal(clf_dense.dual_gap_, 0)
    assert_array_almost_equal(clf_dense.coef_, clf_sparse.coef_)
def test_lasso_zero():
    """Check that sparse Lasso handles all-zero data without crashing."""
    X = sp.csc_matrix((3, 1))
    y = [0, 0, 0]
    T = np.array([[1], [2], [3]])

    lasso = Lasso().fit(X, y)
    predictions = lasso.predict(T)

    assert_array_almost_equal(lasso.coef_, [0])
    assert_array_almost_equal(predictions, [0, 0, 0])
    assert_almost_equal(lasso.dual_gap_, 0)
def test_enet_toy_list_input():
    """Test ElasticNet for several alpha / l1_ratio values on a toy line.

    ``X`` is built from a small array and converted to a CSC matrix; the
    targets are given as a plain list.
    """
    X = sp.csc_matrix(np.array([[-1], [0], [1]]))
    Y = [-1, 0, 1]  # just a straight line
    T = np.array([[2], [3], [4]])  # test sample

    # this should be the same as unregularized least squares
    clf = ElasticNet(alpha=0, l1_ratio=1.0)
    # alpha=0 is discouraged and warns, but should work.
    ignore_warnings(clf.fit)(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(pred, [2, 3, 4])
    assert_almost_equal(clf.dual_gap_, 0)

    # regularized settings with their expected coefficient and predictions
    regularized_cases = (
        (dict(alpha=0.5, l1_ratio=0.3, max_iter=1000),
         [0.50819], [1.0163, 1.5245, 2.0327]),
        (dict(alpha=0.5, l1_ratio=0.5),
         [0.45454], [0.9090, 1.3636, 1.8181]),
    )
    for params, expected_coef, expected_pred in regularized_cases:
        clf = ElasticNet(**params)
        clf.fit(X, Y)
        pred = clf.predict(T)
        assert_array_almost_equal(clf.coef_, expected_coef, decimal=3)
        assert_array_almost_equal(pred, expected_pred, decimal=3)
        assert_almost_equal(clf.dual_gap_, 0)
def test_enet_toy_explicit_sparse_input():
    """Test ElasticNet for several alpha / l1_ratio values with LIL input.

    Both the training and the test matrices are ``lil_matrix`` instances
    filled entry by entry.
    """
    # training samples: the middle entry is left as an implicit zero
    X = sp.lil_matrix((3, 1))
    X[0, 0] = -1
    X[2, 0] = 1
    Y = [-1, 0, 1]  # just a straight line (the identity function)

    # test samples
    T = sp.lil_matrix((3, 1))
    for row, value in enumerate((2, 3, 4)):
        T[row, 0] = value

    # this should be the same as lasso
    clf = ElasticNet(alpha=0, l1_ratio=1.0)
    ignore_warnings(clf.fit)(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(pred, [2, 3, 4])
    assert_almost_equal(clf.dual_gap_, 0)

    # regularized settings with their expected coefficient and predictions
    regularized_cases = (
        (dict(alpha=0.5, l1_ratio=0.3, max_iter=1000),
         [0.50819], [1.0163, 1.5245, 2.0327]),
        (dict(alpha=0.5, l1_ratio=0.5),
         [0.45454], [0.9090, 1.3636, 1.8181]),
    )
    for params, expected_coef, expected_pred in regularized_cases:
        clf = ElasticNet(**params)
        clf.fit(X, Y)
        pred = clf.predict(T)
        assert_array_almost_equal(clf.coef_, expected_coef, decimal=3)
        assert_array_almost_equal(pred, expected_pred, decimal=3)
        assert_almost_equal(clf.dual_gap_, 0)
def make_sparse_data(n_samples=100, n_features=100, n_informative=10, seed=42,
                     positive=False, n_targets=1):
    """Build a seeded sparse regression problem.

    Only the first ``n_informative`` rows of the ground-truth weights are
    non-zero (made non-negative when ``positive`` is true).  Entries of the
    Gaussian design matrix are zeroed wherever an independent uniform draw
    exceeds 0.5.

    Returns
    -------
    X : scipy.sparse.csc_matrix of shape (n_samples, n_features)
    y : ndarray, flattened to 1-D when ``n_targets == 1``
    """
    rs = np.random.RandomState(seed)

    # ground-truth model: only the top features impact the target
    coef = rs.randn(n_features, n_targets)
    coef[n_informative:] = 0.0
    if positive:
        coef = np.abs(coef)

    # design matrix with entries dropped according to a uniform mask
    dense_X = rs.randn(n_samples, n_features)
    drop_draws = rs.uniform(size=(n_samples, n_features))
    dense_X[drop_draws > 0.5] = 0.0

    # noise-free training targets
    y = np.dot(dense_X, coef)
    if n_targets == 1:
        y = np.ravel(y)
    return sp.csc_matrix(dense_X), y
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    """Compare ElasticNet on sparse and dense versions of the same data.

    Data from ``make_sparse_data`` is split in two halves; the second half
    is used for fitting and the first for scoring.  The sparse and dense
    fits must converge, score above 0.85, agree on coefficients and
    intercept, and the coefficients must be sparse.
    """
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    enet_params = dict(alpha=alpha, l1_ratio=0.8,
                       fit_intercept=fit_intercept, max_iter=max_iter,
                       tol=1e-7, positive=positive, warm_start=True)

    s_clf = ElasticNet(**enet_params)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(**enet_params)
    d_clf.fit(X_train.toarray(), y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    n_nonzero = np.sum(s_clf.coef_ != 0.0)
    assert n_nonzero < 2 * n_informative
def test_sparse_enet_not_as_toy_dataset():
    """Run the sparse/dense ElasticNet comparison for four configurations."""
    configurations = (
        # (alpha, fit_intercept, positive)
        (0.1, False, False),
        (0.1, True, False),
        (1e-3, False, True),
        (1e-3, True, True),
    )
    for alpha, fit_intercept, positive in configurations:
        _test_sparse_enet_not_as_toy_dataset(alpha=alpha,
                                             fit_intercept=fit_intercept,
                                             positive=positive)
def test_sparse_lasso_not_as_toy_dataset():
    """Check Lasso on sparse and dense versions of the same data.

    Both fits must converge and score above 0.85 on the held-out half, and
    the sparse fit must select exactly ``n_informative`` coefficients.
    """
    n_samples = 100
    max_iter = 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples=n_samples, n_informative=n_informative)
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    lasso_params = dict(alpha=0.1, fit_intercept=False, max_iter=max_iter,
                        tol=1e-7)

    s_clf = Lasso(**lasso_params)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = Lasso(**lasso_params)
    d_clf.fit(X_train.toarray(), y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    # check that the coefs are sparse
    n_nonzero = np.sum(s_clf.coef_ != 0.0)
    assert n_nonzero == n_informative
def test_enet_multitarget():
    """Check that a multi-target fit equals independent per-target fits."""
    n_targets = 3
    X, y = make_sparse_data(n_targets=n_targets)

    # XXX: There is a bug when precompute is not None!
    estimator = ElasticNet(alpha=0.01, precompute=None)
    estimator.fit(X, y)
    # keep the multi-target results before the estimator is refitted below
    multi_coef = estimator.coef_
    multi_intercept = estimator.intercept_
    multi_dual_gap = estimator.dual_gap_

    for k in range(n_targets):
        estimator.fit(X, y[:, k])
        assert_array_almost_equal(multi_coef[k, :], estimator.coef_)
        assert_array_almost_equal(multi_intercept[k], estimator.intercept_)
        assert_array_almost_equal(multi_dual_gap[k], estimator.dual_gap_)
def test_path_parameters():
    """Check ElasticNetCV path parameters and sparse/dense MSE paths."""
    X, y = make_sparse_data()
    max_iter = 50
    n_alphas = 10

    clf = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=max_iter,
                       l1_ratio=0.5, fit_intercept=False)
    fit_quietly = ignore_warnings(clf.fit)

    fit_quietly(X, y)  # new params
    assert_almost_equal(0.5, clf.l1_ratio)
    assert n_alphas == clf.n_alphas
    assert n_alphas == len(clf.alphas_)
    sparse_mse_path = clf.mse_path_

    fit_quietly(X.toarray(), y)  # compare with dense data
    assert_almost_equal(clf.mse_path_, sparse_mse_path)
def test_same_output_sparse_dense_lasso_and_enet_cv():
    """Check ElasticNetCV and LassoCV agree on sparse and dense input.

    For ``normalize`` in (True, False) each CV estimator is fitted on the
    sparse matrix and on its dense copy; alpha, intercept, MSE path and
    alpha grid must match.
    """
    X, y = make_sparse_data(n_samples=40, n_features=10)
    cv_estimators = (
        (ElasticNetCV, dict(max_iter=100)),
        (LassoCV, dict(max_iter=100, cv=4)),
    )
    for normalize in [True, False]:
        for Estimator, params in cv_estimators:
            clfs = Estimator(normalize=normalize, **params)
            ignore_warnings(clfs.fit)(X, y)
            clfd = Estimator(normalize=normalize, **params)
            ignore_warnings(clfd.fit)(X.toarray(), y)

            assert_almost_equal(clfs.alpha_, clfd.alpha_, 7)
            assert_almost_equal(clfs.intercept_, clfd.intercept_, 7)
            assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_)
            assert_array_almost_equal(clfs.alphas_, clfd.alphas_)
def test_same_multiple_output_sparse_dense():
    """Check multi-output ElasticNet predictions on list and COO input."""
    X = [[0, 1, 2, 3, 4],
         [0, 2, 5, 8, 11],
         [9, 10, 11, 12, 13],
         [10, 11, 12, 13, 14]]
    y = [[1, 2, 3, 4, 5],
         [1, 3, 6, 9, 12],
         [10, 11, 12, 13, 14],
         [11, 12, 13, 14, 15]]
    sample = np.array([1, 2, 3, 4, 5]).reshape(1, -1)

    for normalize in [True, False]:
        enet_dense = ElasticNet(normalize=normalize)
        ignore_warnings(enet_dense.fit)(X, y)
        predict_dense = enet_dense.predict(sample)

        enet_sparse = ElasticNet(normalize=normalize)
        ignore_warnings(enet_sparse.fit)(sp.coo_matrix(X), y)
        predict_sparse = enet_sparse.predict(sp.coo_matrix(sample))

        assert_array_almost_equal(predict_sparse, predict_dense)
def test_sparse_enet_coordinate_descent():
    """Test that a warning is issued if model does not converge"""
    n_samples, n_features = 5, 2
    X = sp.csc_matrix((n_samples, n_features)) * 1e50
    y = np.ones(n_samples)
    lasso = Lasso(max_iter=2)
    assert_warns(ConvergenceWarning, lasso.fit, X, y)

View file

@ -0,0 +1,281 @@
"""
Testing for Theil-Sen module (sklearn.linear_model._theil_sen)
"""
# Author: Florian Wilhelm <florian.wilhelm@gmail.com>
# License: BSD 3 clause
import os
import sys
from contextlib import contextmanager
import numpy as np
from numpy.testing import assert_array_equal, assert_array_less
from numpy.testing import assert_array_almost_equal, assert_warns
from scipy.linalg import norm
from scipy.optimize import fmin_bfgs
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LinearRegression, TheilSenRegressor
from sklearn.linear_model._theil_sen import _spatial_median, _breakdown_point
from sklearn.linear_model._theil_sen import _modified_weiszfeld_step
from sklearn.utils._testing import assert_almost_equal, assert_raises
@contextmanager
def no_stdout_stderr():
    """Context manager that sends ``sys.stdout`` and ``sys.stderr`` to devnull.

    The original streams are restored when the block exits, including when
    the block raises: without the ``try``/``finally`` an exception would
    leave both streams pointing at the already-closed devnull handle.
    """
    old_stdout = sys.stdout
    old_stderr = sys.stderr
    with open(os.devnull, 'w') as devnull:
        sys.stdout = devnull
        sys.stderr = devnull
        try:
            yield
        finally:
            # flush before the handle is closed, then put the streams back
            devnull.flush()
            sys.stdout = old_stdout
            sys.stderr = old_stderr
def gen_toy_problem_1d(intercept=True):
    """Generate a seeded 1-D linear problem with a few hand-placed outliers.

    The data follow ``y = 3 * x + c + noise`` with noise of standard
    deviation 0.1.  With ``intercept=True`` there are 50 samples and
    ``c = 2``; otherwise 100 samples and ``c = 0.1``.

    Returns
    -------
    (X, y, w, c) where ``X`` has shape (n_samples, 1).
    """
    random_state = np.random.RandomState(0)
    # Linear model y = 3*x + N(2, 0.1**2)
    w = 3.
    c, n_samples = (2., 50) if intercept else (0.1, 100)

    x = random_state.normal(size=n_samples)
    noise = 0.1 * random_state.normal(size=n_samples)
    y = w * x + c + noise

    # Add some outliers: sample index -> (x, y)
    if intercept:
        outliers = {42: (-2, 4), 43: (-2.5, 8), 33: (2.5, 1), 49: (2.1, 2)}
    else:
        outliers = {42: (-2, 4), 43: (-2.5, 8), 53: (2.5, 1), 60: (2.1, 2),
                    72: (1.8, -7)}
    for index, (x_value, y_value) in outliers.items():
        x[index], y[index] = x_value, y_value

    return x[:, np.newaxis], y, w, c
def gen_toy_problem_2d():
    """Generate a seeded 2-D linear problem with random outliers.

    The data follow ``y = 5*x_1 + 10*x_2 + 1 + noise`` with noise of standard
    deviation 0.1 over 100 samples; the targets at ``n_samples // 10``
    randomly drawn indices are then replaced by large Gaussian values.

    Returns
    -------
    (X, y, w, c)
    """
    n_samples = 100
    rs = np.random.RandomState(0)

    # Linear model y = 5*x_1 + 10*x_2 + N(1, 0.1**2)
    w = np.array([5., 10.])
    c = 1.
    X = rs.normal(size=(n_samples, 2))
    noise = 0.1 * rs.normal(size=n_samples)
    y = np.dot(X, w) + c + noise

    # Add some outliers
    n_outliers = n_samples // 10
    outlier_idx = rs.randint(0, n_samples, size=n_outliers)
    y[outlier_idx] = 50 * rs.normal(size=n_outliers)
    return X, y, w, c
def gen_toy_problem_4d():
    """Generate a seeded 4-D linear problem with random outliers.

    The data follow ``y = 5*x_1 + 10*x_2 + 42*x_3 + 7*x_4 + 1 + noise`` with
    noise of standard deviation 0.1 over 10000 samples; the targets at
    ``n_samples // 10`` randomly drawn indices are then replaced by large
    Gaussian values.

    Returns
    -------
    (X, y, w, c)
    """
    n_samples = 10000
    rs = np.random.RandomState(0)

    # Linear model y = 5*x_1 + 10*x_2 + 42*x_3 + 7*x_4 + N(1, 0.1**2)
    w = np.array([5., 10., 42., 7.])
    c = 1.
    X = rs.normal(size=(n_samples, 4))
    noise = 0.1 * rs.normal(size=n_samples)
    y = np.dot(X, w) + c + noise

    # Add some outliers
    n_outliers = n_samples // 10
    outlier_idx = rs.randint(0, n_samples, size=n_outliers)
    y[outlier_idx] = 50 * rs.normal(size=n_outliers)
    return X, y, w, c
def test_modweiszfeld_step_1d():
    """Check one modified Weiszfeld step on 1-D data."""
    points = np.array([1., 2., 3.]).reshape(3, 1)
    median = 2.

    # Check startvalue is element of X and solution
    step = _modified_weiszfeld_step(points, median)
    assert_array_almost_equal(step, median)

    # A start value that is not the solution (2.5), and one that is not the
    # solution but is an element of X (3.): the step must land strictly
    # between the median and the start value.
    for start in (2.5, 3.):
        step = _modified_weiszfeld_step(points, start)
        assert_array_less(median, step)
        assert_array_less(step, start)

    # Check that a single vector is identity
    single = np.array([1., 2., 3.]).reshape(1, 3)
    start = single[0]
    step = _modified_weiszfeld_step(single, start)
    assert_array_equal(start, step)
def test_modweiszfeld_step_2d():
    """Check modified Weiszfeld steps on three 2-D points."""
    points = np.array([0., 0., 1., 1., 0., 1.]).reshape(3, 2)

    # Check first two iterations
    first = _modified_weiszfeld_step(points, np.array([0.5, 0.5]))
    assert_array_almost_equal(first, np.array([1 / 3, 2 / 3]))
    second = _modified_weiszfeld_step(points, first)
    assert_array_almost_equal(second, np.array([0.2792408, 0.7207592]))

    # Check fix point
    fixed_point = np.array([0.21132505, 0.78867497])
    after_step = _modified_weiszfeld_step(points, fixed_point)
    assert_array_almost_equal(after_step, fixed_point)
def test_spatial_median_1d():
    """Check ``_spatial_median`` against the ordinary median in 1-D."""
    small = np.array([1., 2., 3.]).reshape(3, 1)
    _, median = _spatial_median(small)
    assert_array_almost_equal(median, 2.)

    # Test larger problem and for exact solution in 1d case
    rs = np.random.RandomState(0)
    large = rs.randint(100, size=(1000, 1))
    expected = np.median(large.ravel())
    _, median = _spatial_median(large)
    assert_array_equal(median, expected)
def test_spatial_median_2d():
    """Check ``_spatial_median`` in 2-D and its non-convergence warning."""
    X = np.array([0., 0., 1., 1., 0., 1.]).reshape(3, 2)
    _, median = _spatial_median(X, max_iter=100, tol=1.e-6)

    def cost_func(y):
        # sum of Euclidean distances from y to every row of X
        return np.sum(np.array([norm(row - y) for row in X]))

    # Check if median is solution of the Fermat-Weber location problem
    fermat_weber = fmin_bfgs(cost_func, median, disp=False)
    assert_array_almost_equal(median, fermat_weber)

    # Check when maximum iteration is exceeded a warning is emitted
    assert_warns(ConvergenceWarning, _spatial_median, X, max_iter=30, tol=0.)
def test_theil_sen_1d():
    """Check Theil-Sen on the 1d toy problem, where Least Squares fails."""
    X, y, w, c = gen_toy_problem_1d()

    # Least Squares is expected to miss the true coefficient by a wide margin.
    least_squares = LinearRegression().fit(X, y)
    coef_error = np.abs(least_squares.coef_ - w)
    assert coef_error > 0.9

    # Theil-Sen must match coefficient and intercept to one decimal.
    robust = TheilSenRegressor(random_state=0).fit(X, y)
    assert_array_almost_equal(robust.coef_, w, 1)
    assert_array_almost_equal(robust.intercept_, c, 1)
def test_theil_sen_1d_no_intercept():
    """Check Theil-Sen with ``fit_intercept=False`` on the 1d toy problem."""
    X, y, w, c = gen_toy_problem_1d(intercept=False)

    # Least Squares without intercept is expected to fail.
    least_squares = LinearRegression(fit_intercept=False).fit(X, y)
    coef_error = np.abs(least_squares.coef_ - w - c)
    assert coef_error > 0.5

    # Theil-Sen without intercept must find ``w + c`` and a zero intercept.
    robust = TheilSenRegressor(random_state=0, fit_intercept=False)
    fitted = robust.fit(X, y)
    assert_array_almost_equal(fitted.coef_, w + c, 1)
    assert_almost_equal(fitted.intercept_, 0.)
def test_theil_sen_2d():
    """Check Theil-Sen on the 2d toy problem, where Least Squares fails."""
    X, y, w, c = gen_toy_problem_2d()

    # Least Squares is expected to be far away from the true coefficients.
    least_squares = LinearRegression().fit(X, y)
    coef_distance = norm(least_squares.coef_ - w)
    assert coef_distance > 1.0

    # Theil-Sen must match coefficients and intercept to one decimal.
    robust = TheilSenRegressor(random_state=0, max_subpopulation=1e3)
    fitted = robust.fit(X, y)
    assert_array_almost_equal(fitted.coef_, w, 1)
    assert_array_almost_equal(fitted.intercept_, c, 1)
def test_calc_breakdown_point():
    """Check ``_breakdown_point(1e10, 2)`` against ``1 - 1 / sqrt(2)``."""
    breakdown = _breakdown_point(1e10, 2)
    inv_sqrt_two = 1 / (np.sqrt(2))
    # Same evaluation order as ``bp - 1 + 1 / sqrt(2)``.
    deviation = np.abs(breakdown - 1 + inv_sqrt_two)
    assert deviation < 1.e-6
def test_checksubparams_negative_subpopulation():
    """Check that ``max_subpopulation=-1`` makes ``fit`` raise ValueError."""
    X, y, _, _ = gen_toy_problem_1d()
    estimator = TheilSenRegressor(random_state=0, max_subpopulation=-1)
    with assert_raises(ValueError):
        estimator.fit(X, y)
def test_checksubparams_too_few_subsamples():
    """Check that ``n_subsamples=1`` makes ``fit`` raise ValueError."""
    X, y, _, _ = gen_toy_problem_1d()
    estimator = TheilSenRegressor(random_state=0, n_subsamples=1)
    with assert_raises(ValueError):
        estimator.fit(X, y)
def test_checksubparams_too_many_subsamples():
    """Check that ``n_subsamples=101`` makes ``fit`` raise ValueError."""
    X, y, _, _ = gen_toy_problem_1d()
    estimator = TheilSenRegressor(random_state=0, n_subsamples=101)
    with assert_raises(ValueError):
        estimator.fit(X, y)
def test_checksubparams_n_subsamples_if_less_samples_than_features():
    """Check ``n_subsamples=9`` is rejected for 10 samples and 20 features."""
    rs = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    # Draw X before y so the random stream is consumed in the same order.
    X = rs.normal(size=(n_samples, n_features))
    y = rs.normal(size=n_samples)
    estimator = TheilSenRegressor(random_state=0, n_subsamples=9)
    with assert_raises(ValueError):
        estimator.fit(X, y)
def test_subpopulation():
    """Check Theil-Sen on the 4d toy problem with ``max_subpopulation=250``."""
    X, y, w, c = gen_toy_problem_4d()
    estimator = TheilSenRegressor(random_state=0, max_subpopulation=250)
    fitted = estimator.fit(X, y)
    # Coefficients and intercept must match the true values to one decimal.
    assert_array_almost_equal(fitted.coef_, w, 1)
    assert_array_almost_equal(fitted.intercept_, c, 1)
def test_subsamples():
    """Check that ``n_subsamples == n_samples`` reproduces Least Squares."""
    X, y, _, _ = gen_toy_problem_4d()
    n_samples = X.shape[0]
    robust = TheilSenRegressor(random_state=0, n_subsamples=n_samples)
    fitted = robust.fit(X, y)
    least_squares = LinearRegression().fit(X, y)
    # The coefficients must agree with Least Squares to nine decimals.
    assert_array_almost_equal(fitted.coef_, least_squares.coef_, 9)
def test_verbosity():
    """Check that Theil-Sen can be fitted with ``verbose=True``."""
    X, y, _, _ = gen_toy_problem_1d()
    # Fit once with default settings, then once with max_subpopulation=10.
    extra_settings = ({}, {"max_subpopulation": 10})
    with no_stdout_stderr():
        for extra in extra_settings:
            TheilSenRegressor(verbose=True, random_state=0,
                              **extra).fit(X, y)
def test_theil_sen_parallel():
    """Check Theil-Sen with ``n_jobs=2`` on the 2d toy problem."""
    X, y, w, c = gen_toy_problem_2d()

    # Least Squares is expected to be far away from the true coefficients.
    least_squares = LinearRegression().fit(X, y)
    coef_distance = norm(least_squares.coef_ - w)
    assert coef_distance > 1.0

    # Theil-Sen with two jobs must match the true values to one decimal.
    robust = TheilSenRegressor(max_subpopulation=2e3, random_state=0,
                               n_jobs=2)
    fitted = robust.fit(X, y)
    assert_array_almost_equal(fitted.coef_, w, 1)
    assert_array_almost_equal(fitted.intercept_, c, 1)
def test_less_samples_than_features():
    """Check Theil-Sen when there are fewer samples (10) than features (20)."""
    rs = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    # Draw X before y so the random stream is consumed in the same order.
    X = rs.normal(size=(n_samples, n_features))
    y = rs.normal(size=n_samples)

    # With fit_intercept=False Theil-Sen falls back to Least Squares, so the
    # coefficients of both estimators must agree.
    robust = TheilSenRegressor(random_state=0, fit_intercept=False)
    fitted = robust.fit(X, y)
    least_squares = LinearRegression(fit_intercept=False).fit(X, y)
    assert_array_almost_equal(fitted.coef_, least_squares.coef_, 12)

    # With fit_intercept=True the result differs from the Least Squares
    # solution because the intercept is calculated differently; the
    # predictions on the training data must still reproduce y.
    robust = TheilSenRegressor(random_state=0, fit_intercept=True)
    predictions = robust.fit(X, y).predict(X)
    assert_array_almost_equal(predictions, y, 12)