Uploaded test files
This commit is contained in:
parent
f584ad9d97
commit
2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
16 binary files changed (contents not shown).
530
venv/Lib/site-packages/sklearn/linear_model/tests/test_base.py
Normal file
@@ -0,0 +1,530 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Fabian Pedregosa <fabian.pedregosa@inria.fr>
#
# License: BSD 3 clause

import pytest

import numpy as np
from scipy import sparse
from scipy import linalg

from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_allclose
from sklearn.utils.fixes import parse_version

from sklearn.linear_model import LinearRegression
from sklearn.linear_model._base import _preprocess_data
from sklearn.linear_model._base import _rescale_data
from sklearn.linear_model._base import make_dataset
from sklearn.utils import check_random_state
from sklearn.datasets import make_sparse_uncorrelated
from sklearn.datasets import make_regression
from sklearn.datasets import load_iris

rng = np.random.RandomState(0)
rtol = 1e-6
|
||||
|
||||
|
||||
def test_linear_regression():
|
||||
# Test LinearRegression on a simple dataset.
|
||||
# a simple dataset
|
||||
X = [[1], [2]]
|
||||
Y = [1, 2]
|
||||
|
||||
reg = LinearRegression()
|
||||
reg.fit(X, Y)
|
||||
|
||||
assert_array_almost_equal(reg.coef_, [1])
|
||||
assert_array_almost_equal(reg.intercept_, [0])
|
||||
assert_array_almost_equal(reg.predict(X), [1, 2])
|
||||
|
||||
# test it also for degenerate input
|
||||
X = [[1]]
|
||||
Y = [0]
|
||||
|
||||
reg = LinearRegression()
|
||||
reg.fit(X, Y)
|
||||
assert_array_almost_equal(reg.coef_, [0])
|
||||
assert_array_almost_equal(reg.intercept_, [0])
|
||||
assert_array_almost_equal(reg.predict(X), [0])
|
||||
|
||||
|
||||
def test_linear_regression_sample_weights():
|
||||
# TODO: loop over sparse data as well
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
|
||||
# It would not work with under-determined systems
|
||||
for n_samples, n_features in ((6, 5), ):
|
||||
|
||||
y = rng.randn(n_samples)
|
||||
X = rng.randn(n_samples, n_features)
|
||||
sample_weight = 1.0 + rng.rand(n_samples)
|
||||
|
||||
for intercept in (True, False):
|
||||
|
||||
# LinearRegression with explicit sample_weight
|
||||
reg = LinearRegression(fit_intercept=intercept)
|
||||
reg.fit(X, y, sample_weight=sample_weight)
|
||||
coefs1 = reg.coef_
|
||||
inter1 = reg.intercept_
|
||||
|
||||
assert reg.coef_.shape == (X.shape[1], ) # sanity checks
|
||||
assert reg.score(X, y) > 0.5
|
||||
|
||||
# Closed form of the weighted least square
|
||||
# theta = (X^T W X)^(-1) * X^T W y
|
||||
W = np.diag(sample_weight)
|
||||
if intercept is False:
|
||||
X_aug = X
|
||||
else:
|
||||
dummy_column = np.ones(shape=(n_samples, 1))
|
||||
X_aug = np.concatenate((dummy_column, X), axis=1)
|
||||
|
||||
coefs2 = linalg.solve(X_aug.T.dot(W).dot(X_aug),
|
||||
X_aug.T.dot(W).dot(y))
|
||||
|
||||
if intercept is False:
|
||||
assert_array_almost_equal(coefs1, coefs2)
|
||||
else:
|
||||
assert_array_almost_equal(coefs1, coefs2[1:])
|
||||
assert_almost_equal(inter1, coefs2[0])
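
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above): the closed form theta = (X^T W X)^(-1) X^T W y checked above is
# the minimizer of sum_i w_i * (y_i - x_i . theta)^2, which is the same as
# an ordinary least-squares fit on rows rescaled by sqrt(w_i).
def _weighted_lstsq_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.randn(6, 5)
    y = rng.randn(6)
    w = 1.0 + rng.rand(6)
    # closed form with the weight matrix W = diag(w)
    W = np.diag(w)
    theta_closed = np.linalg.solve(X.T @ W @ X, X.T @ W @ y)
    # same solution from plain lstsq on sqrt(w)-rescaled rows
    sw = np.sqrt(w)
    theta_rescaled = np.linalg.lstsq(X * sw[:, None], y * sw, rcond=None)[0]
    np.testing.assert_allclose(theta_closed, theta_rescaled)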
|
||||
|
||||
|
||||
def test_raises_value_error_if_sample_weights_greater_than_1d():
|
||||
# Sample weights must be either scalar or 1D
|
||||
|
||||
n_sampless = [2, 3]
|
||||
n_featuress = [3, 2]
|
||||
|
||||
for n_samples, n_features in zip(n_sampless, n_featuress):
|
||||
X = rng.randn(n_samples, n_features)
|
||||
y = rng.randn(n_samples)
|
||||
sample_weights_OK = rng.randn(n_samples) ** 2 + 1
|
||||
sample_weights_OK_1 = 1.
|
||||
sample_weights_OK_2 = 2.
|
||||
|
||||
reg = LinearRegression()
|
||||
|
||||
# make sure the "OK" sample weights actually work
|
||||
reg.fit(X, y, sample_weights_OK)
|
||||
reg.fit(X, y, sample_weights_OK_1)
|
||||
reg.fit(X, y, sample_weights_OK_2)
|
||||
|
||||
|
||||
def test_fit_intercept():
|
||||
# Test assertions on betas shape.
|
||||
X2 = np.array([[0.38349978, 0.61650022],
|
||||
[0.58853682, 0.41146318]])
|
||||
X3 = np.array([[0.27677969, 0.70693172, 0.01628859],
|
||||
[0.08385139, 0.20692515, 0.70922346]])
|
||||
y = np.array([1, 1])
|
||||
|
||||
lr2_without_intercept = LinearRegression(fit_intercept=False).fit(X2, y)
|
||||
lr2_with_intercept = LinearRegression().fit(X2, y)
|
||||
|
||||
lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y)
|
||||
lr3_with_intercept = LinearRegression().fit(X3, y)
|
||||
|
||||
assert (lr2_with_intercept.coef_.shape ==
|
||||
lr2_without_intercept.coef_.shape)
|
||||
assert (lr3_with_intercept.coef_.shape ==
|
||||
lr3_without_intercept.coef_.shape)
|
||||
assert (lr2_without_intercept.coef_.ndim ==
|
||||
lr3_without_intercept.coef_.ndim)
|
||||
|
||||
|
||||
def test_linear_regression_sparse(random_state=0):
|
||||
# Test that linear regression also works with sparse data
|
||||
random_state = check_random_state(random_state)
|
||||
for i in range(10):
|
||||
n = 100
|
||||
X = sparse.eye(n, n)
|
||||
beta = random_state.rand(n)
|
||||
y = X * beta[:, np.newaxis]
|
||||
|
||||
ols = LinearRegression()
|
||||
ols.fit(X, y.ravel())
|
||||
assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)
|
||||
|
||||
assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('normalize', [True, False])
|
||||
@pytest.mark.parametrize('fit_intercept', [True, False])
|
||||
def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
|
||||
# Test that linear regression agrees between sparse and dense
|
||||
rng = check_random_state(0)
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.randn(n_samples, n_features)
|
||||
X[X < 0.1] = 0.
|
||||
Xcsr = sparse.csr_matrix(X)
|
||||
y = rng.rand(n_samples)
|
||||
params = dict(normalize=normalize, fit_intercept=fit_intercept)
|
||||
clf_dense = LinearRegression(**params)
|
||||
clf_sparse = LinearRegression(**params)
|
||||
clf_dense.fit(X, y)
|
||||
clf_sparse.fit(Xcsr, y)
|
||||
assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
|
||||
assert_allclose(clf_dense.coef_, clf_sparse.coef_)
|
||||
|
||||
|
||||
def test_linear_regression_multiple_outcome(random_state=0):
|
||||
# Test multiple-outcome linear regressions
|
||||
X, y = make_regression(random_state=random_state)
|
||||
|
||||
Y = np.vstack((y, y)).T
|
||||
n_features = X.shape[1]
|
||||
|
||||
reg = LinearRegression()
|
||||
reg.fit((X), Y)
|
||||
assert reg.coef_.shape == (2, n_features)
|
||||
Y_pred = reg.predict(X)
|
||||
reg.fit(X, y)
|
||||
y_pred = reg.predict(X)
|
||||
assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
|
||||
|
||||
|
||||
def test_linear_regression_sparse_multiple_outcome(random_state=0):
|
||||
# Test multiple-outcome linear regressions with sparse data
|
||||
random_state = check_random_state(random_state)
|
||||
X, y = make_sparse_uncorrelated(random_state=random_state)
|
||||
X = sparse.coo_matrix(X)
|
||||
Y = np.vstack((y, y)).T
|
||||
n_features = X.shape[1]
|
||||
|
||||
ols = LinearRegression()
|
||||
ols.fit(X, Y)
|
||||
assert ols.coef_.shape == (2, n_features)
|
||||
Y_pred = ols.predict(X)
|
||||
ols.fit(X, y.ravel())
|
||||
y_pred = ols.predict(X)
|
||||
assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
|
||||
|
||||
|
||||
def test_linear_regression_pd_sparse_dataframe_warning():
|
||||
pd = pytest.importorskip('pandas')
|
||||
# restrict the pd versions < '0.24.0' as they have a bug in is_sparse func
|
||||
if parse_version(pd.__version__) < parse_version('0.24.0'):
|
||||
pytest.skip("pandas 0.24+ required.")
|
||||
|
||||
# Warning is raised only when some of the columns is sparse
|
||||
df = pd.DataFrame({'0': np.random.randn(10)})
|
||||
for col in range(1, 4):
|
||||
arr = np.random.randn(10)
|
||||
arr[:8] = 0
|
||||
# all columns but the first column is sparse
|
||||
if col != 0:
|
||||
arr = pd.arrays.SparseArray(arr, fill_value=0)
|
||||
df[str(col)] = arr
|
||||
|
||||
msg = "pandas.DataFrame with sparse columns found."
|
||||
with pytest.warns(UserWarning, match=msg):
|
||||
reg = LinearRegression()
|
||||
reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
|
||||
|
||||
# does not warn when the whole dataframe is sparse
|
||||
df['0'] = pd.arrays.SparseArray(df['0'], fill_value=0)
|
||||
assert hasattr(df, "sparse")
|
||||
|
||||
with pytest.warns(None) as record:
|
||||
reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
|
||||
assert not record
|
||||
|
||||
|
||||
def test_preprocess_data():
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples)
|
||||
expected_X_mean = np.mean(X, axis=0)
|
||||
expected_X_norm = np.std(X, axis=0) * np.sqrt(X.shape[0])
|
||||
expected_y_mean = np.mean(y, axis=0)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=False, normalize=False)
|
||||
assert_array_almost_equal(X_mean, np.zeros(n_features))
|
||||
assert_array_almost_equal(y_mean, 0)
|
||||
assert_array_almost_equal(X_norm, np.ones(n_features))
|
||||
assert_array_almost_equal(Xt, X)
|
||||
assert_array_almost_equal(yt, y)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=False)
|
||||
assert_array_almost_equal(X_mean, expected_X_mean)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(X_norm, np.ones(n_features))
|
||||
assert_array_almost_equal(Xt, X - expected_X_mean)
|
||||
assert_array_almost_equal(yt, y - expected_y_mean)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=True)
|
||||
assert_array_almost_equal(X_mean, expected_X_mean)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(X_norm, expected_X_norm)
|
||||
assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
|
||||
assert_array_almost_equal(yt, y - expected_y_mean)
|
||||
|
||||
|
||||
def test_preprocess_data_multioutput():
|
||||
n_samples = 200
|
||||
n_features = 3
|
||||
n_outputs = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples, n_outputs)
|
||||
expected_y_mean = np.mean(y, axis=0)
|
||||
|
||||
args = [X, sparse.csc_matrix(X)]
|
||||
for X in args:
|
||||
_, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=False,
|
||||
normalize=False)
|
||||
assert_array_almost_equal(y_mean, np.zeros(n_outputs))
|
||||
assert_array_almost_equal(yt, y)
|
||||
|
||||
_, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
|
||||
normalize=False)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(yt, y - y_mean)
|
||||
|
||||
_, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
|
||||
normalize=True)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(yt, y - y_mean)
|
||||
|
||||
|
||||
def test_preprocess_data_weighted():
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples)
|
||||
sample_weight = rng.rand(n_samples)
|
||||
expected_X_mean = np.average(X, axis=0, weights=sample_weight)
|
||||
expected_y_mean = np.average(y, axis=0, weights=sample_weight)
|
||||
|
||||
# XXX: if normalize=True, should we expect a weighted standard deviation?
|
||||
# Currently not weighted, but calculated with respect to weighted mean
|
||||
expected_X_norm = (np.sqrt(X.shape[0]) *
|
||||
np.mean((X - expected_X_mean) ** 2, axis=0) ** .5)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=False,
|
||||
sample_weight=sample_weight)
|
||||
assert_array_almost_equal(X_mean, expected_X_mean)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(X_norm, np.ones(n_features))
|
||||
assert_array_almost_equal(Xt, X - expected_X_mean)
|
||||
assert_array_almost_equal(yt, y - expected_y_mean)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=True,
|
||||
sample_weight=sample_weight)
|
||||
assert_array_almost_equal(X_mean, expected_X_mean)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(X_norm, expected_X_norm)
|
||||
assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
|
||||
assert_array_almost_equal(yt, y - expected_y_mean)
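
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above) spelling out the convention questioned in the XXX comment: the
# expected norm is the unweighted root-mean-square deviation about the
# *weighted* mean, times sqrt(n_samples), which is not the same as a fully
# weighted standard deviation.
def _weighted_scale_convention_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(200, 2)
    w = rng.rand(200)
    mean_w = np.average(X, axis=0, weights=w)            # weighted mean
    scale_used = np.sqrt(X.shape[0]) * np.sqrt(
        np.mean((X - mean_w) ** 2, axis=0))               # unweighted spread
    scale_weighted = np.sqrt(X.shape[0]) * np.sqrt(
        np.average((X - mean_w) ** 2, axis=0, weights=w))
    # the two conventions generally disagree
    assert not np.allclose(scale_used, scale_weighted)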
|
||||
|
||||
|
||||
def test_sparse_preprocess_data_with_return_mean():
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
# random_state not supported yet in sparse.rand
|
||||
X = sparse.rand(n_samples, n_features, density=.5) # , random_state=rng
|
||||
X = X.tolil()
|
||||
y = rng.rand(n_samples)
|
||||
XA = X.toarray()
|
||||
expected_X_norm = np.std(XA, axis=0) * np.sqrt(X.shape[0])
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=False, normalize=False,
|
||||
return_mean=True)
|
||||
assert_array_almost_equal(X_mean, np.zeros(n_features))
|
||||
assert_array_almost_equal(y_mean, 0)
|
||||
assert_array_almost_equal(X_norm, np.ones(n_features))
|
||||
assert_array_almost_equal(Xt.A, XA)
|
||||
assert_array_almost_equal(yt, y)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=False,
|
||||
return_mean=True)
|
||||
assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
|
||||
assert_array_almost_equal(y_mean, np.mean(y, axis=0))
|
||||
assert_array_almost_equal(X_norm, np.ones(n_features))
|
||||
assert_array_almost_equal(Xt.A, XA)
|
||||
assert_array_almost_equal(yt, y - np.mean(y, axis=0))
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=True,
|
||||
return_mean=True)
|
||||
assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
|
||||
assert_array_almost_equal(y_mean, np.mean(y, axis=0))
|
||||
assert_array_almost_equal(X_norm, expected_X_norm)
|
||||
assert_array_almost_equal(Xt.A, XA / expected_X_norm)
|
||||
assert_array_almost_equal(yt, y - np.mean(y, axis=0))
|
||||
|
||||
|
||||
def test_csr_preprocess_data():
|
||||
# Test output format of _preprocess_data, when input is csr
|
||||
X, y = make_regression()
|
||||
X[X < 2.5] = 0.0
|
||||
csr = sparse.csr_matrix(X)
|
||||
csr_, y, _, _, _ = _preprocess_data(csr, y, True)
|
||||
assert csr_.getformat() == 'csr'
|
||||
|
||||
|
||||
@pytest.mark.parametrize('is_sparse', (True, False))
|
||||
@pytest.mark.parametrize('to_copy', (True, False))
|
||||
def test_preprocess_copy_data_no_checks(is_sparse, to_copy):
|
||||
X, y = make_regression()
|
||||
X[X < 2.5] = 0.0
|
||||
|
||||
if is_sparse:
|
||||
X = sparse.csr_matrix(X)
|
||||
|
||||
X_, y_, _, _, _ = _preprocess_data(X, y, True,
|
||||
copy=to_copy, check_input=False)
|
||||
|
||||
if to_copy and is_sparse:
|
||||
assert not np.may_share_memory(X_.data, X.data)
|
||||
elif to_copy:
|
||||
assert not np.may_share_memory(X_, X)
|
||||
elif is_sparse:
|
||||
assert np.may_share_memory(X_.data, X.data)
|
||||
else:
|
||||
assert np.may_share_memory(X_, X)
|
||||
|
||||
|
||||
def test_dtype_preprocess_data():
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples)
|
||||
|
||||
X_32 = np.asarray(X, dtype=np.float32)
|
||||
y_32 = np.asarray(y, dtype=np.float32)
|
||||
X_64 = np.asarray(X, dtype=np.float64)
|
||||
y_64 = np.asarray(y, dtype=np.float64)
|
||||
|
||||
for fit_intercept in [True, False]:
|
||||
for normalize in [True, False]:
|
||||
|
||||
Xt_32, yt_32, X_mean_32, y_mean_32, X_norm_32 = _preprocess_data(
|
||||
X_32, y_32, fit_intercept=fit_intercept, normalize=normalize,
|
||||
return_mean=True)
|
||||
|
||||
Xt_64, yt_64, X_mean_64, y_mean_64, X_norm_64 = _preprocess_data(
|
||||
X_64, y_64, fit_intercept=fit_intercept, normalize=normalize,
|
||||
return_mean=True)
|
||||
|
||||
Xt_3264, yt_3264, X_mean_3264, y_mean_3264, X_norm_3264 = (
|
||||
_preprocess_data(X_32, y_64, fit_intercept=fit_intercept,
|
||||
normalize=normalize, return_mean=True))
|
||||
|
||||
Xt_6432, yt_6432, X_mean_6432, y_mean_6432, X_norm_6432 = (
|
||||
_preprocess_data(X_64, y_32, fit_intercept=fit_intercept,
|
||||
normalize=normalize, return_mean=True))
|
||||
|
||||
assert Xt_32.dtype == np.float32
|
||||
assert yt_32.dtype == np.float32
|
||||
assert X_mean_32.dtype == np.float32
|
||||
assert y_mean_32.dtype == np.float32
|
||||
assert X_norm_32.dtype == np.float32
|
||||
|
||||
assert Xt_64.dtype == np.float64
|
||||
assert yt_64.dtype == np.float64
|
||||
assert X_mean_64.dtype == np.float64
|
||||
assert y_mean_64.dtype == np.float64
|
||||
assert X_norm_64.dtype == np.float64
|
||||
|
||||
assert Xt_3264.dtype == np.float32
|
||||
assert yt_3264.dtype == np.float32
|
||||
assert X_mean_3264.dtype == np.float32
|
||||
assert y_mean_3264.dtype == np.float32
|
||||
assert X_norm_3264.dtype == np.float32
|
||||
|
||||
assert Xt_6432.dtype == np.float64
|
||||
assert yt_6432.dtype == np.float64
|
||||
assert X_mean_6432.dtype == np.float64
|
||||
assert y_mean_6432.dtype == np.float64
|
||||
assert X_norm_6432.dtype == np.float64
|
||||
|
||||
assert X_32.dtype == np.float32
|
||||
assert y_32.dtype == np.float32
|
||||
assert X_64.dtype == np.float64
|
||||
assert y_64.dtype == np.float64
|
||||
|
||||
assert_array_almost_equal(Xt_32, Xt_64)
|
||||
assert_array_almost_equal(yt_32, yt_64)
|
||||
assert_array_almost_equal(X_mean_32, X_mean_64)
|
||||
assert_array_almost_equal(y_mean_32, y_mean_64)
|
||||
assert_array_almost_equal(X_norm_32, X_norm_64)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('n_targets', [None, 2])
|
||||
def test_rescale_data_dense(n_targets):
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
|
||||
sample_weight = 1.0 + rng.rand(n_samples)
|
||||
X = rng.rand(n_samples, n_features)
|
||||
if n_targets is None:
|
||||
y = rng.rand(n_samples)
|
||||
else:
|
||||
y = rng.rand(n_samples, n_targets)
|
||||
rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)
|
||||
rescaled_X2 = X * np.sqrt(sample_weight)[:, np.newaxis]
|
||||
if n_targets is None:
|
||||
rescaled_y2 = y * np.sqrt(sample_weight)
|
||||
else:
|
||||
rescaled_y2 = y * np.sqrt(sample_weight)[:, np.newaxis]
|
||||
assert_array_almost_equal(rescaled_X, rescaled_X2)
|
||||
assert_array_almost_equal(rescaled_y, rescaled_y2)
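
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above): rescaling X and y by sqrt(sample_weight), as _rescale_data does,
# is exactly the transformation that turns a weighted squared loss into an
# unweighted one, for any coefficient vector.
def _rescale_identity_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(200, 2)
    y = rng.rand(200)
    w = 1.0 + rng.rand(200)
    coef = rng.randn(2)
    weighted_loss = np.sum(w * (y - X @ coef) ** 2)
    sw = np.sqrt(w)
    plain_loss = np.sum((y * sw - (X * sw[:, None]) @ coef) ** 2)
    np.testing.assert_allclose(weighted_loss, plain_loss)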
|
||||
|
||||
|
||||
def test_fused_types_make_dataset():
|
||||
iris = load_iris()
|
||||
|
||||
X_32 = iris.data.astype(np.float32)
|
||||
y_32 = iris.target.astype(np.float32)
|
||||
X_csr_32 = sparse.csr_matrix(X_32)
|
||||
sample_weight_32 = np.arange(y_32.size, dtype=np.float32)
|
||||
|
||||
X_64 = iris.data.astype(np.float64)
|
||||
y_64 = iris.target.astype(np.float64)
|
||||
X_csr_64 = sparse.csr_matrix(X_64)
|
||||
sample_weight_64 = np.arange(y_64.size, dtype=np.float64)
|
||||
|
||||
# array
|
||||
dataset_32, _ = make_dataset(X_32, y_32, sample_weight_32)
|
||||
dataset_64, _ = make_dataset(X_64, y_64, sample_weight_64)
|
||||
xi_32, yi_32, _, _ = dataset_32._next_py()
|
||||
xi_64, yi_64, _, _ = dataset_64._next_py()
|
||||
xi_data_32, _, _ = xi_32
|
||||
xi_data_64, _, _ = xi_64
|
||||
|
||||
assert xi_data_32.dtype == np.float32
|
||||
assert xi_data_64.dtype == np.float64
|
||||
assert_allclose(yi_64, yi_32, rtol=rtol)
|
||||
|
||||
# csr
|
||||
datasetcsr_32, _ = make_dataset(X_csr_32, y_32, sample_weight_32)
|
||||
datasetcsr_64, _ = make_dataset(X_csr_64, y_64, sample_weight_64)
|
||||
xicsr_32, yicsr_32, _, _ = datasetcsr_32._next_py()
|
||||
xicsr_64, yicsr_64, _, _ = datasetcsr_64._next_py()
|
||||
xicsr_data_32, _, _ = xicsr_32
|
||||
xicsr_data_64, _, _ = xicsr_64
|
||||
|
||||
assert xicsr_data_32.dtype == np.float32
|
||||
assert xicsr_data_64.dtype == np.float64
|
||||
|
||||
assert_allclose(xicsr_data_64, xicsr_data_32, rtol=rtol)
|
||||
assert_allclose(yicsr_64, yicsr_32, rtol=rtol)
|
||||
|
||||
assert_array_equal(xi_data_32, xicsr_data_32)
|
||||
assert_array_equal(xi_data_64, xicsr_data_64)
|
||||
assert_array_equal(yi_32, yicsr_32)
|
||||
assert_array_equal(yi_64, yicsr_64)
|
274
venv/Lib/site-packages/sklearn/linear_model/tests/test_bayes.py
Normal file
@@ -0,0 +1,274 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Fabian Pedregosa <fabian.pedregosa@inria.fr>
#
# License: BSD 3 clause

from math import log

import numpy as np
from scipy.linalg import pinvh
import pytest


from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_less
from sklearn.utils._testing import assert_raise_message
from sklearn.utils import check_random_state
from sklearn.linear_model import BayesianRidge, ARDRegression
from sklearn.linear_model import Ridge
from sklearn import datasets
from sklearn.utils.extmath import fast_logdet

diabetes = datasets.load_diabetes()
|
||||
|
||||
|
||||
def test_n_iter():
|
||||
"""Check value of n_iter."""
|
||||
X = np.array([[1], [2], [6], [8], [10]])
|
||||
y = np.array([1, 2, 6, 8, 10])
|
||||
clf = BayesianRidge(n_iter=0)
|
||||
msg = "n_iter should be greater than or equal to 1."
|
||||
assert_raise_message(ValueError, msg, clf.fit, X, y)
|
||||
|
||||
|
||||
def test_bayesian_ridge_scores():
|
||||
"""Check scores attribute shape"""
|
||||
X, y = diabetes.data, diabetes.target
|
||||
|
||||
clf = BayesianRidge(compute_score=True)
|
||||
clf.fit(X, y)
|
||||
|
||||
assert clf.scores_.shape == (clf.n_iter_ + 1,)
|
||||
|
||||
|
||||
def test_bayesian_ridge_score_values():
|
||||
"""Check value of score on toy example.
|
||||
|
||||
Compute log marginal likelihood with equation (36) in Sparse Bayesian
|
||||
Learning and the Relevance Vector Machine (Tipping, 2001):
|
||||
|
||||
- 0.5 * (log |Id/alpha + X.X^T/lambda| +
         y^T.(Id/alpha + X.X^T/lambda)^-1.y + n * log(2 * pi))
|
||||
+ lambda_1 * log(lambda) - lambda_2 * lambda
|
||||
+ alpha_1 * log(alpha) - alpha_2 * alpha
|
||||
|
||||
and check equality with the score computed during training.
|
||||
"""
|
||||
|
||||
X, y = diabetes.data, diabetes.target
|
||||
n_samples = X.shape[0]
|
||||
# check with initial values of alpha and lambda (see code for the values)
|
||||
eps = np.finfo(np.float64).eps
|
||||
alpha_ = 1. / (np.var(y) + eps)
|
||||
lambda_ = 1.
|
||||
|
||||
# value of the parameters of the Gamma hyperpriors
|
||||
alpha_1 = 0.1
|
||||
alpha_2 = 0.1
|
||||
lambda_1 = 0.1
|
||||
lambda_2 = 0.1
|
||||
|
||||
# compute score using formula of docstring
|
||||
score = lambda_1 * log(lambda_) - lambda_2 * lambda_
|
||||
score += alpha_1 * log(alpha_) - alpha_2 * alpha_
|
||||
M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T)
|
||||
M_inv = pinvh(M)
|
||||
score += - 0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
|
||||
n_samples * log(2 * np.pi))
|
||||
|
||||
# compute score with BayesianRidge
|
||||
clf = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2,
|
||||
lambda_1=lambda_1, lambda_2=lambda_2,
|
||||
n_iter=1, fit_intercept=False, compute_score=True)
|
||||
clf.fit(X, y)
|
||||
|
||||
assert_almost_equal(clf.scores_[0], score, decimal=9)
|
||||
|
||||
|
||||
def test_bayesian_ridge_parameter():
|
||||
# Test correctness of lambda_ and alpha_ parameters (GitHub issue #8224)
|
||||
X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
|
||||
y = np.array([1, 2, 3, 2, 0, 4, 5]).T
|
||||
|
||||
# A Ridge regression model using an alpha value equal to the ratio of
|
||||
# lambda_ and alpha_ from the Bayesian Ridge model must be identical
|
||||
br_model = BayesianRidge(compute_score=True).fit(X, y)
|
||||
rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(X, y)
|
||||
assert_array_almost_equal(rr_model.coef_, br_model.coef_)
|
||||
assert_almost_equal(rr_model.intercept_, br_model.intercept_)
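
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above) of why the ratio lambda_ / alpha_ is the right Ridge penalty: with
# fixed noise precision alpha_ and weight precision lambda_, the Bayesian
# ridge mean minimizes alpha_ * ||y - Xw||^2 + lambda_ * ||w||^2, and
# dividing by alpha_ gives the plain ridge objective with penalty
# lambda_ / alpha_.  Closed-form check on random data (no intercept):
def _bayesian_ridge_equivalence_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.randn(20, 3)
    y = rng.randn(20)
    alpha_, lambda_ = 2.0, 0.5                 # arbitrary fixed precisions
    eye = np.eye(X.shape[1])
    w_map = np.linalg.solve(alpha_ * X.T @ X + lambda_ * eye,
                            alpha_ * X.T @ y)
    w_ridge = np.linalg.solve(X.T @ X + (lambda_ / alpha_) * eye,
                              X.T @ y)
    np.testing.assert_allclose(w_map, w_ridge)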
|
||||
|
||||
|
||||
def test_bayesian_sample_weights():
|
||||
# Test correctness of the sample_weights method
|
||||
X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
|
||||
y = np.array([1, 2, 3, 2, 0, 4, 5]).T
|
||||
w = np.array([4, 3, 3, 1, 1, 2, 3]).T
|
||||
|
||||
# A Ridge regression model using an alpha value equal to the ratio of
|
||||
# lambda_ and alpha_ from the Bayesian Ridge model must be identical
|
||||
br_model = BayesianRidge(compute_score=True).fit(X, y, sample_weight=w)
|
||||
rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(
|
||||
X, y, sample_weight=w)
|
||||
assert_array_almost_equal(rr_model.coef_, br_model.coef_)
|
||||
assert_almost_equal(rr_model.intercept_, br_model.intercept_)
|
||||
|
||||
|
||||
def test_toy_bayesian_ridge_object():
|
||||
# Test BayesianRidge on toy
|
||||
X = np.array([[1], [2], [6], [8], [10]])
|
||||
Y = np.array([1, 2, 6, 8, 10])
|
||||
clf = BayesianRidge(compute_score=True)
|
||||
clf.fit(X, Y)
|
||||
|
||||
# Check that the model could approximately learn the identity function
|
||||
test = [[1], [3], [4]]
|
||||
assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
|
||||
|
||||
|
||||
def test_bayesian_initial_params():
|
||||
# Test BayesianRidge with initial values (alpha_init, lambda_init)
|
||||
X = np.vander(np.linspace(0, 4, 5), 4)
|
||||
y = np.array([0., 1., 0., -1., 0.]) # y = (x^3 - 6x^2 + 8x) / 3
|
||||
|
||||
# In this case, starting from the default initial values will increase
|
||||
# the bias of the fitted curve. So, lambda_init should be small.
|
||||
reg = BayesianRidge(alpha_init=1., lambda_init=1e-3)
|
||||
# Check the R2 score nearly equals to one.
|
||||
r2 = reg.fit(X, y).score(X, y)
|
||||
assert_almost_equal(r2, 1.)
|
||||
|
||||
|
||||
def test_prediction_bayesian_ridge_ard_with_constant_input():
|
||||
# Test BayesianRidge and ARDRegression predictions for edge case of
|
||||
# constant target vectors
|
||||
n_samples = 4
|
||||
n_features = 5
|
||||
random_state = check_random_state(42)
|
||||
constant_value = random_state.rand()
|
||||
X = random_state.random_sample((n_samples, n_features))
|
||||
y = np.full(n_samples, constant_value,
|
||||
dtype=np.array(constant_value).dtype)
|
||||
expected = np.full(n_samples, constant_value,
|
||||
dtype=np.array(constant_value).dtype)
|
||||
|
||||
for clf in [BayesianRidge(), ARDRegression()]:
|
||||
y_pred = clf.fit(X, y).predict(X)
|
||||
assert_array_almost_equal(y_pred, expected)
|
||||
|
||||
|
||||
def test_std_bayesian_ridge_ard_with_constant_input():
|
||||
# Test BayesianRidge and ARDRegression standard dev. for edge case of
|
||||
# constant target vector
|
||||
# The standard dev. should be relatively small (< 0.01 is tested here)
|
||||
n_samples = 10
|
||||
n_features = 5
|
||||
random_state = check_random_state(42)
|
||||
constant_value = random_state.rand()
|
||||
X = random_state.random_sample((n_samples, n_features))
|
||||
y = np.full(n_samples, constant_value,
|
||||
dtype=np.array(constant_value).dtype)
|
||||
expected_upper_boundary = 0.01
|
||||
|
||||
for clf in [BayesianRidge(), ARDRegression()]:
|
||||
_, y_std = clf.fit(X, y).predict(X, return_std=True)
|
||||
assert_array_less(y_std, expected_upper_boundary)
|
||||
|
||||
|
||||
def test_update_of_sigma_in_ard():
|
||||
# Checks that `sigma_` is updated correctly after the last iteration
|
||||
# of the ARDRegression algorithm. See issue #10128.
|
||||
X = np.array([[1, 0],
|
||||
[0, 0]])
|
||||
y = np.array([0, 0])
|
||||
clf = ARDRegression(n_iter=1)
|
||||
clf.fit(X, y)
|
||||
# With the inputs above, ARDRegression prunes both of the two coefficients
|
||||
# in the first iteration. Hence, the expected shape of `sigma_` is (0, 0).
|
||||
assert clf.sigma_.shape == (0, 0)
|
||||
# Ensure that no error is thrown at prediction stage
|
||||
clf.predict(X, return_std=True)
|
||||
|
||||
|
||||
def test_toy_ard_object():
|
||||
# Test BayesianRegression ARD classifier
|
||||
X = np.array([[1], [2], [3]])
|
||||
Y = np.array([1, 2, 3])
|
||||
clf = ARDRegression(compute_score=True)
|
||||
clf.fit(X, Y)
|
||||
|
||||
# Check that the model could approximately learn the identity function
|
||||
test = [[1], [3], [4]]
|
||||
assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('seed', range(100))
|
||||
@pytest.mark.parametrize('n_samples, n_features', ((10, 100), (100, 10)))
|
||||
def test_ard_accuracy_on_easy_problem(seed, n_samples, n_features):
|
||||
# Check that ARD converges with reasonable accuracy on an easy problem
|
||||
# (Github issue #14055)
|
||||
X = np.random.RandomState(seed=seed).normal(size=(250, 3))
|
||||
y = X[:, 1]
|
||||
|
||||
regressor = ARDRegression()
|
||||
regressor.fit(X, y)
|
||||
|
||||
abs_coef_error = np.abs(1 - regressor.coef_[1])
|
||||
assert abs_coef_error < 1e-10
|
||||
|
||||
|
||||
def test_return_std():
|
||||
# Test return_std option for both Bayesian regressors
|
||||
def f(X):
|
||||
return np.dot(X, w) + b
|
||||
|
||||
def f_noise(X, noise_mult):
|
||||
return f(X) + np.random.randn(X.shape[0]) * noise_mult
|
||||
|
||||
d = 5
|
||||
n_train = 50
|
||||
n_test = 10
|
||||
|
||||
w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
|
||||
b = 1.0
|
||||
|
||||
X = np.random.random((n_train, d))
|
||||
X_test = np.random.random((n_test, d))
|
||||
|
||||
for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
|
||||
y = f_noise(X, noise_mult)
|
||||
|
||||
m1 = BayesianRidge()
|
||||
m1.fit(X, y)
|
||||
y_mean1, y_std1 = m1.predict(X_test, return_std=True)
|
||||
assert_array_almost_equal(y_std1, noise_mult, decimal=decimal)
|
||||
|
||||
m2 = ARDRegression()
|
||||
m2.fit(X, y)
|
||||
y_mean2, y_std2 = m2.predict(X_test, return_std=True)
|
||||
assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('seed', range(10))
|
||||
def test_update_sigma(seed):
|
||||
# make sure the two update_sigma() helpers are equivalent. The woodbury
|
||||
# formula is used when n_samples < n_features, and the other one is used
|
||||
# otherwise.
|
||||
|
||||
rng = np.random.RandomState(seed)
|
||||
|
||||
# set n_samples == n_features to avoid instability issues when inverting
|
||||
# the matrices. Using the woodbury formula would be unstable when
|
||||
# n_samples > n_features
|
||||
n_samples = n_features = 10
|
||||
X = rng.randn(n_samples, n_features)
|
||||
alpha = 1
|
||||
lmbda = np.arange(1, n_features + 1)
|
||||
keep_lambda = np.array([True] * n_features)
|
||||
|
||||
reg = ARDRegression()
|
||||
|
||||
sigma = reg._update_sigma(X, alpha, lmbda, keep_lambda)
|
||||
sigma_woodbury = reg._update_sigma_woodbury(X, alpha, lmbda, keep_lambda)
|
||||
|
||||
np.testing.assert_allclose(sigma, sigma_woodbury)
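
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above) of the matrix identity behind the two helpers: with D = diag(lmbda),
# the Woodbury formula gives
#   (D + alpha * X^T X)^-1
#     = D^-1 - D^-1 X^T (I / alpha + X D^-1 X^T)^-1 X D^-1,
# which only inverts an (n_samples, n_samples) matrix and is therefore the
# cheaper route when n_samples < n_features.
def _woodbury_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    n_samples, n_features = 5, 8
    X = rng.randn(n_samples, n_features)
    alpha = 1.0
    lmbda = np.arange(1.0, n_features + 1)
    D_inv = np.diag(1.0 / lmbda)
    direct = np.linalg.inv(np.diag(lmbda) + alpha * X.T @ X)
    small = np.linalg.inv(np.eye(n_samples) / alpha + X @ D_inv @ X.T)
    woodbury = D_inv - D_inv @ X.T @ small @ X @ D_inv
    np.testing.assert_allclose(direct, woodbury)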
|
File diff suppressed because it is too large
211
venv/Lib/site-packages/sklearn/linear_model/tests/test_huber.py
Normal file
@@ -0,0 +1,211 @@
# Authors: Manoj Kumar mks542@nyu.edu
# License: BSD 3 clause

import numpy as np
from scipy import optimize, sparse

from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal

from sklearn.datasets import make_regression
from sklearn.linear_model import (
    HuberRegressor, LinearRegression, SGDRegressor, Ridge)
from sklearn.linear_model._huber import _huber_loss_and_gradient
|
||||
|
||||
|
||||
def make_regression_with_outliers(n_samples=50, n_features=20):
|
||||
rng = np.random.RandomState(0)
|
||||
# Generate data with outliers by replacing 10% of the samples with noise.
|
||||
X, y = make_regression(
|
||||
n_samples=n_samples, n_features=n_features,
|
||||
random_state=0, noise=0.05)
|
||||
|
||||
# Replace 10% of the sample with noise.
|
||||
num_noise = int(0.1 * n_samples)
|
||||
random_samples = rng.randint(0, n_samples, num_noise)
|
||||
X[random_samples, :] = 2.0 * rng.normal(0, 1, (num_noise, X.shape[1]))
|
||||
return X, y
|
||||
|
||||
|
||||
def test_huber_equals_lr_for_high_epsilon():
|
||||
# Test that HuberRegressor matches LinearRegression for large epsilon
|
||||
X, y = make_regression_with_outliers()
|
||||
lr = LinearRegression()
|
||||
lr.fit(X, y)
|
||||
huber = HuberRegressor(epsilon=1e3, alpha=0.0)
|
||||
huber.fit(X, y)
|
||||
assert_almost_equal(huber.coef_, lr.coef_, 3)
|
||||
assert_almost_equal(huber.intercept_, lr.intercept_, 2)
|
||||
|
||||
|
||||
def test_huber_max_iter():
|
||||
X, y = make_regression_with_outliers()
|
||||
huber = HuberRegressor(max_iter=1)
|
||||
huber.fit(X, y)
|
||||
assert huber.n_iter_ == huber.max_iter
|
||||
|
||||
|
||||
def test_huber_gradient():
|
||||
# Test that the gradient calculated by _huber_loss_and_gradient is correct
|
||||
rng = np.random.RandomState(1)
|
||||
X, y = make_regression_with_outliers()
|
||||
sample_weight = rng.randint(1, 3, (y.shape[0]))
|
||||
|
||||
def loss_func(x, *args):
|
||||
return _huber_loss_and_gradient(x, *args)[0]
|
||||
|
||||
def grad_func(x, *args):
|
||||
return _huber_loss_and_gradient(x, *args)[1]
|
||||
|
||||
# Check using optimize.check_grad that the gradients are equal.
|
||||
for _ in range(5):
|
||||
# Check for both fit_intercept and otherwise.
|
||||
for n_features in [X.shape[1] + 1, X.shape[1] + 2]:
|
||||
w = rng.randn(n_features)
|
||||
w[-1] = np.abs(w[-1])
|
||||
grad_same = optimize.check_grad(
|
||||
loss_func, grad_func, w, X, y, 0.01, 0.1, sample_weight)
|
||||
assert_almost_equal(grad_same, 1e-6, 4)
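
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above): optimize.check_grad returns the 2-norm of the difference between
# the supplied gradient and a finite-difference approximation of it, so a
# value close to zero (compared against 1e-6 above) means the analytical
# gradient is consistent with the loss.
def _check_grad_sketch():
    import numpy as np
    from scipy import optimize

    def loss(w):
        return np.sum(w ** 2)

    def grad(w):
        return 2 * w

    err = optimize.check_grad(loss, grad, np.array([1.0, -2.0, 3.0]))
    assert err < 1e-5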
|
||||
|
||||
|
||||
def test_huber_sample_weights():
|
||||
# Test sample_weights implementation in HuberRegressor
|
||||
|
||||
X, y = make_regression_with_outliers()
|
||||
huber = HuberRegressor()
|
||||
huber.fit(X, y)
|
||||
huber_coef = huber.coef_
|
||||
huber_intercept = huber.intercept_
|
||||
|
||||
# Rescale coefs before comparing with assert_array_almost_equal to make
|
||||
# sure that the number of decimal places used is somewhat insensitive to
|
||||
# the amplitude of the coefficients and therefore to the scale of the
|
||||
# data and the regularization parameter
|
||||
scale = max(np.mean(np.abs(huber.coef_)),
|
||||
np.mean(np.abs(huber.intercept_)))
|
||||
|
||||
huber.fit(X, y, sample_weight=np.ones(y.shape[0]))
|
||||
assert_array_almost_equal(huber.coef_ / scale, huber_coef / scale)
|
||||
assert_array_almost_equal(huber.intercept_ / scale,
|
||||
huber_intercept / scale)
|
||||
|
||||
X, y = make_regression_with_outliers(n_samples=5, n_features=20)
|
||||
X_new = np.vstack((X, np.vstack((X[1], X[1], X[3]))))
|
||||
y_new = np.concatenate((y, [y[1]], [y[1]], [y[3]]))
|
||||
huber.fit(X_new, y_new)
|
||||
huber_coef = huber.coef_
|
||||
huber_intercept = huber.intercept_
|
||||
sample_weight = np.ones(X.shape[0])
|
||||
sample_weight[1] = 3
|
||||
sample_weight[3] = 2
|
||||
huber.fit(X, y, sample_weight=sample_weight)
|
||||
|
||||
assert_array_almost_equal(huber.coef_ / scale, huber_coef / scale)
|
||||
assert_array_almost_equal(huber.intercept_ / scale,
|
||||
huber_intercept / scale)
|
||||
|
||||
# Test sparse implementation with sample weights.
|
||||
X_csr = sparse.csr_matrix(X)
|
||||
huber_sparse = HuberRegressor()
|
||||
huber_sparse.fit(X_csr, y, sample_weight=sample_weight)
|
||||
assert_array_almost_equal(huber_sparse.coef_ / scale,
|
||||
huber_coef / scale)
|
||||
|
||||
|
||||
def test_huber_sparse():
|
||||
X, y = make_regression_with_outliers()
|
||||
huber = HuberRegressor(alpha=0.1)
|
||||
huber.fit(X, y)
|
||||
|
||||
X_csr = sparse.csr_matrix(X)
|
||||
huber_sparse = HuberRegressor(alpha=0.1)
|
||||
huber_sparse.fit(X_csr, y)
|
||||
assert_array_almost_equal(huber_sparse.coef_, huber.coef_)
|
||||
assert_array_equal(huber.outliers_, huber_sparse.outliers_)
|
||||
|
||||
|
||||
def test_huber_scaling_invariant():
|
||||
# Test that outliers filtering is scaling independent.
|
||||
X, y = make_regression_with_outliers()
|
||||
huber = HuberRegressor(fit_intercept=False, alpha=0.0, max_iter=100)
|
||||
huber.fit(X, y)
|
||||
n_outliers_mask_1 = huber.outliers_
|
||||
assert not np.all(n_outliers_mask_1)
|
||||
|
||||
huber.fit(X, 2. * y)
|
||||
n_outliers_mask_2 = huber.outliers_
|
||||
assert_array_equal(n_outliers_mask_2, n_outliers_mask_1)
|
||||
|
||||
huber.fit(2. * X, 2. * y)
|
||||
n_outliers_mask_3 = huber.outliers_
|
||||
assert_array_equal(n_outliers_mask_3, n_outliers_mask_1)
|
||||
|
||||
|
||||
def test_huber_and_sgd_same_results():
|
||||
# Test they should converge to same coefficients for same parameters
|
||||
|
||||
X, y = make_regression_with_outliers(n_samples=10, n_features=2)
|
||||
|
||||
# Fit once to find out the scale parameter. Scale down X and y by scale
|
||||
# so that the scale parameter is optimized to 1.0
|
||||
huber = HuberRegressor(fit_intercept=False, alpha=0.0, max_iter=100,
|
||||
epsilon=1.35)
|
||||
huber.fit(X, y)
|
||||
X_scale = X / huber.scale_
|
||||
y_scale = y / huber.scale_
|
||||
huber.fit(X_scale, y_scale)
|
||||
assert_almost_equal(huber.scale_, 1.0, 3)
|
||||
|
||||
sgdreg = SGDRegressor(
|
||||
alpha=0.0, loss="huber", shuffle=True, random_state=0, max_iter=10000,
|
||||
fit_intercept=False, epsilon=1.35, tol=None)
|
||||
sgdreg.fit(X_scale, y_scale)
|
||||
assert_array_almost_equal(huber.coef_, sgdreg.coef_, 1)
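
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above) of the Huber loss with threshold epsilon: quadratic for small
# residuals, linear beyond epsilon, so outliers contribute far less than in
# ordinary least squares.  Both estimators above optimize a loss of this
# shape (up to constant factors and an internal scale parameter), which is
# why they converge to similar coefficients once scale_ is 1 and alpha is 0.
def _huber_loss_sketch(residual, epsilon=1.35):
    import numpy as np
    r = np.abs(residual)
    return np.where(r <= epsilon,
                    0.5 * r ** 2,
                    epsilon * r - 0.5 * epsilon ** 2)


# e.g. _huber_loss_sketch(np.array([0.5, 10.0])) gives [0.125, 12.58875]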
|
||||
|
||||
|
||||
def test_huber_warm_start():
|
||||
X, y = make_regression_with_outliers()
|
||||
huber_warm = HuberRegressor(
|
||||
alpha=1.0, max_iter=10000, warm_start=True, tol=1e-1)
|
||||
|
||||
huber_warm.fit(X, y)
|
||||
huber_warm_coef = huber_warm.coef_.copy()
|
||||
huber_warm.fit(X, y)
|
||||
|
||||
# SciPy performs the tol check after doing the coef updates, so
|
||||
# these would be almost same but not equal.
|
||||
assert_array_almost_equal(huber_warm.coef_, huber_warm_coef, 1)
|
||||
|
||||
assert huber_warm.n_iter_ == 0
|
||||
|
||||
|
||||
def test_huber_better_r2_score():
|
||||
# Test that Huber gives a better R2 score than Ridge on the non-outliers
|
||||
X, y = make_regression_with_outliers()
|
||||
huber = HuberRegressor(alpha=0.01)
|
||||
huber.fit(X, y)
|
||||
linear_loss = np.dot(X, huber.coef_) + huber.intercept_ - y
|
||||
mask = np.abs(linear_loss) < huber.epsilon * huber.scale_
|
||||
huber_score = huber.score(X[mask], y[mask])
|
||||
huber_outlier_score = huber.score(X[~mask], y[~mask])
|
||||
|
||||
# The Ridge regressor should be influenced by the outliers and hence
|
||||
# give a worse score on the non-outliers as compared to the huber
|
||||
# regressor.
|
||||
ridge = Ridge(alpha=0.01)
|
||||
ridge.fit(X, y)
|
||||
ridge_score = ridge.score(X[mask], y[mask])
|
||||
ridge_outlier_score = ridge.score(X[~mask], y[~mask])
|
||||
assert huber_score > ridge_score
|
||||
|
||||
# The huber model should also fit poorly on the outliers.
|
||||
assert ridge_outlier_score > huber_outlier_score
|
||||
|
||||
|
||||
def test_huber_bool():
|
||||
# Test that it does not crash with bool data
|
||||
X, y = make_regression(n_samples=200, n_features=2, noise=4.0,
|
||||
random_state=0)
|
||||
X_bool = X > 0
|
||||
HuberRegressor().fit(X_bool, y)
|
@@ -0,0 +1,775 @@
import warnings

import numpy as np
import pytest
from scipy import linalg

from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raises
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_warns
from sklearn.utils._testing import TempMemmap
from sklearn.utils.fixes import np_version, parse_version
from sklearn.exceptions import ConvergenceWarning
from sklearn import linear_model, datasets
from sklearn.linear_model._least_angle import _lars_path_residues
from sklearn.linear_model import LassoLarsIC, lars_path
from sklearn.linear_model import Lars, LassoLars

# TODO: use another dataset that has multiple drops
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target
G = np.dot(X.T, X)
Xy = np.dot(X.T, y)
n_samples = y.size
|
||||
|
||||
|
||||
def test_simple():
|
||||
# Principle of Lars is to keep covariances tied and decreasing
|
||||
|
||||
# also test verbose output
|
||||
from io import StringIO
|
||||
import sys
|
||||
old_stdout = sys.stdout
|
||||
try:
|
||||
sys.stdout = StringIO()
|
||||
|
||||
_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, method='lar', verbose=10)
|
||||
|
||||
sys.stdout = old_stdout
|
||||
|
||||
for i, coef_ in enumerate(coef_path_.T):
|
||||
res = y - np.dot(X, coef_)
|
||||
cov = np.dot(X.T, res)
|
||||
C = np.max(abs(cov))
|
||||
eps = 1e-3
|
||||
ocur = len(cov[C - eps < abs(cov)])
|
||||
if i < X.shape[1]:
|
||||
assert ocur == i + 1
|
||||
else:
|
||||
# no more than max_pred variables can go into the active set
|
||||
assert ocur == X.shape[1]
|
||||
finally:
|
||||
sys.stdout = old_stdout
|
||||
|
||||
|
||||
def test_simple_precomputed():
|
||||
# The same, with precomputed Gram matrix
|
||||
|
||||
_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, Gram=G, method='lar')
|
||||
|
||||
for i, coef_ in enumerate(coef_path_.T):
|
||||
res = y - np.dot(X, coef_)
|
||||
cov = np.dot(X.T, res)
|
||||
C = np.max(abs(cov))
|
||||
eps = 1e-3
|
||||
ocur = len(cov[C - eps < abs(cov)])
|
||||
if i < X.shape[1]:
|
||||
assert ocur == i + 1
|
||||
else:
|
||||
# no more than max_pred variables can go into the active set
|
||||
assert ocur == X.shape[1]
|
||||
|
||||
|
||||
def _assert_same_lars_path_result(output1, output2):
|
||||
assert len(output1) == len(output2)
|
||||
for o1, o2 in zip(output1, output2):
|
||||
assert_allclose(o1, o2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['lar', 'lasso'])
|
||||
@pytest.mark.parametrize('return_path', [True, False])
|
||||
def test_lars_path_gram_equivalent(method, return_path):
|
||||
_assert_same_lars_path_result(
|
||||
linear_model.lars_path_gram(
|
||||
Xy=Xy, Gram=G, n_samples=n_samples, method=method,
|
||||
return_path=return_path),
|
||||
linear_model.lars_path(
|
||||
X, y, Gram=G, method=method,
|
||||
return_path=return_path))
|
||||
|
||||
|
||||
def test_x_none_gram_none_raises_value_error():
|
||||
# Test that lars_path with no X and Gram raises exception
|
||||
Xy = np.dot(X.T, y)
|
||||
assert_raises(ValueError, linear_model.lars_path, None, y, Gram=None,
|
||||
Xy=Xy)
|
||||
|
||||
|
||||
def test_all_precomputed():
|
||||
# Test that lars_path with precomputed Gram and Xy gives the right answer
|
||||
G = np.dot(X.T, X)
|
||||
Xy = np.dot(X.T, y)
|
||||
for method in 'lar', 'lasso':
|
||||
output = linear_model.lars_path(X, y, method=method)
|
||||
output_pre = linear_model.lars_path(X, y, Gram=G, Xy=Xy,
|
||||
method=method)
|
||||
for expected, got in zip(output, output_pre):
|
||||
assert_array_almost_equal(expected, got)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore: `rcond` parameter will change')
|
||||
# numpy deprecation
|
||||
def test_lars_lstsq():
|
||||
# Test that Lars gives least square solution at the end
|
||||
# of the path
|
||||
X1 = 3 * X # use un-normalized dataset
|
||||
clf = linear_model.LassoLars(alpha=0.)
|
||||
clf.fit(X1, y)
|
||||
# Avoid FutureWarning about default value change when numpy >= 1.14
|
||||
rcond = None if np_version >= parse_version('1.14') else -1
|
||||
coef_lstsq = np.linalg.lstsq(X1, y, rcond=rcond)[0]
|
||||
assert_array_almost_equal(clf.coef_, coef_lstsq)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:`rcond` parameter will change')
|
||||
# numpy deprecation
|
||||
def test_lasso_gives_lstsq_solution():
|
||||
# Test that Lars Lasso gives least square solution at the end
|
||||
# of the path
|
||||
_, _, coef_path_ = linear_model.lars_path(X, y, method='lasso')
|
||||
coef_lstsq = np.linalg.lstsq(X, y)[0]
|
||||
assert_array_almost_equal(coef_lstsq, coef_path_[:, -1])
|
||||
|
||||
|
||||
def test_collinearity():
|
||||
# Check that lars_path is robust to collinearity in input
|
||||
X = np.array([[3., 3., 1.],
|
||||
[2., 2., 0.],
|
||||
[1., 1., 0]])
|
||||
y = np.array([1., 0., 0])
|
||||
rng = np.random.RandomState(0)
|
||||
|
||||
f = ignore_warnings
|
||||
_, _, coef_path_ = f(linear_model.lars_path)(X, y, alpha_min=0.01)
|
||||
assert not np.isnan(coef_path_).any()
|
||||
residual = np.dot(X, coef_path_[:, -1]) - y
|
||||
assert (residual ** 2).sum() < 1. # just make sure it's bounded
|
||||
|
||||
n_samples = 10
|
||||
X = rng.rand(n_samples, 5)
|
||||
y = np.zeros(n_samples)
|
||||
_, _, coef_path_ = linear_model.lars_path(X, y, Gram='auto', copy_X=False,
|
||||
copy_Gram=False, alpha_min=0.,
|
||||
method='lasso', verbose=0,
|
||||
max_iter=500)
|
||||
assert_array_almost_equal(coef_path_, np.zeros_like(coef_path_))
|
||||
|
||||
|
||||
def test_no_path():
|
||||
# Test that the ``return_path=False`` option returns the correct output
|
||||
alphas_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, method='lar')
|
||||
alpha_, _, coef = linear_model.lars_path(
|
||||
X, y, method='lar', return_path=False)
|
||||
|
||||
assert_array_almost_equal(coef, coef_path_[:, -1])
|
||||
assert alpha_ == alphas_[-1]
|
||||
|
||||
|
||||
def test_no_path_precomputed():
|
||||
# Test that the ``return_path=False`` option with Gram remains correct
|
||||
alphas_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, method='lar', Gram=G)
|
||||
alpha_, _, coef = linear_model.lars_path(
|
||||
X, y, method='lar', Gram=G, return_path=False)
|
||||
|
||||
assert_array_almost_equal(coef, coef_path_[:, -1])
|
||||
assert alpha_ == alphas_[-1]
|
||||
|
||||
|
||||
def test_no_path_all_precomputed():
|
||||
# Test that the ``return_path=False`` option with Gram and Xy remains
|
||||
# correct
|
||||
X, y = 3 * diabetes.data, diabetes.target
|
||||
G = np.dot(X.T, X)
|
||||
Xy = np.dot(X.T, y)
|
||||
alphas_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, method='lasso', Xy=Xy, Gram=G, alpha_min=0.9)
|
||||
alpha_, _, coef = linear_model.lars_path(
|
||||
X, y, method='lasso', Gram=G, Xy=Xy, alpha_min=0.9, return_path=False)
|
||||
|
||||
assert_array_almost_equal(coef, coef_path_[:, -1])
|
||||
assert alpha_ == alphas_[-1]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'classifier',
|
||||
[linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC])
|
||||
def test_lars_precompute(classifier):
|
||||
# Check for different values of precompute
|
||||
G = np.dot(X.T, X)
|
||||
|
||||
clf = classifier(precompute=G)
|
||||
output_1 = ignore_warnings(clf.fit)(X, y).coef_
|
||||
for precompute in [True, False, 'auto', None]:
|
||||
clf = classifier(precompute=precompute)
|
||||
output_2 = clf.fit(X, y).coef_
|
||||
assert_array_almost_equal(output_1, output_2, decimal=8)
|
||||
|
||||
|
||||
def test_singular_matrix():
|
||||
# Test when input is a singular matrix
|
||||
X1 = np.array([[1, 1.], [1., 1.]])
|
||||
y1 = np.array([1, 1])
|
||||
_, _, coef_path = linear_model.lars_path(X1, y1)
|
||||
assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0]])
|
||||
|
||||
|
||||
def test_rank_deficient_design():
|
||||
# consistency test that checks that LARS Lasso is handling rank
|
||||
# deficient input data (with n_features < rank) in the same way
|
||||
# as coordinate descent Lasso
|
||||
y = [5, 0, 5]
|
||||
for X in (
|
||||
[[5, 0],
|
||||
[0, 5],
|
||||
[10, 10]],
|
||||
[[10, 10, 0],
|
||||
[1e-32, 0, 0],
|
||||
[0, 0, 1]]
|
||||
):
|
||||
# To be able to use the coefs to compute the objective function,
|
||||
# we need to turn off normalization
|
||||
lars = linear_model.LassoLars(.1, normalize=False)
|
||||
coef_lars_ = lars.fit(X, y).coef_
|
||||
obj_lars = (1. / (2. * 3.)
|
||||
* linalg.norm(y - np.dot(X, coef_lars_)) ** 2
|
||||
+ .1 * linalg.norm(coef_lars_, 1))
|
||||
coord_descent = linear_model.Lasso(.1, tol=1e-6, normalize=False)
|
||||
coef_cd_ = coord_descent.fit(X, y).coef_
|
||||
obj_cd = ((1. / (2. * 3.)) * linalg.norm(y - np.dot(X, coef_cd_)) ** 2
|
||||
+ .1 * linalg.norm(coef_cd_, 1))
|
||||
assert obj_lars < obj_cd * (1. + 1e-8)
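
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above): the quantity compared for both solvers is the Lasso objective
#   (1 / (2 * n_samples)) * ||y - X w||_2^2 + alpha * ||w||_1
# here with n_samples = 3 and alpha = 0.1, written out as a small helper.
def _lasso_objective_sketch(X, y, coef, alpha=0.1):
    import numpy as np
    X, y, coef = np.asarray(X), np.asarray(y), np.asarray(coef)
    n_samples = X.shape[0]
    residual = y - X @ coef
    return (np.sum(residual ** 2) / (2. * n_samples)
            + alpha * np.sum(np.abs(coef)))


# e.g. _lasso_objective_sketch([[5, 0], [0, 5], [10, 10]], [5, 0, 5], [1., 0.])
# evaluates the objective for a candidate coefficient vector on the first
# design used above.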
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd():
|
||||
# Test that LassoLars and Lasso using coordinate descent give the
|
||||
# same results.
|
||||
X = 3 * diabetes.data
|
||||
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
|
||||
for c, a in zip(lasso_path.T, alphas):
|
||||
if a == 0:
|
||||
continue
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
# similar test, with the classifiers
|
||||
for alpha in np.linspace(1e-2, 1 - 1e-2, 20):
|
||||
clf1 = linear_model.LassoLars(alpha=alpha, normalize=False).fit(X, y)
|
||||
clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8,
|
||||
normalize=False).fit(X, y)
|
||||
err = linalg.norm(clf1.coef_ - clf2.coef_)
|
||||
assert err < 1e-3
|
||||
|
||||
# same test, with normalized data
|
||||
X = diabetes.data
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, normalize=True,
|
||||
tol=1e-8)
|
||||
for c, a in zip(lasso_path.T, alphas):
|
||||
if a == 0:
|
||||
continue
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_early_stopping():
|
||||
# Test that LassoLars and Lasso using coordinate descent give the
|
||||
# same results when early stopping is used.
|
||||
# (test : before, in the middle, and in the last part of the path)
|
||||
alphas_min = [10, 0.9, 1e-4]
|
||||
|
||||
for alpha_min in alphas_min:
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
|
||||
alpha_min=alpha_min)
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
|
||||
lasso_cd.alpha = alphas[-1]
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
# same test, with normalization
|
||||
for alpha_min in alphas_min:
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
|
||||
alpha_min=alpha_min)
|
||||
lasso_cd = linear_model.Lasso(normalize=True, tol=1e-8)
|
||||
lasso_cd.alpha = alphas[-1]
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
|
||||
def test_lasso_lars_path_length():
|
||||
# Test that the path length of the LassoLars is right
|
||||
lasso = linear_model.LassoLars()
|
||||
lasso.fit(X, y)
|
||||
lasso2 = linear_model.LassoLars(alpha=lasso.alphas_[2])
|
||||
lasso2.fit(X, y)
|
||||
assert_array_almost_equal(lasso.alphas_[:3], lasso2.alphas_)
|
||||
# Also check that the sequence of alphas is always decreasing
|
||||
assert np.all(np.diff(lasso.alphas_) < 0)
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
|
||||
# Test lasso lars on a very ill-conditioned design, and check that
|
||||
# it does not blow up, and stays somewhat close to a solution given
|
||||
# by the coordinate descent solver
|
||||
# Also test that lasso_path (using lars_path output style) gives
|
||||
# the same result as lars_path and previous lasso output style
|
||||
# under these conditions.
|
||||
rng = np.random.RandomState(42)
|
||||
|
||||
# Generate data
|
||||
n, m = 70, 100
|
||||
k = 5
|
||||
X = rng.randn(n, m)
|
||||
w = np.zeros((m, 1))
|
||||
i = np.arange(0, m)
|
||||
rng.shuffle(i)
|
||||
supp = i[:k]
|
||||
w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
|
||||
y = np.dot(X, w)
|
||||
sigma = 0.2
|
||||
y += sigma * rng.rand(*y.shape)
|
||||
y = y.squeeze()
|
||||
lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method='lasso')
|
||||
|
||||
_, lasso_coef2, _ = linear_model.lasso_path(X, y,
|
||||
alphas=lars_alphas,
|
||||
tol=1e-6,
|
||||
fit_intercept=False)
|
||||
|
||||
assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_ill_conditioned2():
|
||||
# Create an ill-conditioned situation in which the LARS has to go
|
||||
# far in the path to converge, and check that LARS and coordinate
|
||||
# descent give the same answers
|
||||
# Note it used to be the case that Lars had to use the drop for good
|
||||
# strategy for this but this is no longer the case with the
|
||||
# equality_tolerance checks
|
||||
X = [[1e20, 1e20, 0],
|
||||
[-1e-32, 0, 0],
|
||||
[1, 1, 1]]
|
||||
y = [10, 10, 1]
|
||||
alpha = .0001
|
||||
|
||||
def objective_function(coef):
|
||||
return (1. / (2. * len(X)) * linalg.norm(y - np.dot(X, coef)) ** 2
|
||||
+ alpha * linalg.norm(coef, 1))
|
||||
|
||||
lars = linear_model.LassoLars(alpha=alpha, normalize=False)
|
||||
assert_warns(ConvergenceWarning, lars.fit, X, y)
|
||||
lars_coef_ = lars.coef_
|
||||
lars_obj = objective_function(lars_coef_)
|
||||
|
||||
coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-4, normalize=False)
|
||||
cd_coef_ = coord_descent.fit(X, y).coef_
|
||||
cd_obj = objective_function(cd_coef_)
|
||||
|
||||
assert lars_obj < cd_obj * (1. + 1e-8)
|
||||
|
||||
|
||||
def test_lars_add_features():
|
||||
# assure that at least some features get added if necessary
|
||||
# test for 6d2b4c
|
||||
# Hilbert matrix
|
||||
n = 5
|
||||
H = 1. / (np.arange(1, n + 1) + np.arange(n)[:, np.newaxis])
|
||||
clf = linear_model.Lars(fit_intercept=False).fit(
|
||||
H, np.arange(n))
|
||||
assert np.all(np.isfinite(clf.coef_))
|
||||
|
||||
|
||||
def test_lars_n_nonzero_coefs(verbose=False):
|
||||
lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose)
|
||||
lars.fit(X, y)
|
||||
assert len(lars.coef_.nonzero()[0]) == 6
|
||||
# The path should be of length 6 + 1 in a Lars going down to 6
|
||||
# non-zero coefs
|
||||
assert len(lars.alphas_) == 7
|
||||
|
||||
|
||||
@ignore_warnings
|
||||
def test_multitarget():
|
||||
# Assure that estimators receiving multidimensional y do the right thing
|
||||
Y = np.vstack([y, y ** 2]).T
|
||||
n_targets = Y.shape[1]
|
||||
estimators = [
|
||||
linear_model.LassoLars(),
|
||||
linear_model.Lars(),
|
||||
# regression test for gh-1615
|
||||
linear_model.LassoLars(fit_intercept=False),
|
||||
linear_model.Lars(fit_intercept=False),
|
||||
]
|
||||
|
||||
for estimator in estimators:
|
||||
estimator.fit(X, Y)
|
||||
Y_pred = estimator.predict(X)
|
||||
alphas, active, coef, path = (estimator.alphas_, estimator.active_,
|
||||
estimator.coef_, estimator.coef_path_)
|
||||
for k in range(n_targets):
|
||||
estimator.fit(X, Y[:, k])
|
||||
y_pred = estimator.predict(X)
|
||||
assert_array_almost_equal(alphas[k], estimator.alphas_)
|
||||
assert_array_almost_equal(active[k], estimator.active_)
|
||||
assert_array_almost_equal(coef[k], estimator.coef_)
|
||||
assert_array_almost_equal(path[k], estimator.coef_path_)
|
||||
assert_array_almost_equal(Y_pred[:, k], y_pred)
|
||||
|
||||
|
||||
def test_lars_cv():
|
||||
# Test the LassoLarsCV object by checking that the optimal alpha
|
||||
# increases as the number of samples increases.
|
||||
# This property is not actually guaranteed in general and is just a
|
||||
# property of the given dataset, with the given steps chosen.
|
||||
old_alpha = 0
|
||||
lars_cv = linear_model.LassoLarsCV()
|
||||
for length in (400, 200, 100):
|
||||
X = diabetes.data[:length]
|
||||
y = diabetes.target[:length]
|
||||
lars_cv.fit(X, y)
|
||||
np.testing.assert_array_less(old_alpha, lars_cv.alpha_)
|
||||
old_alpha = lars_cv.alpha_
|
||||
assert not hasattr(lars_cv, 'n_nonzero_coefs')
|
||||
|
||||
|
||||
def test_lars_cv_max_iter(recwarn):
|
||||
warnings.simplefilter('always')
|
||||
with np.errstate(divide='raise', invalid='raise'):
|
||||
X = diabetes.data
|
||||
y = diabetes.target
|
||||
rng = np.random.RandomState(42)
|
||||
x = rng.randn(len(y))
|
||||
X = diabetes.data
|
||||
X = np.c_[X, x, x] # add correlated features
|
||||
lars_cv = linear_model.LassoLarsCV(max_iter=5, cv=5)
|
||||
lars_cv.fit(X, y)
|
||||
# Check that there is no warning in general and no ConvergenceWarning
|
||||
# in particular.
|
||||
# Materialize the string representation of the warning to get a more
|
||||
# informative error message in case of AssertionError.
|
||||
recorded_warnings = [str(w) for w in recwarn]
|
||||
assert recorded_warnings == []
|
||||
|
||||
|
||||
def test_lasso_lars_ic():
|
||||
# Test the LassoLarsIC object by checking that
|
||||
# - some good features are selected.
|
||||
# - alpha_bic > alpha_aic
|
||||
# - n_nonzero_bic < n_nonzero_aic
|
||||
lars_bic = linear_model.LassoLarsIC('bic')
|
||||
lars_aic = linear_model.LassoLarsIC('aic')
|
||||
rng = np.random.RandomState(42)
|
||||
X = diabetes.data
|
||||
X = np.c_[X, rng.randn(X.shape[0], 5)] # add 5 bad features
|
||||
lars_bic.fit(X, y)
|
||||
lars_aic.fit(X, y)
|
||||
nonzero_bic = np.where(lars_bic.coef_)[0]
|
||||
nonzero_aic = np.where(lars_aic.coef_)[0]
|
||||
assert lars_bic.alpha_ > lars_aic.alpha_
|
||||
assert len(nonzero_bic) < len(nonzero_aic)
|
||||
assert np.max(nonzero_bic) < diabetes.data.shape[1]
|
||||
|
||||
# test error on unknown IC
|
||||
lars_broken = linear_model.LassoLarsIC('<unknown>')
|
||||
assert_raises(ValueError, lars_broken.fit, X, y)
|
||||
|
||||
|
||||
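# Illustrative sketch (not part of the original test file): the classical
# Gaussian-noise information criteria behind the assertions in
# test_lasso_lars_ic above. This is a textbook approximation, not necessarily
# the exact expression LassoLarsIC evaluates internally; `df` is taken as the
# number of nonzero coefficients:
#   AIC ~ n * log(RSS / n) + 2 * df
#   BIC ~ n * log(RSS / n) + log(n) * df
# BIC penalizes model size more strongly, which is why the test expects a
# larger alpha_ and fewer nonzero coefficients for criterion='bic'.
def _toy_information_criterion(X, y, coef, criterion='aic'):
    residuals = y - np.dot(X, coef)
    rss = np.dot(residuals, residuals)
    n_samples_ = len(y)
    df = np.count_nonzero(coef)
    penalty = 2. if criterion == 'aic' else np.log(n_samples_)
    return n_samples_ * np.log(rss / n_samples_) + penalty * df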
def test_lars_path_readonly_data():
|
||||
# When using automated memory mapping on large input, the
|
||||
# fold data is in read-only mode
|
||||
# This is a non-regression test for:
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/4597
|
||||
splitted_data = train_test_split(X, y, random_state=42)
|
||||
with TempMemmap(splitted_data) as (X_train, X_test, y_train, y_test):
|
||||
# The following should not fail despite copy=False
|
||||
_lars_path_residues(X_train, y_train, X_test, y_test, copy=False)
|
||||
|
||||
|
||||
def test_lars_path_positive_constraint():
|
||||
# this is the main test for the positive parameter on the lars_path method
|
||||
# the estimator classes just make use of this function
|
||||
|
||||
# we do the test on the diabetes dataset
|
||||
|
||||
# ensure that we get negative coefficients when positive=False
|
||||
# and all positive when positive=True
|
||||
# for method 'lar' (default) and lasso
|
||||
|
||||
err_msg = "Positive constraint not supported for 'lar' coding method."
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
linear_model.lars_path(diabetes['data'], diabetes['target'],
|
||||
method='lar', positive=True)
|
||||
|
||||
method = 'lasso'
|
||||
_, _, coefs = \
|
||||
linear_model.lars_path(X, y, return_path=True, method=method,
|
||||
positive=False)
|
||||
assert coefs.min() < 0
|
||||
|
||||
_, _, coefs = \
|
||||
linear_model.lars_path(X, y, return_path=True, method=method,
|
||||
positive=True)
|
||||
assert coefs.min() >= 0
|
||||
|
||||
|
||||
# now we are going to test the positive option for all estimator classes
|
||||
|
||||
default_parameter = {'fit_intercept': False}
|
||||
|
||||
estimator_parameter_map = {'LassoLars': {'alpha': 0.1},
|
||||
'LassoLarsCV': {},
|
||||
'LassoLarsIC': {}}
|
||||
|
||||
|
||||
def test_estimatorclasses_positive_constraint():
|
||||
    # check that the positive option is correctly propagated to all the
    # estimator classes, all within this same function
|
||||
default_parameter = {'fit_intercept': False}
|
||||
|
||||
estimator_parameter_map = {'LassoLars': {'alpha': 0.1},
|
||||
'LassoLarsCV': {},
|
||||
'LassoLarsIC': {}}
|
||||
for estname in estimator_parameter_map:
|
||||
params = default_parameter.copy()
|
||||
params.update(estimator_parameter_map[estname])
|
||||
estimator = getattr(linear_model, estname)(positive=False, **params)
|
||||
estimator.fit(X, y)
|
||||
assert estimator.coef_.min() < 0
|
||||
estimator = getattr(linear_model, estname)(positive=True, **params)
|
||||
estimator.fit(X, y)
|
||||
assert min(estimator.coef_) >= 0
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_positive():
|
||||
# Test that LassoLars and Lasso using coordinate descent give the
|
||||
# same results when using the positive option
|
||||
|
||||
    # This test is basically a copy of the one above, with the positive
    # option added. However, for the middle part (the comparison of
    # coefficient values over a range of alphas) an adaptation was needed.
    # See below.
|
||||
|
||||
# not normalized data
|
||||
X = 3 * diabetes.data
|
||||
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
|
||||
positive=True)
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
|
||||
for c, a in zip(lasso_path.T, alphas):
|
||||
if a == 0:
|
||||
continue
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
    # The range of alphas chosen for the coefficient comparison here is
    # restricted compared with the test above without the positive option.
    # This is because the Lars-Lasso algorithm does not converge to the
    # least-squares solution for small alphas; see 'Least Angle Regression'
    # by Efron et al. 2004. The coefficients typically agree up to the
    # smallest alpha reached by the Lars-Lasso algorithm and start to
    # diverge thereafter. See
    # https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff
|
||||
|
||||
for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
|
||||
clf1 = linear_model.LassoLars(fit_intercept=False, alpha=alpha,
|
||||
normalize=False, positive=True).fit(X, y)
|
||||
clf2 = linear_model.Lasso(fit_intercept=False, alpha=alpha, tol=1e-8,
|
||||
normalize=False, positive=True).fit(X, y)
|
||||
err = linalg.norm(clf1.coef_ - clf2.coef_)
|
||||
assert err < 1e-3
|
||||
|
||||
# normalized data
|
||||
X = diabetes.data
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
|
||||
positive=True)
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, normalize=True,
|
||||
tol=1e-8, positive=True)
|
||||
for c, a in zip(lasso_path.T[:-1], alphas[:-1]): # don't include alpha=0
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
|
||||
def test_lasso_lars_vs_R_implementation():
|
||||
# Test that sklearn LassoLars implementation agrees with the LassoLars
|
||||
# implementation available in R (lars library) under the following
|
||||
# scenarios:
|
||||
# 1) fit_intercept=False and normalize=False
|
||||
# 2) fit_intercept=True and normalize=True
|
||||
|
||||
# Let's generate the data used in the bug report 7778
|
||||
y = np.array([-6.45006793, -3.51251449, -8.52445396, 6.12277822,
|
||||
-19.42109366])
|
||||
x = np.array([[0.47299829, 0, 0, 0, 0],
|
||||
[0.08239882, 0.85784863, 0, 0, 0],
|
||||
[0.30114139, -0.07501577, 0.80895216, 0, 0],
|
||||
[-0.01460346, -0.1015233, 0.0407278, 0.80338378, 0],
|
||||
[-0.69363927, 0.06754067, 0.18064514, -0.0803561,
|
||||
0.40427291]])
|
||||
|
||||
X = x.T
|
||||
|
||||
###########################################################################
|
||||
# Scenario 1: Let's compare R vs sklearn when fit_intercept=False and
|
||||
# normalize=False
|
||||
###########################################################################
|
||||
#
|
||||
# The R result was obtained using the following code:
|
||||
#
|
||||
# library(lars)
|
||||
# model_lasso_lars = lars(X, t(y), type="lasso", intercept=FALSE,
|
||||
# trace=TRUE, normalize=FALSE)
|
||||
# r = t(model_lasso_lars$beta)
|
||||
#
|
||||
|
||||
r = np.array([[0, 0, 0, 0, 0, -79.810362809499026, -83.528788732782829,
|
||||
-83.777653739190711, -83.784156932888934,
|
||||
-84.033390591756657],
|
||||
[0, 0, 0, 0, -0.476624256777266, 0, 0, 0, 0,
|
||||
0.025219751009936],
|
||||
[0, -3.577397088285891, -4.702795355871871,
|
||||
-7.016748621359461, -7.614898471899412, -0.336938391359179,
|
||||
0, 0, 0.001213370600853, 0.048162321585148],
|
||||
[0, 0, 0, 2.231558436628169, 2.723267514525966,
|
||||
2.811549786389614, 2.813766976061531, 2.817462468949557,
|
||||
2.817368178703816, 2.816221090636795],
|
||||
[0, 0, -1.218422599914637, -3.457726183014808,
|
||||
-4.021304522060710, -45.827461592423745,
|
||||
-47.776608869312305,
|
||||
-47.911561610746404, -47.914845922736234,
|
||||
-48.039562334265717]])
|
||||
|
||||
model_lasso_lars = linear_model.LassoLars(alpha=0, fit_intercept=False,
|
||||
normalize=False)
|
||||
model_lasso_lars.fit(X, y)
|
||||
skl_betas = model_lasso_lars.coef_path_
|
||||
|
||||
assert_array_almost_equal(r, skl_betas, decimal=12)
|
||||
###########################################################################
|
||||
|
||||
###########################################################################
|
||||
# Scenario 2: Let's compare R vs sklearn when fit_intercept=True and
|
||||
# normalize=True
|
||||
#
|
||||
    # Note: when normalize=True, R returns the coefficients in their original
    # units (i.e. rescaled back), whereas sklearn does not. We therefore have
    # to rescale sklearn's coefficients before comparing the results.
|
||||
###########################################################################
|
||||
#
|
||||
# The R result was obtained using the following code:
|
||||
#
|
||||
# library(lars)
|
||||
# model_lasso_lars2 = lars(X, t(y), type="lasso", intercept=TRUE,
|
||||
# trace=TRUE, normalize=TRUE)
|
||||
# r2 = t(model_lasso_lars2$beta)
|
||||
|
||||
r2 = np.array([[0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 8.371887668009453, 19.463768371044026],
|
||||
[0, 0, 0, 0, 9.901611055290553],
|
||||
[0, 7.495923132833733, 9.245133544334507,
|
||||
17.389369207545062, 26.971656815643499],
|
||||
[0, 0, -1.569380717440311, -5.924804108067312,
|
||||
-7.996385265061972]])
|
||||
|
||||
model_lasso_lars2 = linear_model.LassoLars(alpha=0, normalize=True)
|
||||
model_lasso_lars2.fit(X, y)
|
||||
skl_betas2 = model_lasso_lars2.coef_path_
|
||||
|
||||
# Let's rescale back the coefficients returned by sklearn before comparing
|
||||
# against the R result (read the note above)
|
||||
temp = X - np.mean(X, axis=0)
|
||||
normx = np.sqrt(np.sum(temp ** 2, axis=0))
|
||||
skl_betas2 /= normx[:, np.newaxis]
|
||||
|
||||
assert_array_almost_equal(r2, skl_betas2, decimal=12)
|
||||
###########################################################################
|
||||
|
||||
|
||||
@pytest.mark.parametrize('copy_X', [True, False])
|
||||
def test_lasso_lars_copyX_behaviour(copy_X):
|
||||
"""
|
||||
Test that user input regarding copy_X is not being overridden (it was until
|
||||
at least version 0.21)
|
||||
|
||||
"""
|
||||
lasso_lars = LassoLarsIC(copy_X=copy_X, precompute=False)
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.normal(0, 1, (100, 5))
|
||||
X_copy = X.copy()
|
||||
y = X[:, 2]
|
||||
lasso_lars.fit(X, y)
|
||||
assert copy_X == np.array_equal(X, X_copy)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('copy_X', [True, False])
|
||||
def test_lasso_lars_fit_copyX_behaviour(copy_X):
|
||||
"""
|
||||
Test that user input to .fit for copy_X overrides default __init__ value
|
||||
|
||||
"""
|
||||
lasso_lars = LassoLarsIC(precompute=False)
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.normal(0, 1, (100, 5))
|
||||
X_copy = X.copy()
|
||||
y = X[:, 2]
|
||||
lasso_lars.fit(X, y, copy_X=copy_X)
|
||||
assert copy_X == np.array_equal(X, X_copy)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('est', (LassoLars(alpha=1e-3), Lars()))
|
||||
def test_lars_with_jitter(est):
|
||||
# Test that a small amount of jitter helps stability,
|
||||
# using example provided in issue #2746
|
||||
|
||||
X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0],
|
||||
[0.0, -1.0, 0.0, 0.0, 0.0]])
|
||||
y = [-2.5, -2.5]
|
||||
expected_coef = [0, 2.5, 0, 2.5, 0]
|
||||
|
||||
    # Set fit_intercept to False since the target is constant and we want to
    # check the value of coef. coef would be all zeros otherwise.
|
||||
est.set_params(fit_intercept=False)
|
||||
est_jitter = clone(est).set_params(jitter=10e-8, random_state=0)
|
||||
|
||||
est.fit(X, y)
|
||||
est_jitter.fit(X, y)
|
||||
|
||||
assert np.mean((est.coef_ - est_jitter.coef_)**2) > .1
|
||||
np.testing.assert_allclose(est_jitter.coef_, expected_coef, rtol=1e-3)
|
||||
|
||||
|
||||
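# Illustrative sketch (not sklearn internals): conceptually, the `jitter`
# parameter exercised in test_lars_with_jitter adds a small uniform
# perturbation to the targets to break the exact ties that make the
# degenerate X above unstable. The helper name below is hypothetical and only
# meant to make the idea concrete.
def _add_jitter_to_targets(y, jitter, random_state=0):
    rng_local = np.random.RandomState(random_state)
    return np.asarray(y, dtype=float) + rng_local.uniform(high=jitter,
                                                          size=len(y))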
def test_X_none_gram_not_none():
|
||||
with pytest.raises(ValueError,
|
||||
match="X cannot be None if Gram is not None"):
|
||||
lars_path(X=None, y=[1], Gram='not None')
|
||||
|
||||
|
||||
def test_copy_X_with_auto_gram():
|
||||
# Non-regression test for #17789, `copy_X=True` and Gram='auto' does not
|
||||
# overwrite X
|
||||
rng = np.random.RandomState(42)
|
||||
X = rng.rand(6, 6)
|
||||
y = rng.rand(6)
|
||||
|
||||
X_before = X.copy()
|
||||
linear_model.lars_path(X, y, Gram='auto', copy_X=True, method='lasso')
|
||||
# X did not change
|
||||
assert_allclose(X, X_before)
|
1829
venv/Lib/site-packages/sklearn/linear_model/tests/test_logistic.py
Normal file
File diff suppressed because it is too large
232
venv/Lib/site-packages/sklearn/linear_model/tests/test_omp.py
Normal file
@@ -0,0 +1,232 @@
# Author: Vlad Niculae
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sklearn.utils._testing import assert_raises
|
||||
from sklearn.utils._testing import assert_array_equal
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import assert_warns
|
||||
from sklearn.utils._testing import ignore_warnings
|
||||
|
||||
|
||||
from sklearn.linear_model import (orthogonal_mp, orthogonal_mp_gram,
|
||||
OrthogonalMatchingPursuit,
|
||||
OrthogonalMatchingPursuitCV,
|
||||
LinearRegression)
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.datasets import make_sparse_coded_signal
|
||||
|
||||
n_samples, n_features, n_nonzero_coefs, n_targets = 25, 35, 5, 3
|
||||
y, X, gamma = make_sparse_coded_signal(n_samples=n_targets,
|
||||
n_components=n_features,
|
||||
n_features=n_samples,
|
||||
n_nonzero_coefs=n_nonzero_coefs,
|
||||
random_state=0)
|
||||
# Make X not of norm 1 for testing
|
||||
X *= 10
|
||||
y *= 10
|
||||
G, Xy = np.dot(X.T, X), np.dot(X.T, y)
|
||||
# this makes X (n_samples, n_features)
|
||||
# and y (n_samples, 3)
|
||||
|
||||
|
||||
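# Illustrative sketch (not the library implementation): the greedy idea that
# orthogonal matching pursuit follows - repeatedly pick the atom most
# correlated with the current residual, then refit the active set by least
# squares. The real solver additionally handles normalization, Gram matrices
# and early stopping; this helper exists only to make the tests below easier
# to follow.
def _omp_reference(X_atoms, target, n_nonzero):
    residual = target.copy()
    active = []
    coef = np.zeros(X_atoms.shape[1])
    for _ in range(n_nonzero):
        # atom whose correlation with the residual is largest in magnitude
        correlations = np.dot(X_atoms.T, residual)
        active.append(int(np.argmax(np.abs(correlations))))
        # least-squares refit on the active set, then update the residual
        coef_active, _, _, _ = np.linalg.lstsq(X_atoms[:, active], target,
                                               rcond=None)
        residual = target - np.dot(X_atoms[:, active], coef_active)
    coef[active] = coef_active
    return coef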
def test_correct_shapes():
|
||||
assert (orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5).shape ==
|
||||
(n_features,))
|
||||
assert (orthogonal_mp(X, y, n_nonzero_coefs=5).shape ==
|
||||
(n_features, 3))
|
||||
|
||||
|
||||
def test_correct_shapes_gram():
|
||||
assert (orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5).shape ==
|
||||
(n_features,))
|
||||
assert (orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5).shape ==
|
||||
(n_features, 3))
|
||||
|
||||
|
||||
def test_n_nonzero_coefs():
|
||||
assert np.count_nonzero(orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)) <= 5
|
||||
assert np.count_nonzero(orthogonal_mp(X, y[:, 0],
|
||||
n_nonzero_coefs=5,
|
||||
precompute=True)) <= 5
|
||||
|
||||
|
||||
def test_tol():
|
||||
tol = 0.5
|
||||
gamma = orthogonal_mp(X, y[:, 0], tol=tol)
|
||||
gamma_gram = orthogonal_mp(X, y[:, 0], tol=tol, precompute=True)
|
||||
assert np.sum((y[:, 0] - np.dot(X, gamma)) ** 2) <= tol
|
||||
assert np.sum((y[:, 0] - np.dot(X, gamma_gram)) ** 2) <= tol
|
||||
|
||||
|
||||
def test_with_without_gram():
|
||||
assert_array_almost_equal(
|
||||
orthogonal_mp(X, y, n_nonzero_coefs=5),
|
||||
orthogonal_mp(X, y, n_nonzero_coefs=5, precompute=True))
|
||||
|
||||
|
||||
def test_with_without_gram_tol():
|
||||
assert_array_almost_equal(
|
||||
orthogonal_mp(X, y, tol=1.),
|
||||
orthogonal_mp(X, y, tol=1., precompute=True))
|
||||
|
||||
|
||||
def test_unreachable_accuracy():
|
||||
assert_array_almost_equal(
|
||||
orthogonal_mp(X, y, tol=0),
|
||||
orthogonal_mp(X, y, n_nonzero_coefs=n_features))
|
||||
|
||||
assert_array_almost_equal(
|
||||
assert_warns(RuntimeWarning, orthogonal_mp, X, y, tol=0,
|
||||
precompute=True),
|
||||
orthogonal_mp(X, y, precompute=True,
|
||||
n_nonzero_coefs=n_features))
|
||||
|
||||
|
||||
def test_bad_input():
|
||||
assert_raises(ValueError, orthogonal_mp, X, y, tol=-1)
|
||||
assert_raises(ValueError, orthogonal_mp, X, y, n_nonzero_coefs=-1)
|
||||
assert_raises(ValueError, orthogonal_mp, X, y,
|
||||
n_nonzero_coefs=n_features + 1)
|
||||
assert_raises(ValueError, orthogonal_mp_gram, G, Xy, tol=-1)
|
||||
assert_raises(ValueError, orthogonal_mp_gram, G, Xy, n_nonzero_coefs=-1)
|
||||
assert_raises(ValueError, orthogonal_mp_gram, G, Xy,
|
||||
n_nonzero_coefs=n_features + 1)
|
||||
|
||||
|
||||
def test_perfect_signal_recovery():
|
||||
idx, = gamma[:, 0].nonzero()
|
||||
gamma_rec = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)
|
||||
gamma_gram = orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5)
|
||||
assert_array_equal(idx, np.flatnonzero(gamma_rec))
|
||||
assert_array_equal(idx, np.flatnonzero(gamma_gram))
|
||||
assert_array_almost_equal(gamma[:, 0], gamma_rec, decimal=2)
|
||||
assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
|
||||
|
||||
|
||||
def test_orthogonal_mp_gram_readonly():
|
||||
# Non-regression test for:
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/5956
|
||||
idx, = gamma[:, 0].nonzero()
|
||||
G_readonly = G.copy()
|
||||
G_readonly.setflags(write=False)
|
||||
Xy_readonly = Xy.copy()
|
||||
Xy_readonly.setflags(write=False)
|
||||
gamma_gram = orthogonal_mp_gram(G_readonly, Xy_readonly[:, 0],
|
||||
n_nonzero_coefs=5,
|
||||
copy_Gram=False, copy_Xy=False)
|
||||
assert_array_equal(idx, np.flatnonzero(gamma_gram))
|
||||
assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
|
||||
|
||||
|
||||
def test_estimator():
|
||||
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
|
||||
omp.fit(X, y[:, 0])
|
||||
assert omp.coef_.shape == (n_features,)
|
||||
assert omp.intercept_.shape == ()
|
||||
assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
|
||||
|
||||
omp.fit(X, y)
|
||||
assert omp.coef_.shape == (n_targets, n_features)
|
||||
assert omp.intercept_.shape == (n_targets,)
|
||||
assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs
|
||||
|
||||
coef_normalized = omp.coef_[0].copy()
|
||||
omp.set_params(fit_intercept=True, normalize=False)
|
||||
omp.fit(X, y[:, 0])
|
||||
assert_array_almost_equal(coef_normalized, omp.coef_)
|
||||
|
||||
omp.set_params(fit_intercept=False, normalize=False)
|
||||
omp.fit(X, y[:, 0])
|
||||
assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
|
||||
assert omp.coef_.shape == (n_features,)
|
||||
assert omp.intercept_ == 0
|
||||
|
||||
omp.fit(X, y)
|
||||
assert omp.coef_.shape == (n_targets, n_features)
|
||||
assert omp.intercept_ == 0
|
||||
assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs
|
||||
|
||||
|
||||
def test_identical_regressors():
|
||||
newX = X.copy()
|
||||
newX[:, 1] = newX[:, 0]
|
||||
gamma = np.zeros(n_features)
|
||||
gamma[0] = gamma[1] = 1.
|
||||
newy = np.dot(newX, gamma)
|
||||
assert_warns(RuntimeWarning, orthogonal_mp, newX, newy, 2)
|
||||
|
||||
|
||||
def test_swapped_regressors():
|
||||
gamma = np.zeros(n_features)
|
||||
# X[:, 21] should be selected first, then X[:, 0] selected second,
|
||||
# which will take X[:, 21]'s place in case the algorithm does
|
||||
# column swapping for optimization (which is the case at the moment)
|
||||
gamma[21] = 1.0
|
||||
gamma[0] = 0.5
|
||||
new_y = np.dot(X, gamma)
|
||||
new_Xy = np.dot(X.T, new_y)
|
||||
gamma_hat = orthogonal_mp(X, new_y, n_nonzero_coefs=2)
|
||||
gamma_hat_gram = orthogonal_mp_gram(G, new_Xy, n_nonzero_coefs=2)
|
||||
assert_array_equal(np.flatnonzero(gamma_hat), [0, 21])
|
||||
assert_array_equal(np.flatnonzero(gamma_hat_gram), [0, 21])
|
||||
|
||||
|
||||
def test_no_atoms():
|
||||
y_empty = np.zeros_like(y)
|
||||
Xy_empty = np.dot(X.T, y_empty)
|
||||
gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty,
|
||||
n_nonzero_coefs=1)
|
||||
gamma_empty_gram = ignore_warnings(orthogonal_mp)(G, Xy_empty,
|
||||
n_nonzero_coefs=1)
|
||||
assert np.all(gamma_empty == 0)
|
||||
assert np.all(gamma_empty_gram == 0)
|
||||
|
||||
|
||||
def test_omp_path():
|
||||
path = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=True)
|
||||
last = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False)
|
||||
assert path.shape == (n_features, n_targets, 5)
|
||||
assert_array_almost_equal(path[:, :, -1], last)
|
||||
path = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5, return_path=True)
|
||||
last = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5, return_path=False)
|
||||
assert path.shape == (n_features, n_targets, 5)
|
||||
assert_array_almost_equal(path[:, :, -1], last)
|
||||
|
||||
|
||||
def test_omp_return_path_prop_with_gram():
|
||||
path = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=True,
|
||||
precompute=True)
|
||||
last = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False,
|
||||
precompute=True)
|
||||
assert path.shape == (n_features, n_targets, 5)
|
||||
assert_array_almost_equal(path[:, :, -1], last)
|
||||
|
||||
|
||||
def test_omp_cv():
|
||||
y_ = y[:, 0]
|
||||
gamma_ = gamma[:, 0]
|
||||
ompcv = OrthogonalMatchingPursuitCV(normalize=True, fit_intercept=False,
|
||||
max_iter=10)
|
||||
ompcv.fit(X, y_)
|
||||
assert ompcv.n_nonzero_coefs_ == n_nonzero_coefs
|
||||
assert_array_almost_equal(ompcv.coef_, gamma_)
|
||||
omp = OrthogonalMatchingPursuit(normalize=True, fit_intercept=False,
|
||||
n_nonzero_coefs=ompcv.n_nonzero_coefs_)
|
||||
omp.fit(X, y_)
|
||||
assert_array_almost_equal(ompcv.coef_, omp.coef_)
|
||||
|
||||
|
||||
def test_omp_reaches_least_squares():
|
||||
# Use small simple data; it's a sanity check but OMP can stop early
|
||||
rng = check_random_state(0)
|
||||
n_samples, n_features = (10, 8)
|
||||
n_targets = 3
|
||||
X = rng.randn(n_samples, n_features)
|
||||
Y = rng.randn(n_samples, n_targets)
|
||||
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_features)
|
||||
lstsq = LinearRegression()
|
||||
omp.fit(X, Y)
|
||||
lstsq.fit(X, Y)
|
||||
assert_array_almost_equal(omp.coef_, lstsq.coef_)
@@ -0,0 +1,281 @@
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
|
||||
import pytest
|
||||
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import assert_array_equal
|
||||
from sklearn.utils._testing import assert_almost_equal
|
||||
from sklearn.utils._testing import assert_raises
|
||||
|
||||
from sklearn.base import ClassifierMixin
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.linear_model import PassiveAggressiveClassifier
|
||||
from sklearn.linear_model import PassiveAggressiveRegressor
|
||||
|
||||
iris = load_iris()
|
||||
random_state = check_random_state(12)
|
||||
indices = np.arange(iris.data.shape[0])
|
||||
random_state.shuffle(indices)
|
||||
X = iris.data[indices]
|
||||
y = iris.target[indices]
|
||||
X_csr = sp.csr_matrix(X)
|
||||
|
||||
|
||||
class MyPassiveAggressive(ClassifierMixin):
|
||||
|
||||
def __init__(self, C=1.0, epsilon=0.01, loss="hinge",
|
||||
fit_intercept=True, n_iter=1, random_state=None):
|
||||
self.C = C
|
||||
self.epsilon = epsilon
|
||||
self.loss = loss
|
||||
self.fit_intercept = fit_intercept
|
||||
self.n_iter = n_iter
|
||||
|
||||
def fit(self, X, y):
|
||||
n_samples, n_features = X.shape
|
||||
self.w = np.zeros(n_features, dtype=np.float64)
|
||||
self.b = 0.0
|
||||
|
||||
for t in range(self.n_iter):
|
||||
for i in range(n_samples):
|
||||
p = self.project(X[i])
|
||||
if self.loss in ("hinge", "squared_hinge"):
|
||||
loss = max(1 - y[i] * p, 0)
|
||||
else:
|
||||
loss = max(np.abs(p - y[i]) - self.epsilon, 0)
|
||||
|
||||
sqnorm = np.dot(X[i], X[i])
|
||||
|
||||
if self.loss in ("hinge", "epsilon_insensitive"):
|
||||
step = min(self.C, loss / sqnorm)
|
||||
elif self.loss in ("squared_hinge",
|
||||
"squared_epsilon_insensitive"):
|
||||
step = loss / (sqnorm + 1.0 / (2 * self.C))
|
||||
|
||||
if self.loss in ("hinge", "squared_hinge"):
|
||||
step *= y[i]
|
||||
else:
|
||||
step *= np.sign(y[i] - p)
|
||||
|
||||
self.w += step * X[i]
|
||||
if self.fit_intercept:
|
||||
self.b += step
|
||||
|
||||
def project(self, X):
|
||||
return np.dot(X, self.w) + self.b
|
||||
|
||||
|
||||
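# Illustrative sketch (not part of the original file): a single PA-I update
# in closed form, following Crammer et al. (2006), which is what the
# reference implementation above loops over for the "hinge" loss:
#   tau = min(C, hinge_loss / ||x||^2),  w <- w + tau * y * x
def _pa1_step(w, x, y_i, C=1.0):
    loss = max(1 - y_i * np.dot(w, x), 0)
    tau = min(C, loss / np.dot(x, x))
    return w + tau * y_i * x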
def test_classifier_accuracy():
|
||||
for data in (X, X_csr):
|
||||
for fit_intercept in (True, False):
|
||||
for average in (False, True):
|
||||
clf = PassiveAggressiveClassifier(
|
||||
C=1.0, max_iter=30, fit_intercept=fit_intercept,
|
||||
random_state=1, average=average, tol=None)
|
||||
clf.fit(data, y)
|
||||
score = clf.score(data, y)
|
||||
assert score > 0.79
|
||||
if average:
|
||||
assert hasattr(clf, '_average_coef')
|
||||
assert hasattr(clf, '_average_intercept')
|
||||
assert hasattr(clf, '_standard_intercept')
|
||||
assert hasattr(clf, '_standard_coef')
|
||||
|
||||
|
||||
def test_classifier_partial_fit():
|
||||
classes = np.unique(y)
|
||||
for data in (X, X_csr):
|
||||
for average in (False, True):
|
||||
clf = PassiveAggressiveClassifier(random_state=0,
|
||||
average=average,
|
||||
max_iter=5)
|
||||
for t in range(30):
|
||||
clf.partial_fit(data, y, classes)
|
||||
score = clf.score(data, y)
|
||||
assert score > 0.79
|
||||
if average:
|
||||
assert hasattr(clf, '_average_coef')
|
||||
assert hasattr(clf, '_average_intercept')
|
||||
assert hasattr(clf, '_standard_intercept')
|
||||
assert hasattr(clf, '_standard_coef')
|
||||
|
||||
|
||||
def test_classifier_refit():
|
||||
# Classifier can be retrained on different labels and features.
|
||||
clf = PassiveAggressiveClassifier(max_iter=5).fit(X, y)
|
||||
assert_array_equal(clf.classes_, np.unique(y))
|
||||
|
||||
clf.fit(X[:, :-1], iris.target_names[y])
|
||||
assert_array_equal(clf.classes_, iris.target_names)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('loss', ("hinge", "squared_hinge"))
|
||||
def test_classifier_correctness(loss):
|
||||
y_bin = y.copy()
|
||||
y_bin[y != 1] = -1
|
||||
|
||||
clf1 = MyPassiveAggressive(loss=loss, n_iter=2)
|
||||
clf1.fit(X, y_bin)
|
||||
|
||||
for data in (X, X_csr):
|
||||
clf2 = PassiveAggressiveClassifier(loss=loss, max_iter=2,
|
||||
shuffle=False, tol=None)
|
||||
clf2.fit(data, y_bin)
|
||||
|
||||
assert_array_almost_equal(clf1.w, clf2.coef_.ravel(), decimal=2)
|
||||
|
||||
|
||||
def test_classifier_undefined_methods():
|
||||
clf = PassiveAggressiveClassifier(max_iter=100)
|
||||
for meth in ("predict_proba", "predict_log_proba", "transform"):
|
||||
assert_raises(AttributeError, lambda x: getattr(clf, x), meth)
|
||||
|
||||
|
||||
def test_class_weights():
|
||||
# Test class weights.
|
||||
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
|
||||
[1.0, 1.0], [1.0, 0.0]])
|
||||
y2 = [1, 1, 1, -1, -1]
|
||||
|
||||
clf = PassiveAggressiveClassifier(C=0.1, max_iter=100, class_weight=None,
|
||||
random_state=100)
|
||||
clf.fit(X2, y2)
|
||||
assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))
|
||||
|
||||
    # we give a small weight to class 1
|
||||
clf = PassiveAggressiveClassifier(C=0.1, max_iter=100,
|
||||
class_weight={1: 0.001},
|
||||
random_state=100)
|
||||
clf.fit(X2, y2)
|
||||
|
||||
    # now the hyperplane should rotate clockwise and
    # the prediction on this point should shift
|
||||
assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))
|
||||
|
||||
|
||||
def test_partial_fit_weight_class_balanced():
|
||||
# partial_fit with class_weight='balanced' not supported
|
||||
clf = PassiveAggressiveClassifier(class_weight="balanced", max_iter=100)
|
||||
assert_raises(ValueError, clf.partial_fit, X, y, classes=np.unique(y))
|
||||
|
||||
|
||||
def test_equal_class_weight():
|
||||
X2 = [[1, 0], [1, 0], [0, 1], [0, 1]]
|
||||
y2 = [0, 0, 1, 1]
|
||||
clf = PassiveAggressiveClassifier(
|
||||
C=0.1, max_iter=1000, tol=None, class_weight=None)
|
||||
clf.fit(X2, y2)
|
||||
|
||||
# Already balanced, so "balanced" weights should have no effect
|
||||
clf_balanced = PassiveAggressiveClassifier(
|
||||
C=0.1, max_iter=1000, tol=None, class_weight="balanced")
|
||||
clf_balanced.fit(X2, y2)
|
||||
|
||||
clf_weighted = PassiveAggressiveClassifier(
|
||||
C=0.1, max_iter=1000, tol=None, class_weight={0: 0.5, 1: 0.5})
|
||||
clf_weighted.fit(X2, y2)
|
||||
|
||||
# should be similar up to some epsilon due to learning rate schedule
|
||||
assert_almost_equal(clf.coef_, clf_weighted.coef_, decimal=2)
|
||||
assert_almost_equal(clf.coef_, clf_balanced.coef_, decimal=2)
|
||||
|
||||
|
||||
def test_wrong_class_weight_label():
|
||||
# ValueError due to wrong class_weight label.
|
||||
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
|
||||
[1.0, 1.0], [1.0, 0.0]])
|
||||
y2 = [1, 1, 1, -1, -1]
|
||||
|
||||
clf = PassiveAggressiveClassifier(class_weight={0: 0.5}, max_iter=100)
|
||||
assert_raises(ValueError, clf.fit, X2, y2)
|
||||
|
||||
|
||||
def test_wrong_class_weight_format():
|
||||
# ValueError due to wrong class_weight argument type.
|
||||
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
|
||||
[1.0, 1.0], [1.0, 0.0]])
|
||||
y2 = [1, 1, 1, -1, -1]
|
||||
|
||||
clf = PassiveAggressiveClassifier(class_weight=[0.5], max_iter=100)
|
||||
assert_raises(ValueError, clf.fit, X2, y2)
|
||||
|
||||
clf = PassiveAggressiveClassifier(class_weight="the larch", max_iter=100)
|
||||
assert_raises(ValueError, clf.fit, X2, y2)
|
||||
|
||||
|
||||
def test_regressor_mse():
|
||||
y_bin = y.copy()
|
||||
y_bin[y != 1] = -1
|
||||
|
||||
for data in (X, X_csr):
|
||||
for fit_intercept in (True, False):
|
||||
for average in (False, True):
|
||||
reg = PassiveAggressiveRegressor(
|
||||
C=1.0, fit_intercept=fit_intercept,
|
||||
random_state=0, average=average, max_iter=5)
|
||||
reg.fit(data, y_bin)
|
||||
pred = reg.predict(data)
|
||||
assert np.mean((pred - y_bin) ** 2) < 1.7
|
||||
if average:
|
||||
assert hasattr(reg, '_average_coef')
|
||||
assert hasattr(reg, '_average_intercept')
|
||||
assert hasattr(reg, '_standard_intercept')
|
||||
assert hasattr(reg, '_standard_coef')
|
||||
|
||||
|
||||
def test_regressor_partial_fit():
|
||||
y_bin = y.copy()
|
||||
y_bin[y != 1] = -1
|
||||
|
||||
for data in (X, X_csr):
|
||||
for average in (False, True):
|
||||
reg = PassiveAggressiveRegressor(random_state=0,
|
||||
average=average, max_iter=100)
|
||||
for t in range(50):
|
||||
reg.partial_fit(data, y_bin)
|
||||
pred = reg.predict(data)
|
||||
assert np.mean((pred - y_bin) ** 2) < 1.7
|
||||
if average:
|
||||
assert hasattr(reg, '_average_coef')
|
||||
assert hasattr(reg, '_average_intercept')
|
||||
assert hasattr(reg, '_standard_intercept')
|
||||
assert hasattr(reg, '_standard_coef')
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'loss',
|
||||
("epsilon_insensitive", "squared_epsilon_insensitive"))
|
||||
def test_regressor_correctness(loss):
|
||||
y_bin = y.copy()
|
||||
y_bin[y != 1] = -1
|
||||
|
||||
reg1 = MyPassiveAggressive(loss=loss, n_iter=2)
|
||||
reg1.fit(X, y_bin)
|
||||
|
||||
for data in (X, X_csr):
|
||||
reg2 = PassiveAggressiveRegressor(tol=None, loss=loss, max_iter=2,
|
||||
shuffle=False)
|
||||
reg2.fit(data, y_bin)
|
||||
|
||||
assert_array_almost_equal(reg1.w, reg2.coef_.ravel(), decimal=2)
|
||||
|
||||
|
||||
def test_regressor_undefined_methods():
|
||||
reg = PassiveAggressiveRegressor(max_iter=100)
|
||||
for meth in ("transform",):
|
||||
assert_raises(AttributeError, lambda x: getattr(reg, x), meth)
|
||||
|
||||
# TODO: remove in 0.25
|
||||
@pytest.mark.parametrize('klass', [PassiveAggressiveClassifier,
|
||||
PassiveAggressiveRegressor])
|
||||
def test_passive_aggressive_deprecated_attr(klass):
|
||||
est = klass(average=True)
|
||||
est.fit(X, y)
|
||||
|
||||
msg = "Attribute {} was deprecated"
|
||||
for att in ['average_coef_', 'average_intercept_',
|
||||
'standard_coef_', 'standard_intercept_']:
|
||||
with pytest.warns(FutureWarning, match=msg.format(att)):
|
||||
getattr(est, att)
@@ -0,0 +1,69 @@
import numpy as np
import scipy.sparse as sp

from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raises

from sklearn.utils import check_random_state
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

iris = load_iris()
random_state = check_random_state(12)
indices = np.arange(iris.data.shape[0])
random_state.shuffle(indices)
X = iris.data[indices]
y = iris.target[indices]
X_csr = sp.csr_matrix(X)
X_csr.sort_indices()


class MyPerceptron:

    def __init__(self, n_iter=1):
        self.n_iter = n_iter

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features, dtype=np.float64)
        self.b = 0.0

        for t in range(self.n_iter):
            for i in range(n_samples):
                if self.predict(X[i])[0] != y[i]:
                    self.w += y[i] * X[i]
                    self.b += y[i]

    def project(self, X):
        return np.dot(X, self.w) + self.b

    def predict(self, X):
        X = np.atleast_2d(X)
        return np.sign(self.project(X))


def test_perceptron_accuracy():
    for data in (X, X_csr):
        clf = Perceptron(max_iter=100, tol=None, shuffle=False)
        clf.fit(data, y)
        score = clf.score(data, y)
        assert score > 0.7


def test_perceptron_correctness():
    y_bin = y.copy()
    y_bin[y != 1] = -1

    clf1 = MyPerceptron(n_iter=2)
    clf1.fit(X, y_bin)

    clf2 = Perceptron(max_iter=2, shuffle=False, tol=None)
    clf2.fit(X, y_bin)

    assert_array_almost_equal(clf1.w, clf2.coef_.ravel())


def test_undefined_methods():
    clf = Perceptron(max_iter=100)
    for meth in ("predict_proba", "predict_log_proba"):
        assert_raises(AttributeError, lambda x: getattr(clf, x), meth)
515
venv/Lib/site-packages/sklearn/linear_model/tests/test_ransac.py
Normal file
@@ -0,0 +1,515 @@
import numpy as np
|
||||
from scipy import sparse
|
||||
|
||||
from numpy.testing import assert_array_almost_equal
|
||||
from numpy.testing import assert_array_equal
|
||||
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.utils._testing import assert_warns
|
||||
from sklearn.utils._testing import assert_raises_regexp
|
||||
from sklearn.utils._testing import assert_raises
|
||||
from sklearn.utils._testing import assert_allclose
|
||||
from sklearn.datasets import make_regression
|
||||
from sklearn.linear_model import LinearRegression, RANSACRegressor
|
||||
from sklearn.linear_model import OrthogonalMatchingPursuit
|
||||
from sklearn.linear_model._ransac import _dynamic_max_trials
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
|
||||
|
||||
# Generate coordinates of line
|
||||
X = np.arange(-200, 200)
|
||||
y = 0.2 * X + 20
|
||||
data = np.column_stack([X, y])
|
||||
|
||||
# Add some faulty data
|
||||
rng = np.random.RandomState(1000)
|
||||
outliers = np.unique(rng.randint(len(X), size=200))
|
||||
data[outliers, :] += 50 + rng.rand(len(outliers), 2) * 10
|
||||
|
||||
X = data[:, 0][:, np.newaxis]
|
||||
y = data[:, 1]
|
||||
|
||||
|
||||
def test_ransac_inliers_outliers():
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
|
||||
# Estimate parameters of corrupted data
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
# Ground truth / reference inlier mask
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_is_data_valid():
|
||||
def is_data_valid(X, y):
|
||||
assert X.shape[0] == 2
|
||||
assert y.shape[0] == 2
|
||||
return False
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.rand(10, 2)
|
||||
y = rng.rand(10, 1)
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5,
|
||||
is_data_valid=is_data_valid,
|
||||
random_state=0)
|
||||
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y)
|
||||
|
||||
|
||||
def test_ransac_is_model_valid():
|
||||
def is_model_valid(estimator, X, y):
|
||||
assert X.shape[0] == 2
|
||||
assert y.shape[0] == 2
|
||||
return False
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5,
|
||||
is_model_valid=is_model_valid,
|
||||
random_state=0)
|
||||
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y)
|
||||
|
||||
|
||||
def test_ransac_max_trials():
|
||||
base_estimator = LinearRegression()
|
||||
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, max_trials=0,
|
||||
random_state=0)
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y)
|
||||
|
||||
    # There is a 1e-9 chance it will take this many trials. No good reason;
    # 1e-2 isn't enough, it can still happen.
    # 2 is what RANSAC defines as min_samples = X.shape[1] + 1
|
||||
max_trials = _dynamic_max_trials(
|
||||
len(X) - len(outliers), X.shape[0], 2, 1 - 1e-9)
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2)
|
||||
for i in range(50):
|
||||
ransac_estimator.set_params(min_samples=2, random_state=i)
|
||||
ransac_estimator.fit(X, y)
|
||||
assert ransac_estimator.n_trials_ < max_trials + 1
|
||||
|
||||
def test_ransac_stop_n_inliers():
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, stop_n_inliers=2,
|
||||
random_state=0)
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
assert ransac_estimator.n_trials_ == 1
|
||||
|
||||
|
||||
def test_ransac_stop_score():
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, stop_score=0,
|
||||
random_state=0)
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
assert ransac_estimator.n_trials_ == 1
|
||||
|
||||
|
||||
def test_ransac_score():
|
||||
X = np.arange(100)[:, None]
|
||||
y = np.zeros((100, ))
|
||||
y[0] = 1
|
||||
y[1] = 100
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=0.5, random_state=0)
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
assert ransac_estimator.score(X[2:], y[2:]) == 1
|
||||
assert ransac_estimator.score(X[:2], y[:2]) < 1
|
||||
|
||||
|
||||
def test_ransac_predict():
|
||||
X = np.arange(100)[:, None]
|
||||
y = np.zeros((100, ))
|
||||
y[0] = 1
|
||||
y[1] = 100
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=0.5, random_state=0)
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
assert_array_equal(ransac_estimator.predict(X), np.zeros(100))
|
||||
|
||||
|
||||
def test_ransac_resid_thresh_no_inliers():
|
||||
# When residual_threshold=0.0 there are no inliers and a
|
||||
# ValueError with a message should be raised
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=0.0, random_state=0,
|
||||
max_trials=5)
|
||||
|
||||
msg = ("RANSAC could not find a valid consensus set")
|
||||
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
|
||||
assert ransac_estimator.n_skips_no_inliers_ == 5
|
||||
assert ransac_estimator.n_skips_invalid_data_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
def test_ransac_no_valid_data():
|
||||
def is_data_valid(X, y):
|
||||
return False
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator,
|
||||
is_data_valid=is_data_valid,
|
||||
max_trials=5)
|
||||
|
||||
msg = ("RANSAC could not find a valid consensus set")
|
||||
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
|
||||
assert ransac_estimator.n_skips_no_inliers_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_data_ == 5
|
||||
assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
def test_ransac_no_valid_model():
|
||||
def is_model_valid(estimator, X, y):
|
||||
return False
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator,
|
||||
is_model_valid=is_model_valid,
|
||||
max_trials=5)
|
||||
|
||||
msg = ("RANSAC could not find a valid consensus set")
|
||||
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
|
||||
assert ransac_estimator.n_skips_no_inliers_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_data_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_model_ == 5
|
||||
|
||||
|
||||
def test_ransac_exceed_max_skips():
|
||||
def is_data_valid(X, y):
|
||||
return False
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator,
|
||||
is_data_valid=is_data_valid,
|
||||
max_trials=5,
|
||||
max_skips=3)
|
||||
|
||||
msg = ("RANSAC skipped more iterations than `max_skips`")
|
||||
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
|
||||
assert ransac_estimator.n_skips_no_inliers_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_data_ == 4
|
||||
assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
def test_ransac_warn_exceed_max_skips():
|
||||
global cause_skip
|
||||
cause_skip = False
|
||||
|
||||
def is_data_valid(X, y):
|
||||
global cause_skip
|
||||
if not cause_skip:
|
||||
cause_skip = True
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator,
|
||||
is_data_valid=is_data_valid,
|
||||
max_skips=3,
|
||||
max_trials=5)
|
||||
|
||||
assert_warns(ConvergenceWarning, ransac_estimator.fit, X, y)
|
||||
assert ransac_estimator.n_skips_no_inliers_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_data_ == 4
|
||||
assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
def test_ransac_sparse_coo():
|
||||
X_sparse = sparse.coo_matrix(X)
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator.fit(X_sparse, y)
|
||||
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_sparse_csr():
|
||||
X_sparse = sparse.csr_matrix(X)
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator.fit(X_sparse, y)
|
||||
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_sparse_csc():
|
||||
X_sparse = sparse.csc_matrix(X)
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator.fit(X_sparse, y)
|
||||
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_none_estimator():
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_none_estimator = RANSACRegressor(None, min_samples=2,
|
||||
residual_threshold=5,
|
||||
random_state=0)
|
||||
|
||||
ransac_estimator.fit(X, y)
|
||||
ransac_none_estimator.fit(X, y)
|
||||
|
||||
assert_array_almost_equal(ransac_estimator.predict(X),
|
||||
ransac_none_estimator.predict(X))
|
||||
|
||||
|
||||
def test_ransac_min_n_samples():
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator1 = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator2 = RANSACRegressor(base_estimator,
|
||||
min_samples=2. / X.shape[0],
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator3 = RANSACRegressor(base_estimator, min_samples=-1,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator4 = RANSACRegressor(base_estimator, min_samples=5.2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator5 = RANSACRegressor(base_estimator, min_samples=2.0,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator6 = RANSACRegressor(base_estimator,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator7 = RANSACRegressor(base_estimator,
|
||||
min_samples=X.shape[0] + 1,
|
||||
residual_threshold=5, random_state=0)
|
||||
|
||||
ransac_estimator1.fit(X, y)
|
||||
ransac_estimator2.fit(X, y)
|
||||
ransac_estimator5.fit(X, y)
|
||||
ransac_estimator6.fit(X, y)
|
||||
|
||||
assert_array_almost_equal(ransac_estimator1.predict(X),
|
||||
ransac_estimator2.predict(X))
|
||||
assert_array_almost_equal(ransac_estimator1.predict(X),
|
||||
ransac_estimator5.predict(X))
|
||||
assert_array_almost_equal(ransac_estimator1.predict(X),
|
||||
ransac_estimator6.predict(X))
|
||||
|
||||
assert_raises(ValueError, ransac_estimator3.fit, X, y)
|
||||
assert_raises(ValueError, ransac_estimator4.fit, X, y)
|
||||
assert_raises(ValueError, ransac_estimator7.fit, X, y)
|
||||
|
||||
|
||||
def test_ransac_multi_dimensional_targets():
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
|
||||
# 3-D target values
|
||||
yyy = np.column_stack([y, y, y])
|
||||
|
||||
# Estimate parameters of corrupted data
|
||||
ransac_estimator.fit(X, yyy)
|
||||
|
||||
# Ground truth / reference inlier mask
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_residual_loss():
|
||||
loss_multi1 = lambda y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1)
|
||||
loss_multi2 = lambda y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1)
|
||||
|
||||
loss_mono = lambda y_true, y_pred : np.abs(y_true - y_pred)
|
||||
yyy = np.column_stack([y, y, y])
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator0 = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator1 = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0,
|
||||
loss=loss_multi1)
|
||||
ransac_estimator2 = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0,
|
||||
loss=loss_multi2)
|
||||
|
||||
# multi-dimensional
|
||||
ransac_estimator0.fit(X, yyy)
|
||||
ransac_estimator1.fit(X, yyy)
|
||||
ransac_estimator2.fit(X, yyy)
|
||||
assert_array_almost_equal(ransac_estimator0.predict(X),
|
||||
ransac_estimator1.predict(X))
|
||||
assert_array_almost_equal(ransac_estimator0.predict(X),
|
||||
ransac_estimator2.predict(X))
|
||||
|
||||
# one-dimensional
|
||||
ransac_estimator0.fit(X, y)
|
||||
ransac_estimator2.loss = loss_mono
|
||||
ransac_estimator2.fit(X, y)
|
||||
assert_array_almost_equal(ransac_estimator0.predict(X),
|
||||
ransac_estimator2.predict(X))
|
||||
ransac_estimator3 = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0,
|
||||
loss="squared_loss")
|
||||
ransac_estimator3.fit(X, y)
|
||||
assert_array_almost_equal(ransac_estimator0.predict(X),
|
||||
ransac_estimator2.predict(X))
|
||||
|
||||
|
||||
def test_ransac_default_residual_threshold():
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
random_state=0)
|
||||
|
||||
# Estimate parameters of corrupted data
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
# Ground truth / reference inlier mask
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_dynamic_max_trials():
|
||||
# Numbers hand-calculated and confirmed on page 119 (Table 4.3) in
|
||||
# Hartley, R.~I. and Zisserman, A., 2004,
|
||||
# Multiple View Geometry in Computer Vision, Second Edition,
|
||||
# Cambridge University Press, ISBN: 0521540518
|
||||
|
||||
# e = 0%, min_samples = X
|
||||
assert _dynamic_max_trials(100, 100, 2, 0.99) == 1
|
||||
|
||||
# e = 5%, min_samples = 2
|
||||
assert _dynamic_max_trials(95, 100, 2, 0.99) == 2
|
||||
# e = 10%, min_samples = 2
|
||||
assert _dynamic_max_trials(90, 100, 2, 0.99) == 3
|
||||
# e = 30%, min_samples = 2
|
||||
assert _dynamic_max_trials(70, 100, 2, 0.99) == 7
|
||||
# e = 50%, min_samples = 2
|
||||
assert _dynamic_max_trials(50, 100, 2, 0.99) == 17
|
||||
|
||||
# e = 5%, min_samples = 8
|
||||
assert _dynamic_max_trials(95, 100, 8, 0.99) == 5
|
||||
# e = 10%, min_samples = 8
|
||||
assert _dynamic_max_trials(90, 100, 8, 0.99) == 9
|
||||
# e = 30%, min_samples = 8
|
||||
assert _dynamic_max_trials(70, 100, 8, 0.99) == 78
|
||||
# e = 50%, min_samples = 8
|
||||
assert _dynamic_max_trials(50, 100, 8, 0.99) == 1177
|
||||
|
||||
# e = 0%, min_samples = 10
|
||||
assert _dynamic_max_trials(1, 100, 10, 0) == 0
|
||||
assert _dynamic_max_trials(1, 100, 10, 1) == float('inf')
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
stop_probability=-0.1)
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y)
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
stop_probability=1.1)
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y)
|
||||
|
||||
|
||||
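# Illustrative sketch (not sklearn's `_dynamic_max_trials`): the standard
# RANSAC stopping rule checked in the table above. With inlier ratio w,
# subset size m and target confidence p, the number of random subsets needed
# is N = log(1 - p) / log(1 - w**m) (Hartley & Zisserman, 2004). The real
# helper additionally guards the degenerate cases (w in {0, 1}, p in {0, 1}).
def _approx_max_trials(n_inliers, n_samples, min_samples, probability):
    inlier_ratio = n_inliers / float(n_samples)
    nom = np.log(1 - probability)
    denom = np.log(1 - inlier_ratio ** min_samples)
    return int(np.ceil(nom / denom))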
def test_ransac_fit_sample_weight():
|
||||
ransac_estimator = RANSACRegressor(random_state=0)
|
||||
n_samples = y.shape[0]
|
||||
weights = np.ones(n_samples)
|
||||
ransac_estimator.fit(X, y, weights)
|
||||
# sanity check
|
||||
assert ransac_estimator.inlier_mask_.shape[0] == n_samples
|
||||
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
# check that mask is correct
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
# check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where
|
||||
# X = X1 repeated n1 times, X2 repeated n2 times and so forth
|
||||
random_state = check_random_state(0)
|
||||
X_ = random_state.randint(0, 200, [10, 1])
|
||||
y_ = np.ndarray.flatten(0.2 * X_ + 2)
|
||||
sample_weight = random_state.randint(0, 10, 10)
|
||||
outlier_X = random_state.randint(0, 1000, [1, 1])
|
||||
outlier_weight = random_state.randint(0, 10, 1)
|
||||
outlier_y = random_state.randint(-1000, 0, 1)
|
||||
|
||||
X_flat = np.append(np.repeat(X_, sample_weight, axis=0),
|
||||
np.repeat(outlier_X, outlier_weight, axis=0), axis=0)
|
||||
y_flat = np.ndarray.flatten(np.append(np.repeat(y_, sample_weight, axis=0),
|
||||
np.repeat(outlier_y, outlier_weight, axis=0),
|
||||
axis=0))
|
||||
ransac_estimator.fit(X_flat, y_flat)
|
||||
ref_coef_ = ransac_estimator.estimator_.coef_
|
||||
|
||||
sample_weight = np.append(sample_weight, outlier_weight)
|
||||
X_ = np.append(X_, outlier_X, axis=0)
|
||||
y_ = np.append(y_, outlier_y)
|
||||
ransac_estimator.fit(X_, y_, sample_weight)
|
||||
|
||||
assert_allclose(ransac_estimator.estimator_.coef_, ref_coef_)
|
||||
|
||||
# check that if base_estimator.fit doesn't support
|
||||
# sample_weight, raises error
|
||||
base_estimator = OrthogonalMatchingPursuit()
|
||||
ransac_estimator = RANSACRegressor(base_estimator)
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y, weights)
|
||||
|
||||
|
||||
def test_ransac_final_model_fit_sample_weight():
|
||||
X, y = make_regression(n_samples=1000, random_state=10)
|
||||
rng = check_random_state(42)
|
||||
sample_weight = rng.randint(1, 4, size=y.shape[0])
|
||||
sample_weight = sample_weight / sample_weight.sum()
|
||||
ransac = RANSACRegressor(base_estimator=LinearRegression(), random_state=0)
|
||||
ransac.fit(X, y, sample_weight=sample_weight)
|
||||
|
||||
final_model = LinearRegression()
|
||||
mask_samples = ransac.inlier_mask_
|
||||
final_model.fit(
|
||||
X[mask_samples], y[mask_samples],
|
||||
sample_weight=sample_weight[mask_samples]
|
||||
)
|
||||
|
||||
assert_allclose(ransac.estimator_.coef_, final_model.coef_)
|
1326
venv/Lib/site-packages/sklearn/linear_model/tests/test_ridge.py
Normal file
File diff suppressed because it is too large
848
venv/Lib/site-packages/sklearn/linear_model/tests/test_sag.py
Normal file
@@ -0,0 +1,848 @@
# Authors: Danny Sullivan <dbsullivan23@gmail.com>
|
||||
# Tom Dupre la Tour <tom.dupre-la-tour@m4x.org>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
import math
|
||||
import pytest
|
||||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
from scipy.special import logsumexp
|
||||
|
||||
from sklearn.linear_model._sag import get_auto_step_size
|
||||
from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples
|
||||
from sklearn.linear_model import LogisticRegression, Ridge
|
||||
from sklearn.linear_model._base import make_dataset
|
||||
from sklearn.linear_model._logistic import _multinomial_loss_grad
|
||||
|
||||
from sklearn.utils.extmath import row_norms
|
||||
from sklearn.utils._testing import assert_almost_equal
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import assert_allclose
|
||||
from sklearn.utils._testing import assert_raise_message
|
||||
from sklearn.utils import compute_class_weight
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
|
||||
from sklearn.datasets import make_blobs, load_iris, make_classification
|
||||
from sklearn.base import clone
|
||||
|
||||
iris = load_iris()
|
||||
|
||||
|
||||
# this is used for sag classification
|
||||
def log_dloss(p, y):
|
||||
z = p * y
|
||||
# approximately equal and saves the computation of the log
|
||||
if z > 18.0:
|
||||
return math.exp(-z) * -y
|
||||
if z < -18.0:
|
||||
return -y
|
||||
return -y / (math.exp(z) + 1.0)
|
||||
|
||||
|
||||
def log_loss(p, y):
|
||||
return np.mean(np.log(1. + np.exp(-y * p)))
|
||||
|
||||
|
||||
# this is used for sag regression
|
||||
def squared_dloss(p, y):
|
||||
return p - y
|
||||
|
||||
|
||||
def squared_loss(p, y):
|
||||
return np.mean(0.5 * (p - y) * (p - y))
|
||||
|
||||
|
||||
# function for measuring the penalized objective (loss + L2 penalty)
|
||||
def get_pobj(w, alpha, myX, myy, loss):
|
||||
w = w.ravel()
|
||||
pred = np.dot(myX, w)
|
||||
p = loss(pred, myy)
|
||||
p += alpha * w.dot(w) / 2.
|
||||
return p
|
||||
|
||||
|
||||
def sag(X, y, step_size, alpha, n_iter=1, dloss=None, sparse=False,
|
||||
sample_weight=None, fit_intercept=True, saga=False):
|
||||
n_samples, n_features = X.shape[0], X.shape[1]
|
||||
|
||||
weights = np.zeros(X.shape[1])
|
||||
sum_gradient = np.zeros(X.shape[1])
|
||||
gradient_memory = np.zeros((n_samples, n_features))
|
||||
|
||||
intercept = 0.0
|
||||
intercept_sum_gradient = 0.0
|
||||
intercept_gradient_memory = np.zeros(n_samples)
|
||||
|
||||
rng = np.random.RandomState(77)
|
||||
decay = 1.0
|
||||
seen = set()
|
||||
|
||||
# sparse data has a fixed decay of .01
|
||||
if sparse:
|
||||
decay = .01
|
||||
|
||||
for epoch in range(n_iter):
|
||||
for k in range(n_samples):
|
||||
idx = int(rng.rand(1) * n_samples)
|
||||
# idx = k
|
||||
entry = X[idx]
|
||||
seen.add(idx)
|
||||
p = np.dot(entry, weights) + intercept
|
||||
gradient = dloss(p, y[idx])
|
||||
if sample_weight is not None:
|
||||
gradient *= sample_weight[idx]
|
||||
update = entry * gradient + alpha * weights
|
||||
gradient_correction = update - gradient_memory[idx]
|
||||
sum_gradient += gradient_correction
|
||||
gradient_memory[idx] = update
|
||||
if saga:
|
||||
weights -= (gradient_correction *
|
||||
step_size * (1 - 1. / len(seen)))
|
||||
|
||||
if fit_intercept:
|
||||
gradient_correction = (gradient -
|
||||
intercept_gradient_memory[idx])
|
||||
intercept_gradient_memory[idx] = gradient
|
||||
intercept_sum_gradient += gradient_correction
|
||||
gradient_correction *= step_size * (1. - 1. / len(seen))
|
||||
if saga:
|
||||
intercept -= (step_size * intercept_sum_gradient /
|
||||
len(seen) * decay) + gradient_correction
|
||||
else:
|
||||
intercept -= (step_size * intercept_sum_gradient /
|
||||
len(seen) * decay)
|
||||
|
||||
weights -= step_size * sum_gradient / len(seen)
|
||||
|
||||
return weights, intercept
|
||||
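As implemented by the dense reference sag() above: each step draws a random index i, keeps a per-sample gradient memory g_i (here including the \alpha w term), and updates

    d \leftarrow d + \bigl(\nabla f_i(w) - g_i\bigr), \qquad
    g_i \leftarrow \nabla f_i(w), \qquad
    w \leftarrow w - \frac{\gamma}{m}\, d

where m is the number of distinct samples seen so far and \gamma the step size. With saga=True the code also applies the correction term immediately, w \leftarrow w - \gamma\,(1 - 1/m)\,\bigl(\nabla f_i(w) - g_i\bigr), and the intercept follows the same scheme scaled by the decay factor.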
|
||||
|
||||
def sag_sparse(X, y, step_size, alpha, n_iter=1,
|
||||
dloss=None, sample_weight=None, sparse=False,
|
||||
fit_intercept=True, saga=False, random_state=0):
|
||||
if step_size * alpha == 1.:
|
||||
raise ZeroDivisionError("Sparse sag does not handle the case "
|
||||
"step_size * alpha == 1")
|
||||
n_samples, n_features = X.shape[0], X.shape[1]
|
||||
|
||||
weights = np.zeros(n_features)
|
||||
sum_gradient = np.zeros(n_features)
|
||||
    last_updated = np.zeros(n_features, dtype=int)  # np.int is deprecated in recent NumPy
|
||||
gradient_memory = np.zeros(n_samples)
|
||||
rng = check_random_state(random_state)
|
||||
intercept = 0.0
|
||||
intercept_sum_gradient = 0.0
|
||||
wscale = 1.0
|
||||
decay = 1.0
|
||||
seen = set()
|
||||
|
||||
c_sum = np.zeros(n_iter * n_samples)
|
||||
|
||||
# sparse data has a fixed decay of .01
|
||||
if sparse:
|
||||
decay = .01
|
||||
|
||||
counter = 0
|
||||
for epoch in range(n_iter):
|
||||
for k in range(n_samples):
|
||||
# idx = k
|
||||
idx = int(rng.rand(1) * n_samples)
|
||||
entry = X[idx]
|
||||
seen.add(idx)
|
||||
|
||||
if counter >= 1:
|
||||
for j in range(n_features):
|
||||
if last_updated[j] == 0:
|
||||
weights[j] -= c_sum[counter - 1] * sum_gradient[j]
|
||||
else:
|
||||
weights[j] -= ((c_sum[counter - 1] -
|
||||
c_sum[last_updated[j] - 1]) *
|
||||
sum_gradient[j])
|
||||
last_updated[j] = counter
|
||||
|
||||
p = (wscale * np.dot(entry, weights)) + intercept
|
||||
gradient = dloss(p, y[idx])
|
||||
|
||||
if sample_weight is not None:
|
||||
gradient *= sample_weight[idx]
|
||||
|
||||
update = entry * gradient
|
||||
gradient_correction = update - (gradient_memory[idx] * entry)
|
||||
sum_gradient += gradient_correction
|
||||
if saga:
|
||||
for j in range(n_features):
|
||||
weights[j] -= (gradient_correction[j] * step_size *
|
||||
(1 - 1. / len(seen)) / wscale)
|
||||
|
||||
if fit_intercept:
|
||||
gradient_correction = gradient - gradient_memory[idx]
|
||||
intercept_sum_gradient += gradient_correction
|
||||
gradient_correction *= step_size * (1. - 1. / len(seen))
|
||||
if saga:
|
||||
intercept -= ((step_size * intercept_sum_gradient /
|
||||
len(seen) * decay) +
|
||||
gradient_correction)
|
||||
else:
|
||||
intercept -= (step_size * intercept_sum_gradient /
|
||||
len(seen) * decay)
|
||||
|
||||
gradient_memory[idx] = gradient
|
||||
|
||||
wscale *= (1.0 - alpha * step_size)
|
||||
if counter == 0:
|
||||
c_sum[0] = step_size / (wscale * len(seen))
|
||||
else:
|
||||
c_sum[counter] = (c_sum[counter - 1] +
|
||||
step_size / (wscale * len(seen)))
|
||||
|
||||
if counter >= 1 and wscale < 1e-9:
|
||||
for j in range(n_features):
|
||||
if last_updated[j] == 0:
|
||||
weights[j] -= c_sum[counter] * sum_gradient[j]
|
||||
else:
|
||||
weights[j] -= ((c_sum[counter] -
|
||||
c_sum[last_updated[j] - 1]) *
|
||||
sum_gradient[j])
|
||||
last_updated[j] = counter + 1
|
||||
c_sum[counter] = 0
|
||||
weights *= wscale
|
||||
wscale = 1.0
|
||||
|
||||
counter += 1
|
||||
|
||||
for j in range(n_features):
|
||||
if last_updated[j] == 0:
|
||||
weights[j] -= c_sum[counter - 1] * sum_gradient[j]
|
||||
else:
|
||||
weights[j] -= ((c_sum[counter - 1] -
|
||||
c_sum[last_updated[j] - 1]) *
|
||||
sum_gradient[j])
|
||||
weights *= wscale
|
||||
return weights, intercept
|
||||
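sag_sparse() produces the same iterates as sag() but applies them lazily, mirroring what the solver does on sparse input: the weights are stored as w = wscale \cdot v, the per-step shrinkage (1 - \alpha\gamma) only touches wscale, and the cumulative sums

    c_t = c_{t-1} + \frac{\gamma}{\text{wscale}_t \, m_t}

let a coordinate j last touched at step u_j catch up via v_j \leftarrow v_j - \bigl(c_{t-1} - c_{u_j - 1}\bigr)\, d_j just before it is read again, with a full flush of all coordinates whenever wscale drops below 1e-9.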
|
||||
|
||||
def get_step_size(X, alpha, fit_intercept, classification=True):
|
||||
if classification:
|
||||
return (4.0 / (np.max(np.sum(X * X, axis=1)) +
|
||||
fit_intercept + 4.0 * alpha))
|
||||
else:
|
||||
return 1.0 / (np.max(np.sum(X * X, axis=1)) + fit_intercept + alpha)
|
||||
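The constants returned by get_step_size mirror the non-SAGA branch of sklearn's get_auto_step_size (exercised in test_get_auto_step_size below):

    \gamma_{\text{log}} = \frac{4}{\max_i \lVert x_i \rVert^2 + \mathbb{1}[\text{fit\_intercept}] + 4\alpha}, \qquad
    \gamma_{\text{sq}} = \frac{1}{\max_i \lVert x_i \rVert^2 + \mathbb{1}[\text{fit\_intercept}] + \alpha}

For is_saga=True that test instead expects \gamma = 1 / \bigl(2L + \min(2 n \alpha, L)\bigr), with L the corresponding Lipschitz constant.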
|
||||
|
||||
def test_classifier_matching():
|
||||
n_samples = 20
|
||||
X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
|
||||
cluster_std=0.1)
|
||||
y[y == 0] = -1
|
||||
alpha = 1.1
|
||||
fit_intercept = True
|
||||
step_size = get_step_size(X, alpha, fit_intercept)
|
||||
for solver in ['sag', 'saga']:
|
||||
if solver == 'sag':
|
||||
n_iter = 80
|
||||
else:
|
||||
# SAGA variance w.r.t. stream order is higher
|
||||
n_iter = 300
|
||||
clf = LogisticRegression(solver=solver, fit_intercept=fit_intercept,
|
||||
tol=1e-11, C=1. / alpha / n_samples,
|
||||
max_iter=n_iter, random_state=10,
|
||||
multi_class='ovr')
|
||||
clf.fit(X, y)
|
||||
|
||||
weights, intercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=log_dloss,
|
||||
fit_intercept=fit_intercept,
|
||||
saga=solver == 'saga')
|
||||
weights2, intercept2 = sag(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=log_dloss,
|
||||
fit_intercept=fit_intercept,
|
||||
saga=solver == 'saga')
|
||||
weights = np.atleast_2d(weights)
|
||||
intercept = np.atleast_1d(intercept)
|
||||
weights2 = np.atleast_2d(weights2)
|
||||
intercept2 = np.atleast_1d(intercept2)
|
||||
|
||||
assert_array_almost_equal(weights, clf.coef_, decimal=9)
|
||||
assert_array_almost_equal(intercept, clf.intercept_, decimal=9)
|
||||
assert_array_almost_equal(weights2, clf.coef_, decimal=9)
|
||||
assert_array_almost_equal(intercept2, clf.intercept_, decimal=9)
|
||||
|
||||
|
||||
def test_regressor_matching():
|
||||
n_samples = 10
|
||||
n_features = 5
|
||||
|
||||
rng = np.random.RandomState(10)
|
||||
X = rng.normal(size=(n_samples, n_features))
|
||||
true_w = rng.normal(size=n_features)
|
||||
y = X.dot(true_w)
|
||||
|
||||
alpha = 1.
|
||||
n_iter = 100
|
||||
fit_intercept = True
|
||||
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=False)
|
||||
clf = Ridge(fit_intercept=fit_intercept, tol=.00000000001, solver='sag',
|
||||
alpha=alpha * n_samples, max_iter=n_iter)
|
||||
clf.fit(X, y)
|
||||
|
||||
weights1, intercept1 = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=squared_dloss,
|
||||
fit_intercept=fit_intercept)
|
||||
weights2, intercept2 = sag(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=squared_dloss,
|
||||
fit_intercept=fit_intercept)
|
||||
|
||||
assert_allclose(weights1, clf.coef_)
|
||||
assert_allclose(intercept1, clf.intercept_)
|
||||
assert_allclose(weights2, clf.coef_)
|
||||
assert_allclose(intercept2, clf.intercept_)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_sag_pobj_matches_logistic_regression():
|
||||
"""tests if the sag pobj matches log reg"""
|
||||
n_samples = 100
|
||||
alpha = 1.0
|
||||
max_iter = 20
|
||||
X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
|
||||
cluster_std=0.1)
|
||||
|
||||
clf1 = LogisticRegression(solver='sag', fit_intercept=False, tol=.0000001,
|
||||
C=1. / alpha / n_samples, max_iter=max_iter,
|
||||
random_state=10, multi_class='ovr')
|
||||
clf2 = clone(clf1)
|
||||
clf3 = LogisticRegression(fit_intercept=False, tol=.0000001,
|
||||
C=1. / alpha / n_samples, max_iter=max_iter,
|
||||
random_state=10, multi_class='ovr')
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
clf3.fit(X, y)
|
||||
|
||||
pobj1 = get_pobj(clf1.coef_, alpha, X, y, log_loss)
|
||||
pobj2 = get_pobj(clf2.coef_, alpha, X, y, log_loss)
|
||||
pobj3 = get_pobj(clf3.coef_, alpha, X, y, log_loss)
|
||||
|
||||
assert_array_almost_equal(pobj1, pobj2, decimal=4)
|
||||
assert_array_almost_equal(pobj2, pobj3, decimal=4)
|
||||
assert_array_almost_equal(pobj3, pobj1, decimal=4)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_sag_pobj_matches_ridge_regression():
|
||||
"""tests if the sag pobj matches ridge reg"""
|
||||
n_samples = 100
|
||||
n_features = 10
|
||||
alpha = 1.0
|
||||
n_iter = 100
|
||||
fit_intercept = False
|
||||
rng = np.random.RandomState(10)
|
||||
X = rng.normal(size=(n_samples, n_features))
|
||||
true_w = rng.normal(size=n_features)
|
||||
y = X.dot(true_w)
|
||||
|
||||
clf1 = Ridge(fit_intercept=fit_intercept, tol=.00000000001, solver='sag',
|
||||
alpha=alpha, max_iter=n_iter, random_state=42)
|
||||
clf2 = clone(clf1)
|
||||
clf3 = Ridge(fit_intercept=fit_intercept, tol=.00001, solver='lsqr',
|
||||
alpha=alpha, max_iter=n_iter, random_state=42)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
clf3.fit(X, y)
|
||||
|
||||
pobj1 = get_pobj(clf1.coef_, alpha, X, y, squared_loss)
|
||||
pobj2 = get_pobj(clf2.coef_, alpha, X, y, squared_loss)
|
||||
pobj3 = get_pobj(clf3.coef_, alpha, X, y, squared_loss)
|
||||
|
||||
assert_array_almost_equal(pobj1, pobj2, decimal=4)
|
||||
assert_array_almost_equal(pobj1, pobj3, decimal=4)
|
||||
assert_array_almost_equal(pobj3, pobj2, decimal=4)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_sag_regressor_computed_correctly():
|
||||
"""tests if the sag regressor is computed correctly"""
|
||||
alpha = .1
|
||||
n_features = 10
|
||||
n_samples = 40
|
||||
max_iter = 100
|
||||
tol = .000001
|
||||
fit_intercept = True
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.normal(size=(n_samples, n_features))
|
||||
w = rng.normal(size=n_features)
|
||||
y = np.dot(X, w) + 2.
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=False)
|
||||
|
||||
clf1 = Ridge(fit_intercept=fit_intercept, tol=tol, solver='sag',
|
||||
alpha=alpha * n_samples, max_iter=max_iter,
|
||||
random_state=rng)
|
||||
clf2 = clone(clf1)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
|
||||
spweights1, spintercept1 = sag_sparse(X, y, step_size, alpha,
|
||||
n_iter=max_iter,
|
||||
dloss=squared_dloss,
|
||||
fit_intercept=fit_intercept,
|
||||
random_state=rng)
|
||||
|
||||
spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
|
||||
n_iter=max_iter,
|
||||
dloss=squared_dloss, sparse=True,
|
||||
fit_intercept=fit_intercept,
|
||||
random_state=rng)
|
||||
|
||||
assert_array_almost_equal(clf1.coef_.ravel(),
|
||||
spweights1.ravel(),
|
||||
decimal=3)
|
||||
assert_almost_equal(clf1.intercept_, spintercept1, decimal=1)
|
||||
|
||||
# TODO: uncomment when sparse Ridge with intercept will be fixed (#4710)
|
||||
# assert_array_almost_equal(clf2.coef_.ravel(),
|
||||
# spweights2.ravel(),
|
||||
# decimal=3)
|
||||
    # assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
|
||||
|
||||
|
||||
def test_get_auto_step_size():
|
||||
X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
|
||||
alpha = 1.2
|
||||
fit_intercept = False
|
||||
# sum the squares of the second sample because that's the largest
|
||||
max_squared_sum = 4 + 9 + 16
|
||||
max_squared_sum_ = row_norms(X, squared=True).max()
|
||||
n_samples = X.shape[0]
|
||||
assert_almost_equal(max_squared_sum, max_squared_sum_, decimal=4)
|
||||
|
||||
for saga in [True, False]:
|
||||
for fit_intercept in (True, False):
|
||||
if saga:
|
||||
L_sqr = (max_squared_sum + alpha + int(fit_intercept))
|
||||
L_log = (max_squared_sum + 4.0 * alpha +
|
||||
int(fit_intercept)) / 4.0
|
||||
mun_sqr = min(2 * n_samples * alpha, L_sqr)
|
||||
mun_log = min(2 * n_samples * alpha, L_log)
|
||||
step_size_sqr = 1 / (2 * L_sqr + mun_sqr)
|
||||
step_size_log = 1 / (2 * L_log + mun_log)
|
||||
else:
|
||||
step_size_sqr = 1.0 / (max_squared_sum +
|
||||
alpha + int(fit_intercept))
|
||||
step_size_log = 4.0 / (max_squared_sum + 4.0 * alpha +
|
||||
int(fit_intercept))
|
||||
|
||||
step_size_sqr_ = get_auto_step_size(max_squared_sum_, alpha,
|
||||
"squared",
|
||||
fit_intercept,
|
||||
n_samples=n_samples,
|
||||
is_saga=saga)
|
||||
step_size_log_ = get_auto_step_size(max_squared_sum_, alpha, "log",
|
||||
fit_intercept,
|
||||
n_samples=n_samples,
|
||||
is_saga=saga)
|
||||
|
||||
assert_almost_equal(step_size_sqr, step_size_sqr_, decimal=4)
|
||||
assert_almost_equal(step_size_log, step_size_log_, decimal=4)
|
||||
|
||||
msg = 'Unknown loss function for SAG solver, got wrong instead of'
|
||||
assert_raise_message(ValueError, msg, get_auto_step_size,
|
||||
max_squared_sum_, alpha, "wrong", fit_intercept)
|
||||
|
||||
|
||||
def test_sag_regressor():
|
||||
"""tests if the sag regressor performs well"""
|
||||
xmin, xmax = -5, 5
|
||||
n_samples = 20
|
||||
tol = .001
|
||||
max_iter = 50
|
||||
alpha = 0.1
|
||||
rng = np.random.RandomState(0)
|
||||
X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)
|
||||
|
||||
# simple linear function without noise
|
||||
y = 0.5 * X.ravel()
|
||||
|
||||
clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
|
||||
alpha=alpha * n_samples, random_state=rng)
|
||||
clf2 = clone(clf1)
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
score1 = clf1.score(X, y)
|
||||
score2 = clf2.score(X, y)
|
||||
assert score1 > 0.99
|
||||
assert score2 > 0.99
|
||||
|
||||
# simple linear function with noise
|
||||
y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()
|
||||
|
||||
clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
|
||||
alpha=alpha * n_samples)
|
||||
clf2 = clone(clf1)
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
score1 = clf1.score(X, y)
|
||||
    score2 = clf2.score(X, y)
|
||||
assert score1 > 0.5
|
||||
assert score2 > 0.5
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_sag_classifier_computed_correctly():
|
||||
"""tests if the binary classifier is computed correctly"""
|
||||
alpha = .1
|
||||
n_samples = 50
|
||||
n_iter = 50
|
||||
tol = .00001
|
||||
fit_intercept = True
|
||||
X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
|
||||
cluster_std=0.1)
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=True)
|
||||
classes = np.unique(y)
|
||||
y_tmp = np.ones(n_samples)
|
||||
y_tmp[y != classes[1]] = -1
|
||||
y = y_tmp
|
||||
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
|
||||
max_iter=n_iter, tol=tol, random_state=77,
|
||||
fit_intercept=fit_intercept, multi_class='ovr')
|
||||
clf2 = clone(clf1)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
|
||||
spweights, spintercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=log_dloss,
|
||||
fit_intercept=fit_intercept)
|
||||
spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
|
||||
n_iter=n_iter,
|
||||
dloss=log_dloss, sparse=True,
|
||||
fit_intercept=fit_intercept)
|
||||
|
||||
assert_array_almost_equal(clf1.coef_.ravel(),
|
||||
spweights.ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf1.intercept_, spintercept, decimal=1)
|
||||
|
||||
assert_array_almost_equal(clf2.coef_.ravel(),
|
||||
spweights2.ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_sag_multiclass_computed_correctly():
|
||||
"""tests if the multiclass classifier is computed correctly"""
|
||||
alpha = .1
|
||||
n_samples = 20
|
||||
tol = .00001
|
||||
max_iter = 40
|
||||
fit_intercept = True
|
||||
X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
|
||||
cluster_std=0.1)
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=True)
|
||||
classes = np.unique(y)
|
||||
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
|
||||
max_iter=max_iter, tol=tol, random_state=77,
|
||||
fit_intercept=fit_intercept, multi_class='ovr')
|
||||
clf2 = clone(clf1)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
|
||||
coef1 = []
|
||||
intercept1 = []
|
||||
coef2 = []
|
||||
intercept2 = []
|
||||
for cl in classes:
|
||||
y_encoded = np.ones(n_samples)
|
||||
y_encoded[y != cl] = -1
|
||||
|
||||
spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
|
||||
dloss=log_dloss, n_iter=max_iter,
|
||||
fit_intercept=fit_intercept)
|
||||
spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
|
||||
dloss=log_dloss, n_iter=max_iter,
|
||||
sparse=True,
|
||||
fit_intercept=fit_intercept)
|
||||
coef1.append(spweights1)
|
||||
intercept1.append(spintercept1)
|
||||
|
||||
coef2.append(spweights2)
|
||||
intercept2.append(spintercept2)
|
||||
|
||||
coef1 = np.vstack(coef1)
|
||||
intercept1 = np.array(intercept1)
|
||||
coef2 = np.vstack(coef2)
|
||||
intercept2 = np.array(intercept2)
|
||||
|
||||
for i, cl in enumerate(classes):
|
||||
assert_array_almost_equal(clf1.coef_[i].ravel(),
|
||||
coef1[i].ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)
|
||||
|
||||
assert_array_almost_equal(clf2.coef_[i].ravel(),
|
||||
coef2[i].ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)
|
||||
|
||||
|
||||
def test_classifier_results():
|
||||
"""tests if classifier results match target"""
|
||||
alpha = .1
|
||||
n_features = 20
|
||||
n_samples = 10
|
||||
tol = .01
|
||||
max_iter = 200
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.normal(size=(n_samples, n_features))
|
||||
w = rng.normal(size=n_features)
|
||||
y = np.dot(X, w)
|
||||
y = np.sign(y)
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
|
||||
max_iter=max_iter, tol=tol, random_state=77)
|
||||
clf2 = clone(clf1)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
pred1 = clf1.predict(X)
|
||||
pred2 = clf2.predict(X)
|
||||
assert_almost_equal(pred1, y, decimal=12)
|
||||
assert_almost_equal(pred2, y, decimal=12)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_binary_classifier_class_weight():
|
||||
"""tests binary classifier with classweights for each class"""
|
||||
alpha = .1
|
||||
n_samples = 50
|
||||
n_iter = 20
|
||||
tol = .00001
|
||||
fit_intercept = True
|
||||
X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10,
|
||||
cluster_std=0.1)
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=True)
|
||||
classes = np.unique(y)
|
||||
y_tmp = np.ones(n_samples)
|
||||
y_tmp[y != classes[1]] = -1
|
||||
y = y_tmp
|
||||
|
||||
class_weight = {1: .45, -1: .55}
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
|
||||
max_iter=n_iter, tol=tol, random_state=77,
|
||||
fit_intercept=fit_intercept, multi_class='ovr',
|
||||
class_weight=class_weight)
|
||||
clf2 = clone(clf1)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
|
||||
le = LabelEncoder()
|
||||
class_weight_ = compute_class_weight(class_weight, classes=np.unique(y),
|
||||
y=y)
|
||||
sample_weight = class_weight_[le.fit_transform(y)]
|
||||
spweights, spintercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=log_dloss,
|
||||
sample_weight=sample_weight,
|
||||
fit_intercept=fit_intercept)
|
||||
spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
|
||||
n_iter=n_iter,
|
||||
dloss=log_dloss, sparse=True,
|
||||
sample_weight=sample_weight,
|
||||
fit_intercept=fit_intercept)
|
||||
|
||||
assert_array_almost_equal(clf1.coef_.ravel(),
|
||||
spweights.ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf1.intercept_, spintercept, decimal=1)
|
||||
|
||||
assert_array_almost_equal(clf2.coef_.ravel(),
|
||||
spweights2.ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_multiclass_classifier_class_weight():
|
||||
"""tests multiclass with classweights for each class"""
|
||||
alpha = .1
|
||||
n_samples = 20
|
||||
tol = .00001
|
||||
max_iter = 50
|
||||
class_weight = {0: .45, 1: .55, 2: .75}
|
||||
fit_intercept = True
|
||||
X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
|
||||
cluster_std=0.1)
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=True)
|
||||
classes = np.unique(y)
|
||||
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
|
||||
max_iter=max_iter, tol=tol, random_state=77,
|
||||
fit_intercept=fit_intercept, multi_class='ovr',
|
||||
class_weight=class_weight)
|
||||
clf2 = clone(clf1)
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
|
||||
le = LabelEncoder()
|
||||
class_weight_ = compute_class_weight(class_weight, classes=np.unique(y),
|
||||
y=y)
|
||||
sample_weight = class_weight_[le.fit_transform(y)]
|
||||
|
||||
coef1 = []
|
||||
intercept1 = []
|
||||
coef2 = []
|
||||
intercept2 = []
|
||||
for cl in classes:
|
||||
y_encoded = np.ones(n_samples)
|
||||
y_encoded[y != cl] = -1
|
||||
|
||||
spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
|
||||
n_iter=max_iter, dloss=log_dloss,
|
||||
sample_weight=sample_weight)
|
||||
spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
|
||||
n_iter=max_iter, dloss=log_dloss,
|
||||
sample_weight=sample_weight,
|
||||
sparse=True)
|
||||
coef1.append(spweights1)
|
||||
intercept1.append(spintercept1)
|
||||
coef2.append(spweights2)
|
||||
intercept2.append(spintercept2)
|
||||
|
||||
coef1 = np.vstack(coef1)
|
||||
intercept1 = np.array(intercept1)
|
||||
coef2 = np.vstack(coef2)
|
||||
intercept2 = np.array(intercept2)
|
||||
|
||||
for i, cl in enumerate(classes):
|
||||
assert_array_almost_equal(clf1.coef_[i].ravel(),
|
||||
coef1[i].ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)
|
||||
|
||||
assert_array_almost_equal(clf2.coef_[i].ravel(),
|
||||
coef2[i].ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)
|
||||
|
||||
|
||||
def test_classifier_single_class():
|
||||
"""tests if ValueError is thrown with only one class"""
|
||||
X = [[1, 2], [3, 4]]
|
||||
y = [1, 1]
|
||||
|
||||
assert_raise_message(ValueError,
|
||||
"This solver needs samples of at least 2 classes "
|
||||
"in the data",
|
||||
LogisticRegression(solver='sag').fit,
|
||||
X, y)
|
||||
|
||||
|
||||
def test_step_size_alpha_error():
|
||||
X = [[0, 0], [0, 0]]
|
||||
y = [1, -1]
|
||||
fit_intercept = False
|
||||
alpha = 1.
|
||||
msg = ("Current sag implementation does not handle the case"
|
||||
" step_size * alpha_scaled == 1")
|
||||
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha,
|
||||
fit_intercept=fit_intercept)
|
||||
assert_raise_message(ZeroDivisionError, msg, clf1.fit, X, y)
|
||||
|
||||
clf2 = Ridge(fit_intercept=fit_intercept, solver='sag', alpha=alpha)
|
||||
assert_raise_message(ZeroDivisionError, msg, clf2.fit, X, y)
|
||||
|
||||
|
||||
def test_multinomial_loss():
|
||||
# test if the multinomial loss and gradient computations are consistent
|
||||
X, y = iris.data, iris.target.astype(np.float64)
|
||||
n_samples, n_features = X.shape
|
||||
n_classes = len(np.unique(y))
|
||||
|
||||
rng = check_random_state(42)
|
||||
weights = rng.randn(n_features, n_classes)
|
||||
intercept = rng.randn(n_classes)
|
||||
sample_weights = rng.randn(n_samples)
|
||||
np.abs(sample_weights, sample_weights)
|
||||
|
||||
# compute loss and gradient like in multinomial SAG
|
||||
dataset, _ = make_dataset(X, y, sample_weights, random_state=42)
|
||||
loss_1, grad_1 = _multinomial_grad_loss_all_samples(dataset, weights,
|
||||
intercept, n_samples,
|
||||
n_features, n_classes)
|
||||
# compute loss and gradient like in multinomial LogisticRegression
|
||||
lbin = LabelBinarizer()
|
||||
Y_bin = lbin.fit_transform(y)
|
||||
weights_intercept = np.vstack((weights, intercept)).T.ravel()
|
||||
loss_2, grad_2, _ = _multinomial_loss_grad(weights_intercept, X, Y_bin,
|
||||
0.0, sample_weights)
|
||||
grad_2 = grad_2.reshape(n_classes, -1)
|
||||
grad_2 = grad_2[:, :-1].T
|
||||
|
||||
# comparison
|
||||
assert_array_almost_equal(grad_1, grad_2)
|
||||
assert_almost_equal(loss_1, loss_2)
|
||||
|
||||
|
||||
def test_multinomial_loss_ground_truth():
|
||||
# n_samples, n_features, n_classes = 4, 2, 3
|
||||
n_classes = 3
|
||||
X = np.array([[1.1, 2.2], [2.2, -4.4], [3.3, -2.2], [1.1, 1.1]])
|
||||
y = np.array([0, 1, 2, 0])
|
||||
lbin = LabelBinarizer()
|
||||
Y_bin = lbin.fit_transform(y)
|
||||
|
||||
weights = np.array([[0.1, 0.2, 0.3], [1.1, 1.2, -1.3]])
|
||||
intercept = np.array([1., 0, -.2])
|
||||
sample_weights = np.array([0.8, 1, 1, 0.8])
|
||||
|
||||
prediction = np.dot(X, weights) + intercept
|
||||
logsumexp_prediction = logsumexp(prediction, axis=1)
|
||||
p = prediction - logsumexp_prediction[:, np.newaxis]
|
||||
loss_1 = -(sample_weights[:, np.newaxis] * p * Y_bin).sum()
|
||||
diff = sample_weights[:, np.newaxis] * (np.exp(p) - Y_bin)
|
||||
grad_1 = np.dot(X.T, diff)
|
||||
|
||||
weights_intercept = np.vstack((weights, intercept)).T.ravel()
|
||||
loss_2, grad_2, _ = _multinomial_loss_grad(weights_intercept, X, Y_bin,
|
||||
0.0, sample_weights)
|
||||
grad_2 = grad_2.reshape(n_classes, -1)
|
||||
grad_2 = grad_2[:, :-1].T
|
||||
|
||||
assert_almost_equal(loss_1, loss_2)
|
||||
assert_array_almost_equal(grad_1, grad_2)
|
||||
|
||||
# ground truth
|
||||
loss_gt = 11.680360354325961
|
||||
grad_gt = np.array([[-0.557487, -1.619151, +2.176638],
|
||||
[-0.903942, +5.258745, -4.354803]])
|
||||
assert_almost_equal(loss_1, loss_gt)
|
||||
assert_array_almost_equal(grad_1, grad_gt)
|
||||
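The hand computation above spells out the weighted multinomial loss and gradient that _multinomial_loss_grad is expected to return: with logits P = XW + b, sample weights s and one-hot labels Y,

    L = -\sum_i s_i \sum_c Y_{ic} \Bigl( P_{ic} - \log \sum_k e^{P_{ik}} \Bigr), \qquad
    \nabla_W L = X^\top \operatorname{diag}(s)\, \bigl(\operatorname{softmax}(P) - Y\bigr)

which is exactly what loss_1 and grad_1 compute via logsumexp.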
|
||||
|
||||
@pytest.mark.parametrize("solver", ["sag", "saga"])
|
||||
def test_sag_classifier_raises_error(solver):
|
||||
# Following #13316, the error handling behavior changed in cython sag. This
|
||||
# is simply a non-regression test to make sure numerical errors are
|
||||
# properly raised.
|
||||
|
||||
# Train a classifier on a simple problem
|
||||
rng = np.random.RandomState(42)
|
||||
X, y = make_classification(random_state=rng)
|
||||
clf = LogisticRegression(solver=solver, random_state=rng, warm_start=True)
|
||||
clf.fit(X, y)
|
||||
|
||||
# Trigger a numerical error by:
|
||||
# - corrupting the fitted coefficients of the classifier
|
||||
# - fit it again starting from its current state thanks to warm_start
|
||||
clf.coef_[:] = np.nan
|
||||
|
||||
with pytest.raises(ValueError, match="Floating-point under-/overflow"):
|
||||
clf.fit(X, y)
|
1621
venv/Lib/site-packages/sklearn/linear_model/tests/test_sgd.py
Normal file
1621
venv/Lib/site-packages/sklearn/linear_model/tests/test_sgd.py
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,300 @@
|
|||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import assert_almost_equal
|
||||
|
||||
from sklearn.utils._testing import ignore_warnings
|
||||
from sklearn.utils._testing import assert_warns
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
|
||||
from sklearn.linear_model import Lasso, ElasticNet, LassoCV, ElasticNetCV
|
||||
|
||||
|
||||
def test_sparse_coef():
|
||||
# Check that the sparse_coef property works
|
||||
clf = ElasticNet()
|
||||
clf.coef_ = [1, 2, 3]
|
||||
|
||||
assert sp.isspmatrix(clf.sparse_coef_)
|
||||
assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_
|
||||
|
||||
|
||||
def test_normalize_option():
|
||||
# Check that the normalize option in enet works
|
||||
X = sp.csc_matrix([[-1], [0], [1]])
|
||||
y = [-1, 0, 1]
|
||||
clf_dense = ElasticNet(normalize=True)
|
||||
clf_sparse = ElasticNet(normalize=True)
|
||||
clf_dense.fit(X, y)
|
||||
X = sp.csc_matrix(X)
|
||||
clf_sparse.fit(X, y)
|
||||
assert_almost_equal(clf_dense.dual_gap_, 0)
|
||||
assert_array_almost_equal(clf_dense.coef_, clf_sparse.coef_)
|
||||
|
||||
|
||||
def test_lasso_zero():
|
||||
# Check that the sparse lasso can handle zero data without crashing
|
||||
X = sp.csc_matrix((3, 1))
|
||||
y = [0, 0, 0]
|
||||
T = np.array([[1], [2], [3]])
|
||||
clf = Lasso().fit(X, y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [0])
|
||||
assert_array_almost_equal(pred, [0, 0, 0])
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
|
||||
def test_enet_toy_list_input():
|
||||
# Test ElasticNet for various values of alpha and l1_ratio with list X
|
||||
|
||||
X = np.array([[-1], [0], [1]])
|
||||
X = sp.csc_matrix(X)
|
||||
Y = [-1, 0, 1] # just a straight line
|
||||
T = np.array([[2], [3], [4]]) # test sample
|
||||
|
||||
# this should be the same as unregularized least squares
|
||||
clf = ElasticNet(alpha=0, l1_ratio=1.0)
|
||||
# catch warning about alpha=0.
|
||||
# this is discouraged but should work.
|
||||
ignore_warnings(clf.fit)(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [1])
|
||||
assert_array_almost_equal(pred, [2, 3, 4])
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=1000)
|
||||
clf.fit(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
|
||||
assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
|
||||
clf.fit(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [0.45454], 3)
|
||||
assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3)
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
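For reference, the alpha / l1_ratio pairs used in these toy fits parameterize the objective documented for scikit-learn's ElasticNet,

    \frac{1}{2n} \lVert y - Xw \rVert_2^2 + \alpha \rho \lVert w \rVert_1 + \frac{\alpha (1 - \rho)}{2} \lVert w \rVert_2^2, \qquad \rho = \text{l1\_ratio}

so alpha=0 removes both penalty terms and reduces to ordinary least squares, which is why the alpha=0 fit above is compared to the unregularized solution (and its warning ignored).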
|
||||
|
||||
def test_enet_toy_explicit_sparse_input():
|
||||
# Test ElasticNet for various values of alpha and l1_ratio with sparse X
|
||||
f = ignore_warnings
|
||||
# training samples
|
||||
X = sp.lil_matrix((3, 1))
|
||||
X[0, 0] = -1
|
||||
# X[1, 0] = 0
|
||||
X[2, 0] = 1
|
||||
Y = [-1, 0, 1] # just a straight line (the identity function)
|
||||
|
||||
# test samples
|
||||
T = sp.lil_matrix((3, 1))
|
||||
T[0, 0] = 2
|
||||
T[1, 0] = 3
|
||||
T[2, 0] = 4
|
||||
|
||||
# this should be the same as lasso
|
||||
clf = ElasticNet(alpha=0, l1_ratio=1.0)
|
||||
f(clf.fit)(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [1])
|
||||
assert_array_almost_equal(pred, [2, 3, 4])
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=1000)
|
||||
clf.fit(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
|
||||
assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
|
||||
clf.fit(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [0.45454], 3)
|
||||
assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3)
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
|
||||
def make_sparse_data(n_samples=100, n_features=100, n_informative=10, seed=42,
|
||||
positive=False, n_targets=1):
|
||||
random_state = np.random.RandomState(seed)
|
||||
|
||||
# build an ill-posed linear regression problem with many noisy features and
|
||||
# comparatively few samples
|
||||
|
||||
# generate a ground truth model
|
||||
w = random_state.randn(n_features, n_targets)
|
||||
w[n_informative:] = 0.0 # only the top features are impacting the model
|
||||
if positive:
|
||||
w = np.abs(w)
|
||||
|
||||
X = random_state.randn(n_samples, n_features)
|
||||
rnd = random_state.uniform(size=(n_samples, n_features))
|
||||
X[rnd > 0.5] = 0.0 # 50% of zeros in input signal
|
||||
|
||||
# generate training ground truth labels
|
||||
y = np.dot(X, w)
|
||||
X = sp.csc_matrix(X)
|
||||
if n_targets == 1:
|
||||
y = np.ravel(y)
|
||||
return X, y
|
||||
|
||||
|
||||
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
|
||||
n_samples, n_features, max_iter = 100, 100, 1000
|
||||
n_informative = 10
|
||||
|
||||
X, y = make_sparse_data(n_samples, n_features, n_informative,
|
||||
positive=positive)
|
||||
|
||||
X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
|
||||
y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]
|
||||
|
||||
s_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
|
||||
max_iter=max_iter, tol=1e-7, positive=positive,
|
||||
warm_start=True)
|
||||
s_clf.fit(X_train, y_train)
|
||||
|
||||
assert_almost_equal(s_clf.dual_gap_, 0, 4)
|
||||
assert s_clf.score(X_test, y_test) > 0.85
|
||||
|
||||
# check the convergence is the same as the dense version
|
||||
d_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
|
||||
max_iter=max_iter, tol=1e-7, positive=positive,
|
||||
warm_start=True)
|
||||
d_clf.fit(X_train.toarray(), y_train)
|
||||
|
||||
assert_almost_equal(d_clf.dual_gap_, 0, 4)
|
||||
assert d_clf.score(X_test, y_test) > 0.85
|
||||
|
||||
assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
|
||||
assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)
|
||||
|
||||
# check that the coefs are sparse
|
||||
assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative
|
||||
|
||||
|
||||
def test_sparse_enet_not_as_toy_dataset():
|
||||
_test_sparse_enet_not_as_toy_dataset(alpha=0.1, fit_intercept=False,
|
||||
positive=False)
|
||||
_test_sparse_enet_not_as_toy_dataset(alpha=0.1, fit_intercept=True,
|
||||
positive=False)
|
||||
_test_sparse_enet_not_as_toy_dataset(alpha=1e-3, fit_intercept=False,
|
||||
positive=True)
|
||||
_test_sparse_enet_not_as_toy_dataset(alpha=1e-3, fit_intercept=True,
|
||||
positive=True)
|
||||
|
||||
|
||||
def test_sparse_lasso_not_as_toy_dataset():
|
||||
n_samples = 100
|
||||
max_iter = 1000
|
||||
n_informative = 10
|
||||
X, y = make_sparse_data(n_samples=n_samples, n_informative=n_informative)
|
||||
|
||||
X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
|
||||
y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]
|
||||
|
||||
s_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
|
||||
s_clf.fit(X_train, y_train)
|
||||
assert_almost_equal(s_clf.dual_gap_, 0, 4)
|
||||
assert s_clf.score(X_test, y_test) > 0.85
|
||||
|
||||
# check the convergence is the same as the dense version
|
||||
d_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
|
||||
d_clf.fit(X_train.toarray(), y_train)
|
||||
assert_almost_equal(d_clf.dual_gap_, 0, 4)
|
||||
assert d_clf.score(X_test, y_test) > 0.85
|
||||
|
||||
# check that the coefs are sparse
|
||||
assert np.sum(s_clf.coef_ != 0.0) == n_informative
|
||||
|
||||
|
||||
def test_enet_multitarget():
|
||||
n_targets = 3
|
||||
X, y = make_sparse_data(n_targets=n_targets)
|
||||
|
||||
estimator = ElasticNet(alpha=0.01, precompute=None)
|
||||
# XXX: There is a bug when precompute is not None!
|
||||
estimator.fit(X, y)
|
||||
coef, intercept, dual_gap = (estimator.coef_,
|
||||
estimator.intercept_,
|
||||
estimator.dual_gap_)
|
||||
|
||||
for k in range(n_targets):
|
||||
estimator.fit(X, y[:, k])
|
||||
assert_array_almost_equal(coef[k, :], estimator.coef_)
|
||||
assert_array_almost_equal(intercept[k], estimator.intercept_)
|
||||
assert_array_almost_equal(dual_gap[k], estimator.dual_gap_)
|
||||
|
||||
|
||||
def test_path_parameters():
|
||||
X, y = make_sparse_data()
|
||||
max_iter = 50
|
||||
n_alphas = 10
|
||||
clf = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=max_iter,
|
||||
l1_ratio=0.5, fit_intercept=False)
|
||||
ignore_warnings(clf.fit)(X, y) # new params
|
||||
assert_almost_equal(0.5, clf.l1_ratio)
|
||||
assert n_alphas == clf.n_alphas
|
||||
assert n_alphas == len(clf.alphas_)
|
||||
sparse_mse_path = clf.mse_path_
|
||||
ignore_warnings(clf.fit)(X.toarray(), y) # compare with dense data
|
||||
assert_almost_equal(clf.mse_path_, sparse_mse_path)
|
||||
|
||||
|
||||
def test_same_output_sparse_dense_lasso_and_enet_cv():
|
||||
X, y = make_sparse_data(n_samples=40, n_features=10)
|
||||
for normalize in [True, False]:
|
||||
clfs = ElasticNetCV(max_iter=100, normalize=normalize)
|
||||
ignore_warnings(clfs.fit)(X, y)
|
||||
clfd = ElasticNetCV(max_iter=100, normalize=normalize)
|
||||
ignore_warnings(clfd.fit)(X.toarray(), y)
|
||||
assert_almost_equal(clfs.alpha_, clfd.alpha_, 7)
|
||||
assert_almost_equal(clfs.intercept_, clfd.intercept_, 7)
|
||||
assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_)
|
||||
assert_array_almost_equal(clfs.alphas_, clfd.alphas_)
|
||||
|
||||
clfs = LassoCV(max_iter=100, cv=4, normalize=normalize)
|
||||
ignore_warnings(clfs.fit)(X, y)
|
||||
clfd = LassoCV(max_iter=100, cv=4, normalize=normalize)
|
||||
ignore_warnings(clfd.fit)(X.toarray(), y)
|
||||
assert_almost_equal(clfs.alpha_, clfd.alpha_, 7)
|
||||
assert_almost_equal(clfs.intercept_, clfd.intercept_, 7)
|
||||
assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_)
|
||||
assert_array_almost_equal(clfs.alphas_, clfd.alphas_)
|
||||
|
||||
|
||||
def test_same_multiple_output_sparse_dense():
|
||||
for normalize in [True, False]:
|
||||
l = ElasticNet(normalize=normalize)
|
||||
X = [[0, 1, 2, 3, 4],
|
||||
[0, 2, 5, 8, 11],
|
||||
[9, 10, 11, 12, 13],
|
||||
[10, 11, 12, 13, 14]]
|
||||
y = [[1, 2, 3, 4, 5],
|
||||
[1, 3, 6, 9, 12],
|
||||
[10, 11, 12, 13, 14],
|
||||
[11, 12, 13, 14, 15]]
|
||||
ignore_warnings(l.fit)(X, y)
|
||||
sample = np.array([1, 2, 3, 4, 5]).reshape(1, -1)
|
||||
predict_dense = l.predict(sample)
|
||||
|
||||
l_sp = ElasticNet(normalize=normalize)
|
||||
X_sp = sp.coo_matrix(X)
|
||||
ignore_warnings(l_sp.fit)(X_sp, y)
|
||||
sample_sparse = sp.coo_matrix(sample)
|
||||
predict_sparse = l_sp.predict(sample_sparse)
|
||||
|
||||
assert_array_almost_equal(predict_sparse, predict_dense)
|
||||
|
||||
|
||||
def test_sparse_enet_coordinate_descent():
|
||||
"""Test that a warning is issued if model does not converge"""
|
||||
clf = Lasso(max_iter=2)
|
||||
n_samples = 5
|
||||
n_features = 2
|
||||
X = sp.csc_matrix((n_samples, n_features)) * 1e50
|
||||
y = np.ones(n_samples)
|
||||
assert_warns(ConvergenceWarning, clf.fit, X, y)
|
|
@ -0,0 +1,281 @@
|
|||
"""
|
||||
Testing for Theil-Sen module (sklearn.linear_model.theil_sen)
|
||||
"""
|
||||
|
||||
# Author: Florian Wilhelm <florian.wilhelm@gmail.com>
|
||||
# License: BSD 3 clause
|
||||
import os
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_equal, assert_array_less
|
||||
from numpy.testing import assert_array_almost_equal, assert_warns
|
||||
from scipy.linalg import norm
|
||||
from scipy.optimize import fmin_bfgs
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.linear_model import LinearRegression, TheilSenRegressor
|
||||
from sklearn.linear_model._theil_sen import _spatial_median, _breakdown_point
|
||||
from sklearn.linear_model._theil_sen import _modified_weiszfeld_step
|
||||
from sklearn.utils._testing import assert_almost_equal, assert_raises
|
||||
|
||||
|
||||
@contextmanager
|
||||
def no_stdout_stderr():
|
||||
old_stdout = sys.stdout
|
||||
old_stderr = sys.stderr
|
||||
with open(os.devnull, 'w') as devnull:
|
||||
sys.stdout = devnull
|
||||
sys.stderr = devnull
|
||||
yield
|
||||
devnull.flush()
|
||||
sys.stdout = old_stdout
|
||||
sys.stderr = old_stderr
|
||||
|
||||
|
||||
def gen_toy_problem_1d(intercept=True):
|
||||
random_state = np.random.RandomState(0)
|
||||
# Linear model y = 3*x + N(2, 0.1**2)
|
||||
w = 3.
|
||||
if intercept:
|
||||
c = 2.
|
||||
n_samples = 50
|
||||
else:
|
||||
c = 0.1
|
||||
n_samples = 100
|
||||
x = random_state.normal(size=n_samples)
|
||||
noise = 0.1 * random_state.normal(size=n_samples)
|
||||
y = w * x + c + noise
|
||||
# Add some outliers
|
||||
if intercept:
|
||||
x[42], y[42] = (-2, 4)
|
||||
x[43], y[43] = (-2.5, 8)
|
||||
x[33], y[33] = (2.5, 1)
|
||||
x[49], y[49] = (2.1, 2)
|
||||
else:
|
||||
x[42], y[42] = (-2, 4)
|
||||
x[43], y[43] = (-2.5, 8)
|
||||
x[53], y[53] = (2.5, 1)
|
||||
x[60], y[60] = (2.1, 2)
|
||||
x[72], y[72] = (1.8, -7)
|
||||
return x[:, np.newaxis], y, w, c
|
||||
|
||||
|
||||
def gen_toy_problem_2d():
|
||||
random_state = np.random.RandomState(0)
|
||||
n_samples = 100
|
||||
# Linear model y = 5*x_1 + 10*x_2 + N(1, 0.1**2)
|
||||
X = random_state.normal(size=(n_samples, 2))
|
||||
w = np.array([5., 10.])
|
||||
c = 1.
|
||||
noise = 0.1 * random_state.normal(size=n_samples)
|
||||
y = np.dot(X, w) + c + noise
|
||||
# Add some outliers
|
||||
n_outliers = n_samples // 10
|
||||
ix = random_state.randint(0, n_samples, size=n_outliers)
|
||||
y[ix] = 50 * random_state.normal(size=n_outliers)
|
||||
return X, y, w, c
|
||||
|
||||
|
||||
def gen_toy_problem_4d():
|
||||
random_state = np.random.RandomState(0)
|
||||
n_samples = 10000
|
||||
# Linear model y = 5*x_1 + 10*x_2 + 42*x_3 + 7*x_4 + N(1, 0.1**2)
|
||||
X = random_state.normal(size=(n_samples, 4))
|
||||
w = np.array([5., 10., 42., 7.])
|
||||
c = 1.
|
||||
noise = 0.1 * random_state.normal(size=n_samples)
|
||||
y = np.dot(X, w) + c + noise
|
||||
# Add some outliers
|
||||
n_outliers = n_samples // 10
|
||||
ix = random_state.randint(0, n_samples, size=n_outliers)
|
||||
y[ix] = 50 * random_state.normal(size=n_outliers)
|
||||
return X, y, w, c
|
||||
|
||||
|
||||
def test_modweiszfeld_step_1d():
|
||||
X = np.array([1., 2., 3.]).reshape(3, 1)
|
||||
# Check startvalue is element of X and solution
|
||||
median = 2.
|
||||
new_y = _modified_weiszfeld_step(X, median)
|
||||
assert_array_almost_equal(new_y, median)
|
||||
# Check startvalue is not the solution
|
||||
y = 2.5
|
||||
new_y = _modified_weiszfeld_step(X, y)
|
||||
assert_array_less(median, new_y)
|
||||
assert_array_less(new_y, y)
|
||||
# Check startvalue is not the solution but element of X
|
||||
y = 3.
|
||||
new_y = _modified_weiszfeld_step(X, y)
|
||||
assert_array_less(median, new_y)
|
||||
assert_array_less(new_y, y)
|
||||
# Check that a single vector is identity
|
||||
X = np.array([1., 2., 3.]).reshape(1, 3)
|
||||
y = X[0, ]
|
||||
new_y = _modified_weiszfeld_step(X, y)
|
||||
assert_array_equal(y, new_y)
|
||||
|
||||
|
||||
def test_modweiszfeld_step_2d():
|
||||
X = np.array([0., 0., 1., 1., 0., 1.]).reshape(3, 2)
|
||||
y = np.array([0.5, 0.5])
|
||||
# Check first two iterations
|
||||
new_y = _modified_weiszfeld_step(X, y)
|
||||
assert_array_almost_equal(new_y, np.array([1 / 3, 2 / 3]))
|
||||
new_y = _modified_weiszfeld_step(X, new_y)
|
||||
assert_array_almost_equal(new_y, np.array([0.2792408, 0.7207592]))
|
||||
# Check fix point
|
||||
y = np.array([0.21132505, 0.78867497])
|
||||
new_y = _modified_weiszfeld_step(X, y)
|
||||
assert_array_almost_equal(new_y, y)
|
||||
|
||||
|
||||
def test_spatial_median_1d():
|
||||
X = np.array([1., 2., 3.]).reshape(3, 1)
|
||||
true_median = 2.
|
||||
_, median = _spatial_median(X)
|
||||
assert_array_almost_equal(median, true_median)
|
||||
# Test larger problem and for exact solution in 1d case
|
||||
random_state = np.random.RandomState(0)
|
||||
X = random_state.randint(100, size=(1000, 1))
|
||||
true_median = np.median(X.ravel())
|
||||
_, median = _spatial_median(X)
|
||||
assert_array_equal(median, true_median)
|
||||
|
||||
|
||||
def test_spatial_median_2d():
|
||||
X = np.array([0., 0., 1., 1., 0., 1.]).reshape(3, 2)
|
||||
_, median = _spatial_median(X, max_iter=100, tol=1.e-6)
|
||||
|
||||
def cost_func(y):
|
||||
dists = np.array([norm(x - y) for x in X])
|
||||
return np.sum(dists)
|
||||
|
||||
# Check if median is solution of the Fermat-Weber location problem
|
||||
fermat_weber = fmin_bfgs(cost_func, median, disp=False)
|
||||
assert_array_almost_equal(median, fermat_weber)
|
||||
# Check when maximum iteration is exceeded a warning is emitted
|
||||
assert_warns(ConvergenceWarning, _spatial_median, X, max_iter=30, tol=0.)
|
||||
|
||||
|
||||
def test_theil_sen_1d():
|
||||
X, y, w, c = gen_toy_problem_1d()
|
||||
# Check that Least Squares fails
|
||||
lstq = LinearRegression().fit(X, y)
|
||||
assert np.abs(lstq.coef_ - w) > 0.9
|
||||
# Check that Theil-Sen works
|
||||
theil_sen = TheilSenRegressor(random_state=0).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, w, 1)
|
||||
assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_theil_sen_1d_no_intercept():
|
||||
X, y, w, c = gen_toy_problem_1d(intercept=False)
|
||||
# Check that Least Squares fails
|
||||
lstq = LinearRegression(fit_intercept=False).fit(X, y)
|
||||
assert np.abs(lstq.coef_ - w - c) > 0.5
|
||||
# Check that Theil-Sen works
|
||||
theil_sen = TheilSenRegressor(fit_intercept=False,
|
||||
random_state=0).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, w + c, 1)
|
||||
assert_almost_equal(theil_sen.intercept_, 0.)
|
||||
|
||||
|
||||
def test_theil_sen_2d():
|
||||
X, y, w, c = gen_toy_problem_2d()
|
||||
# Check that Least Squares fails
|
||||
lstq = LinearRegression().fit(X, y)
|
||||
assert norm(lstq.coef_ - w) > 1.0
|
||||
# Check that Theil-Sen works
|
||||
theil_sen = TheilSenRegressor(max_subpopulation=1e3,
|
||||
random_state=0).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, w, 1)
|
||||
assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_calc_breakdown_point():
|
||||
bp = _breakdown_point(1e10, 2)
|
||||
assert np.abs(bp - 1 + 1 / (np.sqrt(2))) < 1.e-6
|
||||
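The asserted value is the large-n limit of _breakdown_point for n_subsamples=2: the fraction of arbitrarily corrupted samples tolerated here approaches

    1 - \frac{1}{\sqrt{2}} \approx 0.2929

i.e. roughly 29% contamination. This is consistent with the asymptotic form 1 - 2^{-1/k} for subsample size k, though the test itself only checks the k = 2 case.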
|
||||
|
||||
def test_checksubparams_negative_subpopulation():
|
||||
X, y, w, c = gen_toy_problem_1d()
|
||||
theil_sen = TheilSenRegressor(max_subpopulation=-1, random_state=0)
|
||||
assert_raises(ValueError, theil_sen.fit, X, y)
|
||||
|
||||
|
||||
def test_checksubparams_too_few_subsamples():
|
||||
X, y, w, c = gen_toy_problem_1d()
|
||||
theil_sen = TheilSenRegressor(n_subsamples=1, random_state=0)
|
||||
assert_raises(ValueError, theil_sen.fit, X, y)
|
||||
|
||||
|
||||
def test_checksubparams_too_many_subsamples():
|
||||
X, y, w, c = gen_toy_problem_1d()
|
||||
theil_sen = TheilSenRegressor(n_subsamples=101, random_state=0)
|
||||
assert_raises(ValueError, theil_sen.fit, X, y)
|
||||
|
||||
|
||||
def test_checksubparams_n_subsamples_if_less_samples_than_features():
|
||||
random_state = np.random.RandomState(0)
|
||||
n_samples, n_features = 10, 20
|
||||
X = random_state.normal(size=(n_samples, n_features))
|
||||
y = random_state.normal(size=n_samples)
|
||||
theil_sen = TheilSenRegressor(n_subsamples=9, random_state=0)
|
||||
assert_raises(ValueError, theil_sen.fit, X, y)
|
||||
|
||||
|
||||
def test_subpopulation():
|
||||
X, y, w, c = gen_toy_problem_4d()
|
||||
theil_sen = TheilSenRegressor(max_subpopulation=250,
|
||||
random_state=0).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, w, 1)
|
||||
assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_subsamples():
|
||||
X, y, w, c = gen_toy_problem_4d()
|
||||
theil_sen = TheilSenRegressor(n_subsamples=X.shape[0],
|
||||
random_state=0).fit(X, y)
|
||||
lstq = LinearRegression().fit(X, y)
|
||||
# Check for exact the same results as Least Squares
|
||||
assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 9)
|
||||
|
||||
|
||||
def test_verbosity():
|
||||
X, y, w, c = gen_toy_problem_1d()
|
||||
# Check that Theil-Sen can be verbose
|
||||
with no_stdout_stderr():
|
||||
TheilSenRegressor(verbose=True, random_state=0).fit(X, y)
|
||||
TheilSenRegressor(verbose=True,
|
||||
max_subpopulation=10,
|
||||
random_state=0).fit(X, y)
|
||||
|
||||
|
||||
def test_theil_sen_parallel():
|
||||
X, y, w, c = gen_toy_problem_2d()
|
||||
# Check that Least Squares fails
|
||||
lstq = LinearRegression().fit(X, y)
|
||||
assert norm(lstq.coef_ - w) > 1.0
|
||||
# Check that Theil-Sen works
|
||||
theil_sen = TheilSenRegressor(n_jobs=2,
|
||||
random_state=0,
|
||||
max_subpopulation=2e3).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, w, 1)
|
||||
assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_less_samples_than_features():
|
||||
random_state = np.random.RandomState(0)
|
||||
n_samples, n_features = 10, 20
|
||||
X = random_state.normal(size=(n_samples, n_features))
|
||||
y = random_state.normal(size=n_samples)
|
||||
# Check that Theil-Sen falls back to Least Squares if fit_intercept=False
|
||||
theil_sen = TheilSenRegressor(fit_intercept=False,
|
||||
random_state=0).fit(X, y)
|
||||
lstq = LinearRegression(fit_intercept=False).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 12)
|
||||
# Check fit_intercept=True case. This will not be equal to the Least
|
||||
# Squares solution since the intercept is calculated differently.
|
||||
theil_sen = TheilSenRegressor(fit_intercept=True, random_state=0).fit(X, y)
|
||||
y_pred = theil_sen.predict(X)
|
||||
assert_array_almost_equal(y_pred, y, 12)
|