1829 lines
74 KiB
Python
1829 lines
74 KiB
Python
import os
|
|
import sys
|
|
import numpy as np
|
|
import scipy.sparse as sp
|
|
from scipy import linalg, optimize, sparse
|
|
|
|
import pytest
|
|
|
|
from sklearn.base import clone
|
|
from sklearn.datasets import load_iris, make_classification
|
|
from sklearn.metrics import log_loss
|
|
from sklearn.metrics import get_scorer
|
|
from sklearn.model_selection import StratifiedKFold
|
|
from sklearn.model_selection import GridSearchCV
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.model_selection import cross_val_score
|
|
from sklearn.preprocessing import LabelEncoder, StandardScaler
|
|
from sklearn.utils import compute_class_weight, _IS_32BIT
|
|
from sklearn.utils._testing import assert_almost_equal
|
|
from sklearn.utils._testing import assert_allclose
|
|
from sklearn.utils._testing import assert_array_almost_equal
|
|
from sklearn.utils._testing import assert_array_equal
|
|
from sklearn.utils._testing import assert_raise_message
|
|
from sklearn.utils._testing import assert_raises
|
|
from sklearn.utils._testing import assert_warns
|
|
from sklearn.utils._testing import ignore_warnings
|
|
from sklearn.utils._testing import assert_warns_message
|
|
from sklearn.utils import shuffle
|
|
from sklearn.linear_model import SGDClassifier
|
|
from sklearn.preprocessing import scale
|
|
from sklearn.utils._testing import skip_if_no_parallel
|
|
|
|
from sklearn.exceptions import ConvergenceWarning
|
|
from sklearn.linear_model._logistic import (
|
|
LogisticRegression,
|
|
_logistic_regression_path, LogisticRegressionCV,
|
|
_logistic_loss_and_grad, _logistic_grad_hess,
|
|
_multinomial_grad_hess, _logistic_loss,
|
|
_log_reg_scoring_path)
|
|
|
|
# Tiny three-sample, two-feature data set shared by the tests below, kept in
# both dense and CSR form, with a two-label (Y1) and a three-label (Y2) target.
X = [
    [-1, 0],
    [0, 1],
    [1, 1],
]
X_sp = sp.csr_matrix(X)
Y1 = [0, 1, 1]
Y2 = [2, 1, 0]

# Loaded once at import time and reused by every iris-based test.
iris = load_iris()
|
|
|
|
|
|
def check_predictions(clf, X, y):
    """Fit ``clf`` on ``(X, y)`` and check it reproduces the training labels.

    Asserts that, after fitting:

    * ``clf.classes_`` equals the sorted unique values of ``y``;
    * ``predict(X)`` has shape ``(n_samples,)`` and equals ``y``;
    * ``predict_proba(X)`` has shape ``(n_samples, n_classes)``, each row
      sums to one, and its row-wise argmax equals ``y``.
    """
    expected_classes = np.unique(y)
    n_samples, n_classes = len(y), expected_classes.shape[0]

    fitted = clf.fit(X, y)
    y_pred = fitted.predict(X)
    assert_array_equal(clf.classes_, expected_classes)

    assert y_pred.shape == (n_samples,)
    assert_array_equal(y_pred, y)

    proba = clf.predict_proba(X)
    assert proba.shape == (n_samples, n_classes)
    row_sums = proba.sum(axis=1)
    assert_array_almost_equal(row_sums, np.ones(n_samples))
    # The argmax is a column index, so it is compared to ``y`` directly.
    assert_array_equal(proba.argmax(axis=1), y)
|
|
|
|
|
|
def test_predict_2_classes():
    """Sanity check: the toy binary problem is fitted perfectly.

    Runs ``check_predictions`` on dense and sparse input for the default
    settings, for a weakly regularised model (``C=100``) and for a model
    without intercept.
    """
    for extra_params in ({}, {"C": 100}, {"fit_intercept": False}):
        for data in (X, X_sp):
            clf = LogisticRegression(random_state=0, **extra_params)
            check_predictions(clf, data, Y1)
|
|
|
|
|
|
def test_error():
    """Invalid hyper-parameters are rejected with a ValueError at fit time."""
    # C: neither a negative number nor a string is accepted.
    for bad_C in (-1, "test"):
        assert_raise_message(ValueError, "Penalty term must be positive",
                             LogisticRegression(C=bad_C).fit, X, Y1)

    # Unknown scorer names are rejected by the CV estimator.
    bad_scoring_cv = LogisticRegressionCV(scoring='bad-scorer', cv=2)
    assert_raise_message(ValueError, "is not a valid scoring value",
                         bad_scoring_cv.fit, X, Y1)

    # tol and max_iter are validated by both estimators.
    expected_messages = (
        ("tol", "Tolerance for stopping criteria must be positive"),
        ("max_iter", "Maximum number of iteration must be positive"),
    )
    for LR in [LogisticRegression, LogisticRegressionCV]:
        for param, msg in expected_messages:
            for bad_value in (-1, "test"):
                estimator = LR(**{param: bad_value})
                assert_raise_message(ValueError, msg, estimator.fit, X, Y1)
|
|
|
|
|
|
def test_logistic_cv_mock_scorer():
    """A custom scorer drives both C selection and ``score``."""

    class MockScorer:
        """Scorer that cycles through fixed scores and counts its calls."""

        def __init__(self):
            self.calls = 0
            self.scores = [0.1, 0.4, 0.8, 0.5]

        def __call__(self, model, X, y, sample_weight=None):
            position = self.calls % len(self.scores)
            self.calls += 1
            return self.scores[position]

    Cs = [1, 2, 3, 4]
    cv = 2
    mock_scorer = MockScorer()

    lr = LogisticRegressionCV(Cs=Cs, scoring=mock_scorer, cv=cv)
    lr.fit(X, Y1)

    # MockScorer hands out its highest score (0.8) at index 2, so Cs[2] wins.
    best_C = lr.C_[0]
    assert best_C == Cs[2]

    # One scorer call per (fold, C) pair: cv * len(Cs) == 8.
    assert mock_scorer.calls == cv * len(Cs)

    # Start counting from scratch to observe a single ``score`` call.
    mock_scorer.calls = 0
    predictions = lr.predict(X)
    custom_score = lr.score(X, predictions)

    assert custom_score == mock_scorer.scores[0]
    assert mock_scorer.calls == 1
|
|
|
|
|
|
def test_logistic_cv_score_does_not_warn_by_default():
    """``LogisticRegressionCV.score`` emits no warning with default scoring."""
    # Imported locally so the block does not depend on the file's import list.
    import warnings

    lr = LogisticRegressionCV(cv=2)
    lr.fit(X, Y1)

    # ``pytest.warns(None)`` is deprecated since pytest 7 and an error in
    # pytest 8, so the warnings are recorded with the standard library instead.
    with warnings.catch_warnings(record=True) as record:
        # "always" defeats the once-per-location filter so nothing is hidden.
        warnings.simplefilter("always")
        lr.score(X, lr.predict(X))
    assert len(record) == 0, [str(w.message) for w in record]
|
|
|
|
|
|
@skip_if_no_parallel
def test_lr_liblinear_warning():
    """Fitting liblinear with ``n_jobs=2`` warns that n_jobs has no effect."""
    target = iris.target_names[iris.target]
    expected_msg = ("'n_jobs' > 1 does not have any effect when"
                    " 'solver' is set to 'liblinear'. Got 'n_jobs'"
                    " = 2.")

    lr = LogisticRegression(solver='liblinear', n_jobs=2)
    assert_warns_message(UserWarning, expected_msg, lr.fit, iris.data, target)
|
|
|
|
|
|
def test_predict_3_classes():
    """The toy three-class problem is fitted perfectly, dense and sparse."""
    for data in (X, X_sp):
        check_predictions(LogisticRegression(C=10), data, Y2)
|
|
|
|
|
|
def test_predict_iris():
    """Each solver exceeds 95% training accuracy on iris with string labels."""
    n_samples = iris.data.shape[0]
    target = iris.target_names[iris.target]
    C = len(iris.data)

    # Multinomial and OvR configurations must both handle multiclass data
    # and reach a good accuracy (> 0.95) on the training set.
    solver_settings = [
        dict(solver='liblinear', multi_class='ovr'),
        dict(solver='lbfgs', multi_class='multinomial'),
        dict(solver='newton-cg', multi_class='multinomial'),
        dict(solver='sag', tol=1e-2, multi_class='ovr', random_state=42),
        dict(solver='saga', tol=1e-2, multi_class='ovr', random_state=42),
    ]
    for settings in solver_settings:
        clf = LogisticRegression(C=C, **settings)
        clf.fit(iris.data, target)
        assert_array_equal(np.unique(target), clf.classes_)

        # Accuracy of the hard predictions.
        hard_pred = clf.predict(iris.data)
        assert np.mean(hard_pred == target) > .95

        # Probabilities are normalised ...
        proba = clf.predict_proba(iris.data)
        assert_array_almost_equal(proba.sum(axis=1), np.ones(n_samples))

        # ... and their argmax, mapped to names, is just as accurate.
        proba_pred = iris.target_names[proba.argmax(axis=1)]
        assert np.mean(proba_pred == target) > .95
|
|
|
|
|
|
@pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga'])
def test_multinomial_validation(solver):
    """A negative ``C`` is rejected by every multinomial-capable solver."""
    lr = LogisticRegression(C=-1, solver=solver, multi_class='multinomial')
    with pytest.raises(ValueError):
        lr.fit([[0, 1], [1, 0]], [0, 1])
|
|
|
|
|
|
@pytest.mark.parametrize('LR', [LogisticRegression, LogisticRegressionCV])
def test_check_solver_option(LR):
    """Unsupported solver / penalty / dual / multi_class combinations fail."""
    X, y = iris.data, iris.target

    def assert_fit_fails(expected_msg, **params):
        # Build the estimator with ``params`` and expect ``fit`` to raise.
        estimator = LR(**params)
        assert_raise_message(ValueError, expected_msg, estimator.fit, X, y)

    assert_fit_fails(
        "Logistic Regression supports only solvers in ['liblinear', "
        "'newton-cg', 'lbfgs', 'sag', 'saga'], got wrong_name.",
        solver="wrong_name", multi_class="ovr")

    assert_fit_fails(
        "multi_class should be 'multinomial', 'ovr' or 'auto'. "
        "Got wrong_name",
        solver='newton-cg', multi_class="wrong_name")

    # only 'liblinear' solver
    assert_fit_fails(
        "Solver liblinear does not support a multinomial backend.",
        solver='liblinear', multi_class='multinomial')

    # all solvers except 'liblinear' and 'saga'
    for solver in ['newton-cg', 'lbfgs', 'sag']:
        assert_fit_fails(
            "Solver %s supports only 'l2' or 'none' penalties," % solver,
            solver=solver, penalty='l1', multi_class='ovr')
    for solver in ['newton-cg', 'lbfgs', 'sag', 'saga']:
        assert_fit_fails(
            "Solver %s supports only dual=False, got dual=True" % solver,
            solver=solver, dual=True, multi_class='ovr')

    # only saga supports elasticnet. We only test for liblinear because the
    # error is raised before for the other solvers (solver %s supports only l2
    # penalties)
    for solver in ['liblinear']:
        assert_fit_fails(
            "Only 'saga' solver supports elasticnet penalty, got "
            "solver={}.".format(solver),
            solver=solver, penalty='elasticnet')

    # liblinear does not support penalty='none'
    assert_fit_fails(
        "penalty='none' is not supported for the liblinear solver",
        penalty='none', solver='liblinear')
|
|
|
|
|
|
@pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga'])
def test_multinomial_binary(solver):
    """Multinomial LR on a binary problem keeps a single coefficient row.

    Also checks that a model fitted without intercept still classifies more
    than 90% of the training samples correctly.
    """
    # Binary target with string labels: "setosa" versus everything else.
    target = (iris.target > 0).astype(np.intp)
    target = np.array(["setosa", "not-setosa"])[target]

    clf = LogisticRegression(solver=solver, multi_class='multinomial',
                             random_state=42, max_iter=2000)
    clf.fit(iris.data, target)

    assert clf.coef_.shape == (1, iris.data.shape[1])
    assert clf.intercept_.shape == (1,)
    assert_array_equal(clf.predict(iris.data), target)

    mlr = LogisticRegression(solver=solver, multi_class='multinomial',
                             random_state=42, fit_intercept=False)
    mlr.fit(iris.data, target)
    # Score the intercept-free model itself; scoring ``clf`` here would leave
    # ``mlr`` fitted but never verified.
    log_proba = mlr.predict_log_proba(iris.data)
    pred = mlr.classes_[np.argmax(log_proba, axis=1)]
    assert np.mean(pred == target) > .9
|
|
|
|
|
|
def test_multinomial_binary_probabilities():
    """Binary multinomial probabilities follow from the decision function.

    For a decision value ``d`` the expected class-1 probability is
    ``exp(d) / (exp(d) + exp(-d))``; class 0 gets the complement.
    """
    # Seed both the data and the stochastic 'saga' solver so that the test is
    # reproducible from run to run.
    X, y = make_classification(random_state=0)
    clf = LogisticRegression(multi_class='multinomial', solver='saga',
                             random_state=0)
    clf.fit(X, y)

    decision = clf.decision_function(X)
    proba = clf.predict_proba(X)

    exp_pos, exp_neg = np.exp(decision), np.exp(-decision)
    expected_proba_class_1 = exp_pos / (exp_pos + exp_neg)
    expected_proba = np.c_[1 - expected_proba_class_1, expected_proba_class_1]

    assert_almost_equal(proba, expected_proba)
|
|
|
|
|
|
def test_sparsify():
    """``sparsify`` / ``densify`` leave the decision function unchanged."""
    target = iris.target_names[iris.target]
    dense_data = iris.data
    sparse_data = sp.coo_matrix(iris.data)
    clf = LogisticRegression(random_state=0).fit(dense_data, target)

    # Reference: dense coefficients on dense data.
    reference = clf.decision_function(dense_data)

    clf.sparsify()
    assert sp.issparse(clf.coef_)
    sparse_coef_dense_data = clf.decision_function(dense_data)
    sparse_coef_sparse_data = clf.decision_function(sparse_data)

    clf.densify()
    dense_coef_sparse_data = clf.decision_function(sparse_data)

    for decision in (sparse_coef_dense_data,
                     sparse_coef_sparse_data,
                     dense_coef_sparse_data):
        assert_array_almost_equal(reference, decision)
|
|
|
|
|
|
def test_inconsistent_input():
    """Mismatched shapes raise ValueError at fit and at predict time."""
    rng = np.random.RandomState(0)
    X_ = rng.random_sample((5, 10))
    y_ = np.ones(X_.shape[0])
    y_[0] = 0

    clf = LogisticRegression(random_state=0)

    # Wrong dimensions for training data: y is one sample shorter than X_.
    # The locally generated X_ is used on purpose rather than the module-level
    # X, so the mismatch under test is exactly the dropped sample.
    y_wrong = y_[:-1]
    assert_raises(ValueError, clf.fit, X_, y_wrong)

    # Wrong dimensions for test data: 12 features instead of the 10 seen
    # during fit.
    assert_raises(ValueError, clf.fit(X_, y_).predict,
                  rng.random_sample((3, 12)))
|
|
|
|
|
|
def test_write_parameters():
    """``coef_`` and ``intercept_`` can be overwritten in place after fit."""
    clf = LogisticRegression(random_state=0).fit(X, Y1)
    for fitted_param in (clf.coef_, clf.intercept_):
        fitted_param[:] = 0
    # With all parameters zeroed the decision function must be zero too.
    assert_array_almost_equal(clf.decision_function(X), 0)
|
|
|
|
|
|
def test_nan():
    """NaN in the training data is rejected with a ValueError.

    Regression test for Issue #252: fit used to go into an infinite loop.
    """
    Xnan = np.array(X, dtype=np.float64)
    Xnan[0, 1] = np.nan
    with pytest.raises(ValueError):
        LogisticRegression(random_state=0).fit(Xnan, Y1)
|
|
|
|
|
|
def test_consistency_path():
    """``_logistic_regression_path`` agrees with ``LogisticRegression.fit``."""
    rng = np.random.RandomState(0)
    shifted = rng.randn(100, 2) + [1, 1]
    centered = rng.randn(100, 2)
    X = np.concatenate((shifted, centered))
    y = [1] * 100 + [-1] * 100
    Cs = np.logspace(0, 4, 10)

    # Path helper wrapped once so that its warnings are silenced.
    quiet_path = ignore_warnings(_logistic_regression_path)

    # can't test with fit_intercept=True since LIBLINEAR
    # penalizes the intercept
    for solver in ['sag', 'saga']:
        shared = dict(fit_intercept=False, tol=1e-5, solver=solver,
                      max_iter=1000, multi_class='ovr', random_state=0)
        coefs, Cs, _ = quiet_path(X, y, Cs=Cs, **shared)
        for C, path_coef in zip(Cs, coefs):
            lr = LogisticRegression(C=C, **shared)
            lr.fit(X, y)
            assert_array_almost_equal(lr.coef_.ravel(), path_coef, decimal=4,
                                      err_msg="with solver = %s" % solver)

    # test for fit_intercept=True
    for solver in ('lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'):
        Cs = [1e3]
        coefs, Cs, _ = quiet_path(
            X, y, Cs=Cs, tol=1e-6, solver=solver,
            intercept_scaling=10000., random_state=0, multi_class='ovr')
        lr = LogisticRegression(C=Cs[0], tol=1e-4,
                                intercept_scaling=10000., random_state=0,
                                multi_class='ovr', solver=solver)
        lr.fit(X, y)
        # The path stores the intercept after the feature coefficients.
        coef_and_intercept = np.concatenate([lr.coef_.ravel(), lr.intercept_])
        assert_array_almost_equal(coef_and_intercept, coefs[0], decimal=4,
                                  err_msg="with solver = %s" % solver)
|
|
|
|
|
|
def test_logistic_regression_path_convergence_fail():
    """A single non-converged fit warns once, with actionable advice."""
    rng = np.random.RandomState(0)
    X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2)))
    y = [1] * 100 + [-1] * 100
    Cs = [1e3]

    # The convergence message must point both to model-agnostic advice
    # (scaling the data) and to the logistic-regression specific
    # documentation that includes hints on the solver configuration.
    with pytest.warns(ConvergenceWarning) as record:
        _logistic_regression_path(
            X, y, Cs=Cs, tol=0., max_iter=1, random_state=0, verbose=0)

    assert len(record) == 1
    warn_msg = record[0].message.args[0]
    expected_fragments = (
        "lbfgs failed to converge",
        "Increase the number of iterations",
        "scale the data",
        "linear_model.html#logistic-regression",
    )
    for fragment in expected_fragments:
        assert fragment in warn_msg
|
|
|
|
|
|
def test_liblinear_dual_random_state():
    """With ``dual=True`` the liblinear result depends on ``random_state``."""
    # random_state is relevant for liblinear solver only if dual=True
    X, y = make_classification(n_samples=20, random_state=0)

    def fit_with_seed(seed):
        model = LogisticRegression(random_state=seed, dual=True, max_iter=1,
                                   tol=1e-15, solver='liblinear',
                                   multi_class='ovr')
        model.fit(X, y)
        return model

    lr1, lr2, lr3 = (fit_with_seed(seed) for seed in (0, 0, 8))

    # same result for same random state
    assert_array_almost_equal(lr1.coef_, lr2.coef_)
    # different results for different random states
    assert_raise_message(AssertionError,
                         "Arrays are not almost equal to 6 decimals",
                         assert_array_almost_equal, lr1.coef_, lr3.coef_)
|
|
|
|
|
|
def test_logistic_loss_and_grad():
    """The analytic gradient matches a finite-difference estimate.

    Checked on dense and CSR input, with a weight vector of ``n_features``
    entries and with one extra (intercept) entry.
    """
    X_ref, y = make_classification(n_samples=20, random_state=0)
    n_features = X_ref.shape[1]

    # Sparse variant: entries below 0.1 are zeroed before conversion.
    X_sp = X_ref.copy()
    X_sp[X_sp < .1] = 0
    X_sp = sp.csr_matrix(X_sp)

    for X in (X_ref, X_sp):

        def loss_only(coef):
            # Loss component only, as needed by approx_fprime.
            return _logistic_loss_and_grad(coef, X, y, alpha=1.)[0]

        # First check that our derivation of the grad is correct
        w = np.zeros(n_features)
        loss, grad = _logistic_loss_and_grad(w, X, y, alpha=1.)
        numeric_grad = optimize.approx_fprime(w, loss_only, 1e-3)
        assert_array_almost_equal(grad, numeric_grad, decimal=2)

        # Second check that our intercept implementation is good
        w = np.zeros(n_features + 1)
        loss_interp, grad_interp = _logistic_loss_and_grad(w, X, y, alpha=1.)
        assert_array_almost_equal(loss, loss_interp)

        numeric_grad = optimize.approx_fprime(w, loss_only, 1e-3)
        assert_array_almost_equal(grad_interp, numeric_grad, decimal=2)
|
|
|
|
|
|
def test_logistic_grad_hess():
    """``_logistic_grad_hess`` agrees with the loss/gradient helpers.

    The Hessian-vector product is compared with the slope of the gradient
    along the probed direction, estimated by least squares.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features = 50, 5
    X_ref = rng.randn(n_samples, n_features)
    y = np.sign(X_ref.dot(5 * rng.randn(n_features)))
    # Standardise with the global mean and standard deviation.
    X_ref -= X_ref.mean()
    X_ref /= X_ref.std()
    X_sp = X_ref.copy()
    X_sp[X_sp < .1] = 0
    X_sp = sp.csr_matrix(X_sp)

    for X in (X_ref, X_sp):
        w = np.full(n_features, .1)

        # First check that _logistic_grad_hess is consistent
        # with _logistic_loss_and_grad
        _, grad = _logistic_loss_and_grad(w, X, y, alpha=1.)
        grad_2, hess = _logistic_grad_hess(w, X, y, alpha=1.)
        assert_array_almost_equal(grad, grad_2)

        # Now check our hessian along the second direction of the grad
        direction = np.zeros_like(grad)
        direction[1] = 1
        hess_col = hess(direction)

        # Computation of the Hessian is particularly fragile to numerical
        # errors when doing simple finite differences. Here we compute the
        # grad along a path in the direction of the vector and then use a
        # least-square regression to estimate the slope
        eps = 1e-3
        steps = np.linspace(-eps, eps, 30)
        grads_along_path = np.array([
            _logistic_loss_and_grad(w + step * direction, X, y, alpha=1.)[1]
            for step in steps
        ])
        grads_along_path -= grads_along_path.mean(axis=0)
        slope = linalg.lstsq(steps[:, np.newaxis], grads_along_path)[0]
        approx_hess_col = slope.ravel()

        assert_array_almost_equal(approx_hess_col, hess_col, decimal=3)

        # Second check that our intercept implementation is good
        w = np.zeros(n_features + 1)
        loss_interp, grad_interp = _logistic_loss_and_grad(w, X, y, alpha=1.)
        loss_interp_2 = _logistic_loss(w, X, y, alpha=1.)
        grad_interp_2, hess = _logistic_grad_hess(w, X, y, alpha=1.)
        assert_array_almost_equal(loss_interp, loss_interp_2)
        assert_array_almost_equal(grad_interp, grad_interp_2)
|
|
|
|
|
|
def test_logistic_cv():
    """LogisticRegressionCV with one C matches LogisticRegression.

    Also checks the shapes of the CV attributes for a binary problem with
    three folds and a single candidate ``C``.
    """
    n_samples, n_features = 50, 5
    rng = np.random.RandomState(0)
    X_ref = rng.randn(n_samples, n_features)
    y = np.sign(X_ref.dot(5 * rng.randn(n_features)))
    X_ref -= X_ref.mean()
    X_ref /= X_ref.std()

    shared = dict(fit_intercept=False, solver='liblinear', multi_class='ovr')
    lr_cv = LogisticRegressionCV(Cs=[1.], cv=3, **shared)
    lr_cv.fit(X_ref, y)
    lr = LogisticRegression(C=1., **shared)
    lr.fit(X_ref, y)
    assert_array_almost_equal(lr.coef_, lr_cv.coef_)

    assert_array_equal(lr_cv.coef_.shape, (1, n_features))
    assert_array_equal(lr_cv.classes_, [-1, 1])
    assert len(lr_cv.classes_) == 2

    # Stacked per-class dict values: (n_entries, n_folds, n_Cs[, n_features]).
    stacked_paths = np.asarray(list(lr_cv.coefs_paths_.values()))
    assert_array_equal(stacked_paths.shape, (1, 3, 1, n_features))
    assert_array_equal(lr_cv.Cs_.shape, (1,))
    stacked_scores = np.asarray(list(lr_cv.scores_.values()))
    assert_array_equal(stacked_scores.shape, (1, 3, 1))
|
|
|
|
|
|
@pytest.mark.parametrize('scoring, multiclass_agg_list',
                         [('accuracy', ['']),
                          ('precision', ['_macro', '_weighted']),
                          # no need to test for micro averaging because it
                          # is the same as accuracy for f1, precision,
                          # and recall (see https://github.com/
                          # scikit-learn/scikit-learn/pull/
                          # 11578#discussion_r203250062)
                          ('f1', ['_macro', '_weighted']),
                          ('neg_log_loss', ['']),
                          ('recall', ['_macro', '_weighted'])])
def test_logistic_cv_multinomial_score(scoring, multiclass_agg_list):
    """``_log_reg_scoring_path`` scores like the scorer on a fitted model.

    Checks that the cross-validation scores are computed with the right
    scorer in the multinomial case, see
    https://github.com/scikit-learn/scikit-learn/issues/8720
    """
    X, y = make_classification(n_samples=100, random_state=0, n_classes=3,
                               n_informative=6)
    train, test = np.arange(80), np.arange(80, 100)
    # The default solver, lbfgs, supports the multinomial loss.
    lr = LogisticRegression(C=1., multi_class='multinomial')
    # Keep the estimator parameters to forward them to
    # _log_reg_scoring_path, minus the ones that are dropped below.
    params = lr.get_params()
    for dropped in ['C', 'n_jobs', 'warm_start']:
        params.pop(dropped)
    lr.fit(X[train], y[train])

    for averaging in multiclass_agg_list:
        scorer = get_scorer(scoring + averaging)
        path_result = _log_reg_scoring_path(X, y, train, test, Cs=[1.],
                                            scoring=scorer, **params)
        path_score = path_result[2][0]
        direct_score = scorer(lr, X[test], y[test])
        assert_array_almost_equal(path_score, direct_score)
|
|
|
|
|
|
def test_multinomial_logistic_regression_string_inputs():
    """String labels give the same models as the equivalent numeric labels.

    Covers ``LogisticRegression`` and ``LogisticRegressionCV`` and checks
    that predictions and ``class_weight`` keys use the original labels.
    """
    n_samples, n_features, n_classes = 50, 5, 3
    X_ref, y = make_classification(n_samples=n_samples, n_features=n_features,
                                   n_classes=n_classes, n_informative=3,
                                   random_state=0)
    labels = ['bar', 'baz', 'foo']
    y_str = LabelEncoder().fit(labels).inverse_transform(y)
    # For numerical labels, let y values be taken from set (-1, 0, 1)
    y = np.array(y) - 1

    lr = LogisticRegression(multi_class='multinomial')
    lr_cv = LogisticRegressionCV(multi_class='multinomial', Cs=3)
    lr_str = LogisticRegression(multi_class='multinomial')
    lr_cv_str = LogisticRegressionCV(multi_class='multinomial', Cs=3)

    lr.fit(X_ref, y)
    lr_cv.fit(X_ref, y)
    lr_str.fit(X_ref, y_str)
    lr_cv_str.fit(X_ref, y_str)

    # Each classes_ attribute is checked once, next to the matching
    # coefficient comparison.
    assert_array_almost_equal(lr.coef_, lr_str.coef_)
    assert sorted(lr_str.classes_) == labels
    assert_array_almost_equal(lr_cv.coef_, lr_cv_str.coef_)
    assert sorted(lr_cv_str.classes_) == labels

    # The predictions should be in original labels
    assert sorted(np.unique(lr_str.predict(X_ref))) == labels
    assert sorted(np.unique(lr_cv_str.predict(X_ref))) == labels

    # Make sure class weights can be given with string labels. This is a
    # plain LogisticRegression, hence the dedicated name; with a zero weight
    # on 'foo' that label is expected never to be predicted.
    lr_weighted = LogisticRegression(
        class_weight={'bar': 1, 'baz': 2, 'foo': 0},
        multi_class='multinomial').fit(X_ref, y_str)
    assert sorted(np.unique(lr_weighted.predict(X_ref))) == ['bar', 'baz']
|
|
|
|
|
|
def test_logistic_cv_sparse():
    """LogisticRegressionCV gives the same fit on dense and CSR input."""
    X, y = make_classification(n_samples=50, n_features=5,
                               random_state=0)
    # Zero out everything below 1.0 so the CSR version is actually sparse.
    X[X < 1.0] = 0.0
    X_csr = sp.csr_matrix(X)

    dense_model = LogisticRegressionCV().fit(X, y)
    sparse_model = LogisticRegressionCV().fit(X_csr, y)

    assert_array_almost_equal(sparse_model.coef_, dense_model.coef_)
    assert_array_almost_equal(sparse_model.intercept_,
                              dense_model.intercept_)
    assert sparse_model.C_ == dense_model.C_
|
|
|
|
|
|
def test_intercept_logistic_helper():
    """The intercept path equals a ones column minus its penalty.

    The loss, gradient and Hessian-vector product obtained with an extra
    intercept weight are compared with those of the same problem where a
    column of ones is appended to ``X`` and its weight is penalised.
    """
    n_samples, n_features = 10, 5
    X, y = make_classification(n_samples=n_samples, n_features=n_features,
                               random_state=0)

    # Fit intercept case.
    alpha = 1.
    w = np.ones(n_features + 1)
    grad_interp, hess_interp = _logistic_grad_hess(w, X, y, alpha)
    loss_interp = _logistic_loss(w, X, y, alpha)

    # Do not fit intercept. This can be considered equivalent to adding
    # a feature vector of ones, i.e column of one vectors.
    X_ = np.hstack((X, np.ones((n_samples, 1))))
    grad, hess = _logistic_grad_hess(w, X_, y, alpha)
    loss = _logistic_loss(w, X_, y, alpha)

    # In the fit_intercept=False case, the feature vector of ones is
    # penalized. This should be taken care of.
    assert_almost_equal(loss_interp + 0.5 * (w[-1] ** 2), loss)

    # Check gradient.
    assert_array_almost_equal(grad_interp[:n_features], grad[:n_features])
    assert_almost_equal(grad_interp[-1] + alpha * w[-1], grad[-1])

    # Check the Hessian-vector products along a random direction.
    rng = np.random.RandomState(0)
    direction = rng.rand(n_features + 1)
    hess_vec_interp = hess_interp(direction)
    hess_vec = hess(direction)
    assert_array_almost_equal(hess_vec_interp[:n_features],
                              hess_vec[:n_features])
    assert_almost_equal(hess_vec_interp[-1] + alpha * direction[-1],
                        hess_vec[-1])
|
|
|
|
|
|
def test_ovr_multinomial_iris():
    """OvR and multinomial LogisticRegressionCV behave as expected on iris."""
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape

    # The cv indices from stratified kfold (where stratification is done based
    # on the fine-grained iris classes, i.e, before the classes 0 and 1 are
    # conflated) is used for both clf and clf1
    n_cv = 2
    precomputed_folds = list(StratifiedKFold(n_cv).split(train, target))

    # Train clf on the original dataset where classes 0 and 1 are separated
    clf = LogisticRegressionCV(cv=precomputed_folds, multi_class='ovr')
    clf.fit(train, target)

    # Conflate classes 0 and 1 and train clf1 on this modified dataset
    merged_target = target.copy()
    merged_target[merged_target == 0] = 1
    clf1 = LogisticRegressionCV(cv=precomputed_folds, multi_class='ovr')
    clf1.fit(train, merged_target)

    # Ensure that what OvR learns for class2 is same regardless of whether
    # classes 0 and 1 are separated or not
    assert_allclose(clf.scores_[2], clf1.scores_[2])
    assert_allclose(clf.intercept_[2:], clf1.intercept_)
    assert_allclose(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    expected_paths_shape = (3, n_cv, 10, n_features + 1)
    expected_scores_shape = (3, n_cv, 10)

    assert clf.coef_.shape == (3, n_features)
    assert_array_equal(clf.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert coefs_paths.shape == expected_paths_shape
    assert clf.Cs_.shape == (10,)
    scores = np.asarray(list(clf.scores_.values()))
    assert scores.shape == expected_scores_shape

    # Test that for the iris data multinomial gives a better accuracy than OvR
    for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']:
        if solver in ['sag', 'saga']:
            max_iter, tol = 500, 1e-3
        else:
            max_iter, tol = 15, 1e-2
        clf_multi = LogisticRegressionCV(
            solver=solver, multi_class='multinomial', max_iter=max_iter,
            random_state=42, tol=tol, cv=2)
        clf_multi.fit(train, target)
        multi_score = clf_multi.score(train, target)
        ovr_score = clf.score(train, target)
        assert multi_score > ovr_score

        # Test attributes of LogisticRegressionCV
        assert clf.coef_.shape == clf_multi.coef_.shape
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        coefs_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
        assert coefs_paths.shape == expected_paths_shape
        assert clf_multi.Cs_.shape == (10,)
        scores = np.asarray(list(clf_multi.scores_.values()))
        assert scores.shape == expected_scores_shape
|
|
|
|
|
|
def test_logistic_regression_solvers():
    """All solvers agree on the coefficients of a binary problem.

    Every pair of solvers is compared to three decimals.
    """
    X, y = make_classification(n_features=10, n_informative=5, random_state=0)

    params = dict(fit_intercept=False, random_state=42, multi_class='ovr')
    solvers = ['newton-cg', 'lbfgs', 'sag', 'saga', 'liblinear']
    fitted = [LogisticRegression(solver=solver, **params).fit(X, y)
              for solver in solvers]

    # Compare each unordered pair of fitted models exactly once.
    for position, first in enumerate(fitted):
        for second in fitted[position + 1:]:
            assert_array_almost_equal(first.coef_, second.coef_, decimal=3)
|
|
|
|
|
|
def test_logistic_regression_solvers_multiclass():
    """All solvers agree on the OvR coefficients of a three-class problem.

    Every pair of solvers is compared to four decimals.
    """
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)
    tol = 1e-7
    params = dict(fit_intercept=False, tol=tol, random_state=42,
                  multi_class='ovr')
    # Per-solver overrides: sag and saga get a larger iteration budget.
    solver_overrides = [
        ('newton-cg', {}),
        ('lbfgs', {}),
        ('sag', {'max_iter': 1000}),
        ('saga', {'max_iter': 10000}),
        ('liblinear', {}),
    ]
    fitted = []
    for solver, overrides in solver_overrides:
        model = LogisticRegression(solver=solver, **overrides, **params)
        fitted.append(model.fit(X, y))

    # Compare each unordered pair of fitted models exactly once.
    for position, first in enumerate(fitted):
        for second in fitted[position + 1:]:
            assert_array_almost_equal(first.coef_, second.coef_, decimal=4)
|
|
|
|
|
|
def test_logistic_regressioncv_class_weights():
    """With class weights, every solver matches lbfgs in LogisticRegressionCV.

    Runs for a two-class and a three-class problem, with an explicit weight
    dict and with ``class_weight='balanced'``.
    """
    # The stochastic solvers get a tight tolerance, a large iteration budget
    # and a fixed seed; the other solvers use their defaults.
    stochastic = dict(tol=1e-5, max_iter=10000, random_state=0)
    solver_overrides = [
        ('lbfgs', {}),
        ('newton-cg', {}),
        ('liblinear', {}),
        ('sag', stochastic),
        ('saga', stochastic),
    ]

    for weight in [{0: 0.1, 1: 0.2}, {0: 0.1, 1: 0.2, 2: 0.5}]:
        n_classes = len(weight)
        for class_weight in (weight, 'balanced'):
            X, y = make_classification(n_samples=30, n_features=3,
                                       n_repeated=0,
                                       n_informative=3, n_redundant=0,
                                       n_classes=n_classes, random_state=0)

            fitted = {}
            for solver, overrides in solver_overrides:
                model = LogisticRegressionCV(solver=solver, Cs=1,
                                             fit_intercept=False,
                                             multi_class='ovr',
                                             class_weight=class_weight,
                                             **overrides)
                fitted[solver] = model.fit(X, y)

            reference = fitted['lbfgs'].coef_
            for solver in ('liblinear', 'newton-cg', 'sag', 'saga'):
                assert_array_almost_equal(fitted[solver].coef_, reference,
                                          decimal=4)
|
|
|
|
|
|
def test_logistic_regression_sample_weights():
    # Check the handling of sample_weight by LogisticRegression and
    # LogisticRegressionCV, and its equivalence with class_weight.
    X, y = make_classification(n_samples=20, n_features=5, n_informative=3,
                               n_classes=2, random_state=0)
    # Samples labelled 0 get weight 1, samples labelled 1 get weight 2.
    sample_weight = y + 1

    for LR in [LogisticRegression, LogisticRegressionCV]:

        kw = {'random_state': 42, 'fit_intercept': False, 'multi_class': 'ovr'}
        if LR is LogisticRegressionCV:
            kw.update({'Cs': 3, 'cv': 3})

        # Fitting with all-ones sample weights must give the same
        # coefficients as fitting without any weights (default None).
        for solver in ['lbfgs', 'liblinear']:
            unweighted = LR(solver=solver, **kw)
            unit_weighted = LR(solver=solver, **kw)
            unweighted.fit(X, y)
            unit_weighted.fit(X, y, sample_weight=np.ones(y.shape[0]))
            assert_array_almost_equal(
                unweighted.coef_, unit_weighted.coef_, decimal=4)

        # The default solver, newton-cg, sag and liblinear must agree when
        # given the same non-trivial sample weights.
        reference = LR(**kw)
        reference.fit(X, y, sample_weight=sample_weight)
        newton = LR(solver='newton-cg', **kw)
        newton.fit(X, y, sample_weight=sample_weight)
        sag = LR(solver='sag', tol=1e-10, **kw)
        # ignore convergence warning due to small dataset
        with ignore_warnings():
            sag.fit(X, y, sample_weight=sample_weight)
        liblinear = LR(solver='liblinear', **kw)
        liblinear.fit(X, y, sample_weight=sample_weight)
        for other in (newton, sag, liblinear):
            assert_array_almost_equal(
                reference.coef_, other.coef_, decimal=4)

        # class_weight={0: 1, 1: 2} must be equivalent to uniform class
        # weights combined with a sample weight of 2 on every class-1 sample.
        for solver in ['lbfgs', 'liblinear']:
            by_class = LR(solver=solver, class_weight={0: 1, 1: 2}, **kw)
            by_class.fit(X, y)
            by_sample = LR(solver=solver, **kw)
            by_sample.fit(X, y, sample_weight=sample_weight)
            assert_array_almost_equal(
                by_class.coef_, by_sample.coef_, decimal=4)

    # Repeat the class_weight / sample_weight equivalence check for the l1
    # penalty and for the l2 penalty with dual=True, since the patched
    # liblinear code is different.
    liblinear_variants = (
        dict(penalty="l1", tol=1e-5),
        dict(penalty="l2", dual=True),
    )
    for variant in liblinear_variants:
        clf_cw = LogisticRegression(
            solver="liblinear", fit_intercept=False,
            class_weight={0: 1, 1: 2}, random_state=42, multi_class='ovr',
            **variant)
        clf_cw.fit(X, y)
        clf_sw = LogisticRegression(
            solver="liblinear", fit_intercept=False, random_state=42,
            multi_class='ovr', **variant)
        clf_sw.fit(X, y, sample_weight)
        assert_array_almost_equal(clf_cw.coef_, clf_sw.coef_, decimal=4)
|
|
|
|
|
|
def _compute_class_weight_dictionary(y):
    # Helper: compute the "balanced" class weights of y and return them as
    # a {class label: weight} dictionary instead of an array.
    labels = np.unique(y)
    weights = compute_class_weight("balanced", classes=labels, y=y)
    return {label: weight for label, weight in zip(labels, weights)}
|
|
|
|
|
|
def test_logistic_regression_class_weights():
    # Check that class_weight="balanced" gives the same coefficients as
    # passing the equivalent explicit class-weight dictionary.
    scenarios = [
        # Multinomial case: remove 90% of class 0
        (slice(45, None), ("lbfgs", "newton-cg"), "multinomial", 4),
        # Binary case: remove 90% of class 0 and 100% of class 2
        (slice(45, 100), ("lbfgs", "newton-cg", "liblinear"), "ovr", 6),
    ]

    for rows, solvers, multi_class, decimal in scenarios:
        X = iris.data[rows, :]
        y = iris.target[rows]
        class_weight_dict = _compute_class_weight_dictionary(y)

        for solver in solvers:
            balanced = LogisticRegression(solver=solver,
                                          multi_class=multi_class,
                                          class_weight="balanced")
            explicit = LogisticRegression(solver=solver,
                                          multi_class=multi_class,
                                          class_weight=class_weight_dict)
            balanced.fit(X, y)
            explicit.fit(X, y)
            assert_array_almost_equal(balanced.coef_, explicit.coef_,
                                      decimal=decimal)
|
|
|
|
|
|
def test_logistic_regression_multinomial():
    # Tests for the multinomial option in logistic regression: coefficient
    # shapes and agreement between solvers.
    n_samples, n_features, n_classes = 50, 20, 3
    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_informative=10,
                               n_classes=n_classes, random_state=0)

    X = StandardScaler(with_mean=False).fit_transform(X)
    coef_shape = (n_classes, n_features)

    # 'lbfgs' is used as the reference, with and without an intercept
    ref_i = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    ref_w = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                               fit_intercept=False)
    ref_i.fit(X, y)
    ref_w.fit(X, y)
    assert ref_i.coef_.shape == coef_shape
    assert ref_w.coef_.shape == coef_shape

    for solver in ['sag', 'saga', 'newton-cg']:
        precise = dict(solver=solver, multi_class='multinomial',
                       random_state=42, max_iter=2000, tol=1e-7)
        clf_i = LogisticRegression(**precise)
        clf_w = LogisticRegression(fit_intercept=False, **precise)
        clf_i.fit(X, y)
        clf_w.fit(X, y)
        assert clf_i.coef_.shape == coef_shape
        assert clf_w.coef_.shape == coef_shape

        # Compare solutions between lbfgs and the other solvers
        assert_allclose(ref_i.coef_, clf_i.coef_, rtol=1e-2)
        assert_allclose(ref_w.coef_, clf_w.coef_, rtol=1e-2)
        assert_allclose(ref_i.intercept_, clf_i.intercept_, rtol=1e-2)

    # Test that the path gives almost the same results. However, since in
    # this case we take the average of the coefs after fitting across all
    # the folds, it need not be exactly the same.
    for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']:
        clf_path = LogisticRegressionCV(solver=solver, max_iter=2000,
                                        tol=1e-6, multi_class='multinomial',
                                        Cs=[1.])
        clf_path.fit(X, y)
        assert_allclose(clf_path.coef_, ref_i.coef_, rtol=2e-2)
        assert_allclose(clf_path.intercept_, ref_i.intercept_, rtol=2e-2)
|
|
|
|
|
|
def test_multinomial_grad_hess():
    # Check the Hessian-vector product returned by _multinomial_grad_hess
    # against a numerical estimate built from the gradient.
    rng = np.random.RandomState(0)
    n_samples, n_features, n_classes = 100, 5, 3
    X = rng.randn(n_samples, n_features)
    w = rng.rand(n_classes, n_features)

    # One-hot targets: each sample is assigned to the class with the
    # largest linear score under w.
    best_class = np.argmax(np.dot(X, w.T), axis=1)
    Y = np.zeros((n_samples, n_classes))
    Y[range(0, n_samples), best_class] = 1

    w = w.ravel()
    sample_weights = np.ones(X.shape[0])

    def grad_and_hessp(coef):
        return _multinomial_grad_hess(coef, X, Y, alpha=1.,
                                      sample_weight=sample_weights)

    _, hessp = grad_and_hessp(w)

    # extract first column of hessian matrix
    vec = np.zeros(n_features * n_classes)
    vec[0] = 1
    hess_col = hessp(vec)

    # Estimate hessian using least squares as done in
    # test_logistic_grad_hess
    e = 1e-3
    d_x = np.linspace(-e, e, 30)
    d_grad = np.array([grad_and_hessp(w + t * vec)[0] for t in d_x])
    d_grad -= d_grad.mean(axis=0)
    fit = linalg.lstsq(d_x[:, np.newaxis], d_grad)
    approx_hess_col = fit[0].ravel()
    assert_array_almost_equal(hess_col, approx_hess_col)
|
|
|
|
|
|
def test_liblinear_decision_function_zero():
    # Test negative prediction when decision_function values are zero.
    # Liblinear predicts the positive class when decision_function values
    # are zero. This is a test to verify that we do not do the same.
    # See Issue: https://github.com/scikit-learn/scikit-learn/issues/3600
    # and the PR https://github.com/scikit-learn/scikit-learn/pull/3623
    X_train, y_train = make_classification(n_samples=5, n_features=5,
                                           random_state=0)
    clf = LogisticRegression(fit_intercept=False, solver='liblinear',
                             multi_class='ovr')
    clf.fit(X_train, y_train)

    # Dummy data such that the decision function becomes zero.
    X_zero = np.zeros((5, 5))
    predictions = clf.predict(X_zero)
    assert_array_equal(predictions, np.zeros(5))
|
|
|
|
|
|
def test_liblinear_logregcv_sparse():
    # Smoke test: LogisticRegressionCV with solver='liblinear' must be able
    # to fit a sparse (CSR) matrix.
    X_dense, y = make_classification(n_samples=10, n_features=5,
                                     random_state=0)
    X_csr = sparse.csr_matrix(X_dense)
    LogisticRegressionCV(solver='liblinear', multi_class='ovr').fit(X_csr, y)
|
|
|
|
|
|
def test_saga_sparse():
    # Smoke test: LogisticRegressionCV with solver='saga' must be able to
    # fit a sparse (CSR) matrix.
    # (The previous comment was copied from the liblinear test and named
    # the wrong solver.)
    X, y = make_classification(n_samples=10, n_features=5, random_state=0)
    clf = LogisticRegressionCV(solver='saga')
    clf.fit(sparse.csr_matrix(X), y)
|
|
|
|
|
|
def test_logreg_intercept_scaling():
    # Test that the right error message is thrown when intercept_scaling <= 0
    template = ('Intercept scaling is %r but needs to be greater than 0.'
                ' To disable fitting an intercept,'
                ' set fit_intercept=False.')

    for bad_scaling in [-1, 0]:
        clf = LogisticRegression(intercept_scaling=bad_scaling,
                                 solver='liblinear', multi_class='ovr')
        expected = template % clf.intercept_scaling
        assert_raise_message(ValueError, expected, clf.fit, X, Y1)
|
|
|
|
|
|
def test_logreg_intercept_scaling_zero():
    # With fit_intercept=False the fitted intercept must be exactly zero.
    model = LogisticRegression(fit_intercept=False)
    model.fit(X, Y1)
    assert model.intercept_ == 0.
|
|
|
|
|
|
def test_logreg_l1():
    # Because liblinear penalizes the intercept and saga does not, we do not
    # fit the intercept to make it possible to compare the coefficients of
    # the two models at convergence.
    rng = np.random.RandomState(42)
    n_samples = 50
    X_informative, y = make_classification(n_samples=n_samples,
                                           n_features=20, random_state=0)
    # Append 3 pure-noise columns and 2 constant columns.
    X_noise = rng.normal(size=(n_samples, 3))
    X_constant = np.ones(shape=(n_samples, 2))
    X = np.concatenate((X_informative, X_noise, X_constant), axis=1)

    l1_params = dict(penalty="l1", C=1.0, fit_intercept=False,
                     multi_class='ovr', tol=1e-10)
    lr_liblinear = LogisticRegression(solver='liblinear', **l1_params)
    lr_liblinear.fit(X, y)

    lr_saga = LogisticRegression(solver='saga', max_iter=1000, **l1_params)
    lr_saga.fit(X, y)
    assert_array_almost_equal(lr_saga.coef_, lr_liblinear.coef_)

    # Noise and constant features should be regularized to zero by the l1
    # penalty
    for model in (lr_liblinear, lr_saga):
        assert_array_almost_equal(model.coef_[0, -5:], np.zeros(5))
|
|
|
|
|
|
def test_logreg_l1_sparse_data():
    # Because liblinear penalizes the intercept and saga does not, we do not
    # fit the intercept to make it possible to compare the coefficients of
    # the two models at convergence.
    rng = np.random.RandomState(42)
    n_samples = 50
    X_informative, y = make_classification(n_samples=n_samples,
                                           n_features=20, random_state=0)
    # Append 3 low-scale noise columns and 2 all-zero columns, then zero
    # every entry below 1 before converting to CSR.
    X_noise = rng.normal(scale=0.1, size=(n_samples, 3))
    X_constant = np.zeros(shape=(n_samples, 2))
    X = np.concatenate((X_informative, X_noise, X_constant), axis=1)
    X[X < 1] = 0
    X = sparse.csr_matrix(X)

    l1_params = dict(penalty="l1", C=1.0, fit_intercept=False,
                     multi_class='ovr', tol=1e-10)
    saga_params = dict(solver='saga', max_iter=1000, **l1_params)

    lr_liblinear = LogisticRegression(solver='liblinear', **l1_params)
    lr_liblinear.fit(X, y)

    lr_saga = LogisticRegression(**saga_params)
    lr_saga.fit(X, y)
    assert_array_almost_equal(lr_saga.coef_, lr_liblinear.coef_)

    # Noise and constant features should be regularized to zero by the l1
    # penalty
    for model in (lr_liblinear, lr_saga):
        assert_array_almost_equal(model.coef_[0, -5:], np.zeros(5))

    # Check that solving on the sparse and dense data yield the same results
    lr_saga_dense = LogisticRegression(**saga_params)
    lr_saga_dense.fit(X.toarray(), y)
    assert_array_almost_equal(lr_saga.coef_, lr_saga_dense.coef_)
|
|
|
|
|
|
@pytest.mark.parametrize("random_seed", [42])
@pytest.mark.parametrize("penalty", ["l1", "l2"])
def test_logistic_regression_cv_refit(random_seed, penalty):
    # Test that when refit=True, logistic regression cv with the saga solver
    # converges to the same solution as logistic regression with a fixed
    # regularization parameter.
    # Internally the LogisticRegressionCV model uses a warm start to refit on
    # the full data model with the optimal C found by CV. As the penalized
    # logistic regression loss is convex, we should still recover exactly
    # the same solution as long as the stopping criterion is strict enough
    # (and that there are no exactly duplicated features when penalty='l1').
    X, y = make_classification(n_samples=100, n_features=20,
                               random_state=random_seed)
    common_params = {
        'solver': 'saga',
        'penalty': penalty,
        'random_state': random_seed,
        'max_iter': 1000,
        'tol': 1e-12,
    }
    cv_model = LogisticRegressionCV(Cs=[1.0], refit=True, **common_params)
    cv_model.fit(X, y)
    fixed_model = LogisticRegression(C=1.0, **common_params)
    fixed_model.fit(X, y)
    assert_array_almost_equal(cv_model.coef_, fixed_model.coef_)
|
|
|
|
|
|
def test_logreg_predict_proba_multinomial():
    # Check that the multinomial model's softmax probabilities give a lower
    # training log-loss than both an OvR model and the multinomial model's
    # own per-class logistic probabilities.
    X, y = make_classification(n_samples=10, n_features=20, random_state=0,
                               n_classes=3, n_informative=10)

    clf_multi = LogisticRegression(multi_class="multinomial", solver="lbfgs")
    clf_multi.fit(X, y)
    multinomial_loss = log_loss(y, clf_multi.predict_proba(X))

    # Predicted probabilities from the multinomial fit should give a
    # smaller loss than those using the ovr method.
    clf_ovr = LogisticRegression(multi_class="ovr", solver="lbfgs")
    clf_ovr.fit(X, y)
    ovr_loss = log_loss(y, clf_ovr.predict_proba(X))
    assert ovr_loss > multinomial_loss

    # Predicted probabilities using the soft-max function should give a
    # smaller loss than those using the logistic function.
    logistic_loss = log_loss(y, clf_multi._predict_proba_lr(X))
    assert logistic_loss > multinomial_loss
|
|
|
|
|
|
def test_max_iter():
    # Test that the maximum number of iterations is reached: with a tiny
    # tolerance each fit must warn about convergence and report exactly
    # max_iter iterations.
    X, y_bin = iris.data, iris.target.copy()
    y_bin[y_bin == 2] = 0

    solvers = ['newton-cg', 'liblinear', 'sag', 'saga', 'lbfgs']
    # Every solver / multi_class pair except liblinear + multinomial,
    # which this test skips.
    combinations = [
        (solver, multi_class)
        for solver in solvers
        for multi_class in ['ovr', 'multinomial']
        if not (solver == 'liblinear' and multi_class == 'multinomial')
    ]

    for max_iter in range(1, 5):
        for solver, multi_class in combinations:
            lr = LogisticRegression(max_iter=max_iter, tol=1e-15,
                                    multi_class=multi_class,
                                    random_state=0, solver=solver)
            assert_warns(ConvergenceWarning, lr.fit, X, y_bin)
            assert lr.n_iter_[0] == max_iter
|
|
|
|
|
|
@pytest.mark.parametrize('solver',
                         ['newton-cg', 'liblinear', 'sag', 'saga', 'lbfgs'])
def test_n_iter(solver):
    # Test that self.n_iter_ has the correct format.
    X, y = iris.data, iris.target

    y_bin = y.copy()
    y_bin[y_bin == 2] = 0

    n_Cs = 4
    n_cv_fold = 2
    n_labels = np.unique(y).shape[0]

    shared = dict(tol=1e-2, solver=solver, random_state=42, max_iter=100)
    cv_shared = dict(Cs=n_Cs, cv=n_cv_fold, **shared)

    # OvR case: liblinear is expected to report a single entry, the other
    # solvers one entry per class.
    expected_len = 1 if solver == 'liblinear' else n_labels
    clf = LogisticRegression(multi_class='ovr', C=1., **shared)
    clf.fit(X, y)
    assert clf.n_iter_.shape == (expected_len,)

    clf = LogisticRegressionCV(multi_class='ovr', **cv_shared)
    clf.fit(X, y)
    assert clf.n_iter_.shape == (n_labels, n_cv_fold, n_Cs)
    clf.fit(X, y_bin)
    assert clf.n_iter_.shape == (1, n_cv_fold, n_Cs)

    # multinomial case: only checked for newton-cg and lbfgs
    if solver in ('liblinear', 'sag', 'saga'):
        return
    expected_len = 1

    clf = LogisticRegression(multi_class='multinomial', C=1., **shared)
    clf.fit(X, y)
    assert clf.n_iter_.shape == (expected_len,)

    clf = LogisticRegressionCV(multi_class='multinomial', **cv_shared)
    clf.fit(X, y)
    assert clf.n_iter_.shape == (expected_len, n_cv_fold, n_Cs)
    clf.fit(X, y_bin)
    assert clf.n_iter_.shape == (1, n_cv_fold, n_Cs)
|
|
|
|
|
|
@pytest.mark.parametrize('solver', ('newton-cg', 'sag', 'saga', 'lbfgs'))
@pytest.mark.parametrize('warm_start', (True, False))
@pytest.mark.parametrize('fit_intercept', (True, False))
@pytest.mark.parametrize('multi_class', ['ovr', 'multinomial'])
def test_warm_start(solver, warm_start, fit_intercept, multi_class):
    # A 1-iteration second fit on same data should give almost same result
    # with warm starting, and quite different result without warm starting.
    # Warm starting does not work with liblinear solver.
    X, y = iris.data, iris.target

    clf = LogisticRegression(tol=1e-4, multi_class=multi_class,
                             warm_start=warm_start,
                             solver=solver,
                             random_state=42, max_iter=100,
                             fit_intercept=fit_intercept)
    with ignore_warnings(category=ConvergenceWarning):
        clf.fit(X, y)
        first_coef = clf.coef_

        # Refit with a single iteration; the estimator is reused so that
        # warm_start (when enabled) can start from the previous solution.
        clf.max_iter = 1
        clf.fit(X, y)

    # Total absolute change of the coefficients between the two fits.
    cum_diff = np.abs(first_coef - clf.coef_).sum()
    msg = ("Warm starting issue with %s solver in %s mode "
           "with fit_intercept=%s and warm_start=%s"
           % (solver, multi_class, str(fit_intercept),
              str(warm_start)))
    if not warm_start:
        assert cum_diff > 2.0, msg
    else:
        assert 2.0 > cum_diff, msg
|
|
|
|
|
|
def test_saga_vs_liblinear():
    # Check that saga and liblinear reach the same coefficients (3 decimals)
    # for l1 and l2 penalties, on a dense binary iris subset and on a sparse
    # synthetic problem, over several regularization strengths.
    iris = load_iris()
    X, y = iris.data, iris.target
    X = np.concatenate([X] * 3)
    y = np.concatenate([y] * 3)

    # Keep classes 0 and 1 only and relabel them as -1 / +1.
    keep = y <= 1
    X_bin = X[keep]
    y_bin = y[keep] * 2 - 1

    X_sparse, y_sparse = make_classification(n_samples=50, n_features=20,
                                             random_state=0)
    X_sparse = sparse.csr_matrix(X_sparse)

    for (X, y) in ((X_bin, y_bin), (X_sparse, y_sparse)):
        for penalty in ['l1', 'l2']:
            n_samples = X.shape[0]
            # alpha=1e-3 is time consuming
            for alpha in np.logspace(-1, 1, 3):
                shared = dict(
                    C=1. / (n_samples * alpha),
                    multi_class='ovr',
                    max_iter=200,
                    fit_intercept=False,
                    penalty=penalty, random_state=0, tol=1e-24)

                saga = LogisticRegression(solver='saga', **shared)
                liblinear = LogisticRegression(solver='liblinear', **shared)

                saga.fit(X, y)
                liblinear.fit(X, y)
                # Convergence for alpha=1e-3 is very slow
                assert_array_almost_equal(saga.coef_, liblinear.coef_, 3)
|
|
|
|
|
|
@pytest.mark.parametrize('multi_class', ['ovr', 'multinomial'])
@pytest.mark.parametrize('solver', ['newton-cg', 'liblinear', 'saga'])
@pytest.mark.parametrize('fit_intercept', [False, True])
def test_dtype_match(solver, multi_class, fit_intercept):
    # Test that np.float32 input data is not cast to np.float64 when possible
    # and that the output is approximately the same no matter the input format.

    if solver == 'liblinear' and multi_class == 'multinomial':
        pytest.skip('liblinear does not support multinomial logistic')

    # liblinear is expected to return float64 coefficients even for float32
    # input; the other solvers are expected to keep float32.
    out32_type = np.float32 if solver != 'liblinear' else np.float64

    X_32 = np.array(X).astype(np.float32)
    y_32 = np.array(Y1).astype(np.float32)
    X_64 = np.array(X).astype(np.float64)
    y_64 = np.array(Y1).astype(np.float64)
    X_sparse_32 = sp.csr_matrix(X, dtype=np.float32)
    X_sparse_64 = sp.csr_matrix(X, dtype=np.float64)
    solver_tol = 5e-4

    lr_templ = LogisticRegression(
        solver=solver, multi_class=multi_class,
        random_state=42, tol=solver_tol, fit_intercept=fit_intercept)

    def fit_fresh(features, target):
        # Fit an unfitted copy of the template estimator.
        estimator = clone(lr_templ)
        estimator.fit(features, target)
        return estimator

    # Check 32-bit type consistency
    lr_32 = fit_fresh(X_32, y_32)
    assert lr_32.coef_.dtype == out32_type

    # Check 32-bit type consistency with sparsity
    lr_32_sparse = fit_fresh(X_sparse_32, y_32)
    assert lr_32_sparse.coef_.dtype == out32_type

    # Check 64-bit type consistency
    lr_64 = fit_fresh(X_64, y_64)
    assert lr_64.coef_.dtype == np.float64

    # Check 64-bit type consistency with sparsity
    lr_64_sparse = fit_fresh(X_sparse_64, y_64)
    assert lr_64_sparse.coef_.dtype == np.float64

    # solver_tol bounds the norm of the loss gradient
    # dw ~= inv(H)*grad ==> |dw| ~= |inv(H)| * solver_tol, where H - hessian
    #
    # See https://github.com/scikit-learn/scikit-learn/pull/13645
    #
    # with  Z = np.hstack((np.ones((3,1)), np.array(X)))
    # In [8]: np.linalg.norm(np.diag([0,2,2]) + np.linalg.inv((Z.T @ Z)/4))
    # Out[8]: 1.7193336918135917

    # factor of 2 to get the ball diameter
    atol = 2 * 1.72 * solver_tol
    if os.name == 'nt' and _IS_32BIT:
        # FIXME
        atol = 1e-2

    # Check accuracy consistency
    assert_allclose(lr_32.coef_, lr_64.coef_.astype(np.float32), atol=atol)

    if solver == 'saga' and fit_intercept:
        # FIXME: SAGA on sparse data fits the intercept inaccurately with the
        # default tol and max_iter parameters.
        atol = 1e-1

    # Dense and sparse fits of the same precision must agree.
    for dense, sparse_fit in ((lr_32, lr_32_sparse), (lr_64, lr_64_sparse)):
        assert_allclose(dense.coef_, sparse_fit.coef_, atol=atol)
|
|
|
|
|
|
def test_warm_start_converge_LR():
    # Test to see that the logistic regression converges on warm start,
    # with multi_class='multinomial'. Non-regression test for #10836

    rng = np.random.RandomState(0)
    shifted_blob = rng.randn(100, 2) + [1, 1]
    centered_blob = rng.randn(100, 2)
    X = np.concatenate((shifted_blob, centered_blob))
    y = np.array([1] * 100 + [-1] * 100)

    sag_params = dict(multi_class='multinomial', solver='sag',
                      random_state=0)
    lr_no_ws = LogisticRegression(warm_start=False, **sag_params)
    lr_ws = LogisticRegression(warm_start=True, **sag_params)

    lr_no_ws.fit(X, y)
    lr_no_ws_loss = log_loss(y, lr_no_ws.predict_proba(X))

    # Refit the warm-started model five times on the same data; its final
    # loss must match the single cold-start fit.
    for _ in range(5):
        lr_ws.fit(X, y)
    lr_ws_loss = log_loss(y, lr_ws.predict_proba(X))
    assert_allclose(lr_no_ws_loss, lr_ws_loss, rtol=1e-5)
|
|
|
|
|
|
def test_elastic_net_coeffs():
    # make sure elasticnet penalty gives different coefficients from l1 and l2
    # with saga solver (l1_ratio different from 0 or 1)
    X, y = make_classification(random_state=0)

    C = 2.
    l1_ratio = .5

    def fitted_coef(penalty):
        # Coefficients of a saga fit with the given penalty.
        lr = LogisticRegression(penalty=penalty, C=C, solver='saga',
                                random_state=0, l1_ratio=l1_ratio)
        lr.fit(X, y)
        return lr.coef_

    elastic_net_coeffs = fitted_coef('elasticnet')
    l1_coeffs = fitted_coef('l1')
    l2_coeffs = fitted_coef('l2')

    # make sure coeffs differ by at least .1
    pairs = (
        (elastic_net_coeffs, l1_coeffs),
        (elastic_net_coeffs, l2_coeffs),
        (l2_coeffs, l1_coeffs),
    )
    for left, right in pairs:
        assert not np.allclose(left, right, rtol=0, atol=.1)
|
|
|
|
|
|
@pytest.mark.parametrize('C', [.001, .1, 1, 10, 100, 1000, 1e6])
@pytest.mark.parametrize('penalty, l1_ratio',
                         [('l1', 1),
                          ('l2', 0)])
def test_elastic_net_l1_l2_equivalence(C, penalty, l1_ratio):
    # Make sure elasticnet is equivalent to l1 when l1_ratio=1 and to l2 when
    # l1_ratio=0.
    X, y = make_classification(random_state=0)

    saga_params = dict(C=C, solver='saga', random_state=0)
    lr_enet = LogisticRegression(penalty='elasticnet', l1_ratio=l1_ratio,
                                 **saga_params)
    lr_expected = LogisticRegression(penalty=penalty, **saga_params)
    lr_enet.fit(X, y)
    lr_expected.fit(X, y)

    assert_array_almost_equal(lr_enet.coef_, lr_expected.coef_)
|
|
|
|
|
|
@pytest.mark.parametrize('C', [.001, 1, 100, 1e6])
def test_elastic_net_vs_l1_l2(C):
    # Make sure that elasticnet with grid search on l1_ratio gives same or
    # better results than just l1 or just l2.

    X, y = make_classification(500, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    param_grid = {'l1_ratio': np.linspace(0, 1, 5)}
    saga_params = dict(C=C, solver='saga', random_state=0)

    enet_clf = LogisticRegression(penalty='elasticnet', **saga_params)
    gs = GridSearchCV(enet_clf, param_grid, refit=True)
    l1_clf = LogisticRegression(penalty='l1', **saga_params)
    l2_clf = LogisticRegression(penalty='l2', **saga_params)

    for estimator in (gs, l1_clf, l2_clf):
        estimator.fit(X_train, y_train)

    # The grid-searched model must score at least as well on the held-out
    # split as each single-penalty model.
    gs_score = gs.score(X_test, y_test)
    assert gs_score >= l1_clf.score(X_test, y_test)
    assert gs_score >= l2_clf.score(X_test, y_test)
|
|
|
|
|
|
@pytest.mark.parametrize('C', np.logspace(-3, 2, 4))
@pytest.mark.parametrize('l1_ratio', [.1, .5, .9])
def test_LogisticRegression_elastic_net_objective(C, l1_ratio):
    # Check that training with a penalty matching the objective leads
    # to a lower objective.
    # Here we train a logistic regression with l2 (a) and elasticnet (b)
    # penalties, and compute the elasticnet objective. That of a should be
    # greater than that of b (both objectives are convex).
    X, y = make_classification(n_samples=1000, n_classes=2, n_features=20,
                               n_informative=10, n_redundant=0,
                               n_repeated=0, random_state=0)
    X = scale(X)

    saga_params = dict(solver='saga', random_state=0, C=C,
                       fit_intercept=False)
    lr_enet = LogisticRegression(penalty='elasticnet', l1_ratio=l1_ratio,
                                 **saga_params)
    lr_l2 = LogisticRegression(penalty='l2', **saga_params)
    lr_enet.fit(X, y)
    lr_l2.fit(X, y)

    def enet_objective(lr):
        # C * log-loss plus the l1 and l2 penalty terms weighted by
        # l1_ratio and (1 - l1_ratio) respectively.
        coef = lr.coef_.ravel()
        data_term = C * log_loss(y, lr.predict_proba(X))
        l1_term = l1_ratio * np.sum(np.abs(coef))
        l2_term = (1. - l1_ratio) * 0.5 * np.dot(coef, coef)
        return data_term + l1_term + l2_term

    assert enet_objective(lr_enet) < enet_objective(lr_l2)
|
|
|
|
|
|
@pytest.mark.parametrize('multi_class', ('ovr', 'multinomial'))
def test_LogisticRegressionCV_GridSearchCV_elastic_net(multi_class):
    # make sure LogisticRegressionCV gives same best params (l1 and C) as
    # GridSearchCV when penalty is elasticnet

    if multi_class != 'ovr':
        data_params = dict(n_samples=100, n_classes=3, n_informative=3)
    else:
        # This is actually binary classification, ovr multiclass is treated in
        # test_LogisticRegressionCV_GridSearchCV_elastic_net_ovr
        data_params = {}
    X, y = make_classification(random_state=0, **data_params)

    cv = StratifiedKFold(5)

    l1_ratios = np.linspace(0, 1, 3)
    Cs = np.logspace(-4, 4, 3)

    lrcv = LogisticRegressionCV(penalty='elasticnet', Cs=Cs, solver='saga',
                                cv=cv, l1_ratios=l1_ratios, random_state=0,
                                multi_class=multi_class)
    lrcv.fit(X, y)

    base_estimator = LogisticRegression(penalty='elasticnet', solver='saga',
                                        random_state=0,
                                        multi_class=multi_class)
    gs = GridSearchCV(base_estimator, {'C': Cs, 'l1_ratio': l1_ratios},
                      cv=cv)
    gs.fit(X, y)

    best = gs.best_params_
    assert best['l1_ratio'] == lrcv.l1_ratio_[0]
    assert best['C'] == lrcv.C_[0]
|
|
|
|
|
|
def test_LogisticRegressionCV_GridSearchCV_elastic_net_ovr():
    # make sure LogisticRegressionCV gives same best params (l1 and C) as
    # GridSearchCV when penalty is elasticnet and multiclass is ovr. We can't
    # compare best_params like in the previous test because
    # LogisticRegressionCV with multi_class='ovr' will have one C and one
    # l1_param for each class, while LogisticRegression will share the
    # parameters over the *n_classes* classifiers.

    X, y = make_classification(n_samples=100, n_classes=3, n_informative=3,
                               random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    cv = StratifiedKFold(5)

    l1_ratios = np.linspace(0, 1, 3)
    Cs = np.logspace(-4, 4, 3)

    lrcv = LogisticRegressionCV(penalty='elasticnet', Cs=Cs, solver='saga',
                                cv=cv, l1_ratios=l1_ratios, random_state=0,
                                multi_class='ovr')
    lrcv.fit(X_train, y_train)

    base_estimator = LogisticRegression(penalty='elasticnet', solver='saga',
                                        random_state=0, multi_class='ovr')
    gs = GridSearchCV(base_estimator, {'C': Cs, 'l1_ratio': l1_ratios},
                      cv=cv)
    gs.fit(X_train, y_train)

    # Check that predictions are 80% the same
    for features in (X_train, X_test):
        agreement = (lrcv.predict(features) == gs.predict(features)).mean()
        assert agreement >= .8
|
|
|
|
|
|
@pytest.mark.parametrize('penalty', ('l2', 'elasticnet'))
@pytest.mark.parametrize('multi_class', ('ovr', 'multinomial', 'auto'))
def test_LogisticRegressionCV_no_refit(penalty, multi_class):
    # Test LogisticRegressionCV attribute shapes when refit is False

    n_classes = 3
    n_features = 20
    X, y = make_classification(n_samples=200, n_classes=n_classes,
                               n_informative=n_classes, n_features=n_features,
                               random_state=0)

    Cs = np.logspace(-4, 4, 3)
    # l1_ratios is only supplied for the elasticnet penalty.
    l1_ratios = np.linspace(0, 1, 2) if penalty == 'elasticnet' else None

    lrcv = LogisticRegressionCV(penalty=penalty, Cs=Cs, solver='saga',
                                l1_ratios=l1_ratios, random_state=0,
                                multi_class=multi_class, refit=False)
    lrcv.fit(X, y)

    per_class = (n_classes,)
    assert lrcv.C_.shape == per_class
    assert lrcv.l1_ratio_.shape == per_class
    assert lrcv.coef_.shape == (n_classes, n_features)
|
|
|
|
|
|
def test_LogisticRegressionCV_elasticnet_attribute_shapes():
    # Make sure the shapes of scores_ and coefs_paths_ attributes are correct
    # when using elasticnet (added one dimension for l1_ratios)

    n_classes = 3
    n_features = 20
    X, y = make_classification(n_samples=200, n_classes=n_classes,
                               n_informative=n_classes, n_features=n_features,
                               random_state=0)

    Cs = np.logspace(-4, 4, 3)
    l1_ratios = np.linspace(0, 1, 2)

    n_folds = 2
    lrcv = LogisticRegressionCV(penalty='elasticnet', Cs=Cs, solver='saga',
                                cv=n_folds, l1_ratios=l1_ratios,
                                multi_class='ovr', random_state=0)
    lrcv.fit(X, y)

    # Leading dimensions shared by all three attributes.
    grid_shape = (n_classes, n_folds, Cs.size, l1_ratios.size)

    coefs_paths = np.asarray(list(lrcv.coefs_paths_.values()))
    # The last axis holds n_features + 1 entries per fit.
    assert coefs_paths.shape == grid_shape + (n_features + 1,)

    scores = np.asarray(list(lrcv.scores_.values()))
    assert scores.shape == grid_shape

    assert lrcv.n_iter_.shape == grid_shape
|
|
|
|
|
|
@pytest.mark.parametrize('l1_ratio', (-1, 2, None, 'something_wrong'))
def test_l1_ratio_param(l1_ratio):
    # An invalid l1_ratio must raise a ValueError with the elasticnet
    # penalty, and a non-None l1_ratio must trigger a UserWarning when the
    # penalty is not elasticnet.

    msg = "l1_ratio must be between 0 and 1; got (l1_ratio=%r)" % l1_ratio
    enet = LogisticRegression(penalty='elasticnet', solver='saga',
                              l1_ratio=l1_ratio)
    assert_raise_message(ValueError, msg, enet.fit, X, Y1)

    if l1_ratio is None:
        return

    msg = ("l1_ratio parameter is only used when penalty is 'elasticnet'."
           " Got (penalty=l1)")
    l1_only = LogisticRegression(penalty='l1', solver='saga',
                                 l1_ratio=l1_ratio)
    assert_warns_message(UserWarning, msg, l1_only.fit, X, Y1)
|
|
|
|
|
|
@pytest.mark.parametrize('l1_ratios', ([], [.5, 2], None, 'something_wrong'))
def test_l1_ratios_param(l1_ratios):
    # Invalid l1_ratios must raise a ValueError with the elasticnet
    # penalty, and non-None l1_ratios must trigger a UserWarning when the
    # penalty is not elasticnet.

    msg = ("l1_ratios must be a list of numbers between 0 and 1; got "
           "(l1_ratios=%r)" % l1_ratios)
    enet_cv = LogisticRegressionCV(penalty='elasticnet', solver='saga',
                                   l1_ratios=l1_ratios, cv=2)
    assert_raise_message(ValueError, msg, enet_cv.fit, X, Y1)

    if l1_ratios is None:
        return

    msg = ("l1_ratios parameter is only used when penalty is "
           "'elasticnet'. Got (penalty=l1)")
    l1_only_cv = LogisticRegressionCV(penalty='l1', solver='saga',
                                      l1_ratios=l1_ratios, cv=2)
    assert_warns_message(UserWarning, msg, l1_only_cv.fit, X, Y1)
|
|
|
|
|
|
@pytest.mark.parametrize('C', np.logspace(-3, 2, 4))
@pytest.mark.parametrize('l1_ratio', [.1, .5, .9])
def test_elastic_net_versus_sgd(C, l1_ratio):
    # The elastic-net penalty of LogisticRegression(solver='saga') and of
    # SGDClassifier(loss='log') should give coefficients that agree to one
    # decimal on a scaled binary problem.
    n_samples = 500
    data, target = make_classification(
        n_samples=n_samples, n_classes=2, n_features=5, n_informative=5,
        n_redundant=0, n_repeated=0, random_state=1)
    data = scale(data)

    # Settings common to both estimators.
    shared = dict(penalty='elasticnet', random_state=1, fit_intercept=False,
                  l1_ratio=l1_ratio)

    # tol=-np.inf is passed so that SGD is not given a positive stopping
    # tolerance; alpha is derived from C and the number of samples.
    sgd_clf = SGDClassifier(loss='log', alpha=1. / C / n_samples,
                            tol=-np.inf, max_iter=2000, **shared)
    sgd_clf.fit(data, target)

    saga_clf = LogisticRegression(solver='saga', C=C, tol=1e-5,
                                  max_iter=1000, **shared)
    saga_clf.fit(data, target)

    assert_array_almost_equal(sgd_clf.coef_, saga_clf.coef_, decimal=1)
|
|
|
|
|
|
def test_logistic_regression_path_coefs_multinomial():
    # Non-regression check: with multi_class='multinomial', the coefs
    # returned by _logistic_regression_path for different values of C must
    # not override each other (used to be a bug).
    data, target = make_classification(
        n_samples=200, n_classes=3, n_informative=2, n_redundant=0,
        n_clusters_per_class=1, random_state=0, n_features=2)
    Cs = [.00001, 1, 10000]

    coefs, _, _ = _logistic_regression_path(
        data, target, penalty='l1', Cs=Cs, solver='saga', random_state=0,
        multi_class='multinomial')

    # Each pair of coefficient sets must differ: the equality assertion is
    # itself expected to fail for every pair.
    for first, second in [(0, 1), (0, 2), (1, 2)]:
        with pytest.raises(AssertionError):
            assert_array_almost_equal(coefs[first], coefs[second], decimal=1)
|
|
|
|
|
|
@pytest.mark.parametrize('est',
                         [LogisticRegression(random_state=0),
                          LogisticRegressionCV(random_state=0, cv=3,
                                               Cs=3, tol=1e-3)],
                         ids=lambda x: x.__class__.__name__)
@pytest.mark.parametrize('solver', ['liblinear', 'lbfgs', 'newton-cg', 'sag',
                                    'saga'])
def test_logistic_regression_multi_class_auto(est, solver):
    # multi_class='auto' must behave like 'ovr' when y is binary or the
    # solver is liblinear, and like 'multinomial' otherwise.
    X_train = iris.data[::10]
    X_eval = iris.data[1::10]
    y_multi = iris.target[::10]
    y_bin = y_multi == 0

    def fit_as(y, multi_class):
        # Fit a fresh clone of the parametrized estimator so that the
        # different configurations never share state.
        model = clone(est)
        model.set_params(multi_class=multi_class, solver=solver)
        return model.fit(X_train, y)

    def check_same_model(left, right):
        # Two fitted models are "the same" when both their coefficients
        # and their predicted probabilities on held-out rows agree.
        assert_allclose(left.coef_, right.coef_)
        assert_allclose(left.predict_proba(X_eval),
                        right.predict_proba(X_eval))

    # Binary target: 'auto' is compared against 'ovr' for every solver.
    est_auto_bin = fit_as(y_bin, 'auto')
    check_same_model(est_auto_bin, fit_as(y_bin, 'ovr'))

    est_auto_multi = fit_as(y_multi, 'auto')
    if solver == 'liblinear':
        # Multiclass target with liblinear: 'auto' is compared against 'ovr'.
        check_same_model(est_auto_multi, fit_as(y_multi, 'ovr'))
        return

    # Multiclass target with any other solver: 'auto' is compared against
    # 'multinomial'.
    est_multi_multi = fit_as(y_multi, 'multinomial')
    if solver == 'lbfgs' and sys.platform == 'darwin':
        pytest.xfail('Issue #11924: LogisticRegressionCV(solver="lbfgs", '
                     'multi_class="multinomial") is nondterministic on '
                     'MacOS.')
    check_same_model(est_auto_multi, est_multi_multi)

    # Make sure multi_class='ovr' is distinct from ='multinomial'
    for y_other in (y_bin, y_multi):
        est_multinomial = fit_as(y_other, 'multinomial')
        assert not np.allclose(est_auto_bin.coef_, est_multinomial.coef_)
|
|
|
|
|
|
@pytest.mark.parametrize('solver', ('lbfgs', 'newton-cg', 'sag', 'saga'))
def test_penalty_none(solver):
    # Three checks around penalty='none':
    # - a UserWarning is raised when C is set to a non-default value;
    # - predictions match those of an l2 penalty with C=np.inf;
    # - LogisticRegressionCV refuses penalty='none' with a ValueError.
    data, target = make_classification(n_samples=1000, random_state=0)

    unpenalized_with_C = LogisticRegression(penalty='none', solver=solver,
                                            C=4)
    assert_warns_message(UserWarning,
                         "Setting penalty='none' will ignore the C",
                         unpenalized_with_C.fit, data, target)

    unpenalized = LogisticRegression(penalty='none', solver=solver,
                                     random_state=0)
    l2_infinite_C = LogisticRegression(penalty='l2', C=np.inf, solver=solver,
                                       random_state=0)
    unpenalized.fit(data, target)
    l2_infinite_C.fit(data, target)
    assert_array_equal(unpenalized.predict(data),
                       l2_infinite_C.predict(data))

    unpenalized_cv = LogisticRegressionCV(penalty='none')
    expected_error = ("penalty='none' is not useful and not supported by "
                      "LogisticRegressionCV")
    assert_raise_message(ValueError, expected_error,
                         unpenalized_cv.fit, data, target)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "params",
    [{'penalty': 'l1', 'dual': False, 'tol': 1e-12, 'max_iter': 1000},
     {'penalty': 'l2', 'dual': True, 'tol': 1e-12, 'max_iter': 1000},
     {'penalty': 'l2', 'dual': False, 'tol': 1e-12, 'max_iter': 1000}]
)
def test_logisticregression_liblinear_sample_weight(params):
    # liblinear must honour sample_weight in all supported configurations
    # (l1-primal, l2-primal, l2-dual): samples with zero weight must not
    # change the fitted model.

    # Four distinct points, each duplicated four times, with labels 1/2.
    points = np.array([[1, 3], [2, 1], [3, 3], [4, 1]],
                      dtype=np.dtype('float'))
    labels = np.array([1, 2, 1, 2], dtype=np.dtype('int'))
    X = np.repeat(points, 4, axis=0)
    y = np.repeat(labels, 4)
    n_original = len(y)

    # Augmented problem: the original samples with weight 1, followed by a
    # copy with flipped labels (3 - y swaps 1 and 2) and weight 0. Rows are
    # then shuffled so that the zero-weight samples are interleaved.
    X_aug = np.vstack([X, X])
    y_aug = np.hstack([y, 3 - y])
    weights = np.ones(shape=n_original * 2)
    weights[n_original:] = 0
    X_aug, y_aug, weights = shuffle(X_aug, y_aug, weights, random_state=0)

    template = LogisticRegression(solver='liblinear', random_state=42)
    template.set_params(**params)
    plain_clf = clone(template).fit(X, y)
    weighted_clf = clone(template).fit(X_aug, y_aug, sample_weight=weights)

    # Both models must agree on every kind of output for the original data.
    for method_name in ("predict", "predict_proba", "decision_function"):
        plain_output = getattr(plain_clf, method_name)(X)
        weighted_output = getattr(weighted_clf, method_name)(X)
        assert_allclose(plain_output, weighted_output)
|
|
|
|
|
|
def test_scores_attribute_layout_elasticnet():
    # Non regression test for issue #14955.
    # With the elastic-net penalty, averaging a per-class entry of scores_
    # over its first axis (the folds) leaves a 2D grid of scores. We make
    # sure here that the first axis of that grid corresponds to Cs and the
    # second one to l1_ratios, by recomputing every cell with a plain
    # LogisticRegression and cross_val_score on the same splitter.
    X, y = make_classification(n_samples=1000, random_state=0)
    folds = StratifiedKFold(n_splits=5)

    Cs = [.1, 1, 10]
    l1_ratios = [.1, .9]

    search = LogisticRegressionCV(penalty='elasticnet', solver='saga',
                                  l1_ratios=l1_ratios, Cs=Cs, cv=folds,
                                  random_state=0)
    search.fit(X, y)

    fold_averaged = search.scores_[1].mean(axis=0)  # average over folds

    # Walk the (C, l1_ratio) grid row by row.
    for i, j in np.ndindex(len(Cs), len(l1_ratios)):
        single = LogisticRegression(penalty='elasticnet', solver='saga',
                                    C=Cs[i], l1_ratio=l1_ratios[j],
                                    random_state=0)
        expected = cross_val_score(single, X, y, cv=folds).mean()
        assert fold_averaged[i, j] == pytest.approx(expected)
|