Uploaded Test files
This commit is contained in:
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

@@ -0,0 +1,305 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Gael Varoquaux <gael.varoquaux@normalesup.org>
#         Virgile Fritsch <virgile.fritsch@inria.fr>
#
# License: BSD 3 clause

import numpy as np
import pytest

from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_warns

from sklearn import datasets
from sklearn.covariance import empirical_covariance, EmpiricalCovariance, \
    ShrunkCovariance, shrunk_covariance, \
    LedoitWolf, ledoit_wolf, ledoit_wolf_shrinkage, OAS, oas

X, _ = datasets.load_diabetes(return_X_y=True)
X_1d = X[:, 0]
n_samples, n_features = X.shape


def test_covariance():
    # Tests Covariance module on a simple dataset.
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    emp_cov = empirical_covariance(X)
    assert_array_almost_equal(emp_cov, cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(emp_cov), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, norm='spectral'), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, norm='frobenius'), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, scaling=False), 0)
    assert_almost_equal(
        cov.error_norm(emp_cov, squared=False), 0)
    with pytest.raises(NotImplementedError):
        cov.error_norm(emp_cov, norm='foo')
    # Mahalanobis distances computation test
    mahal_dist = cov.mahalanobis(X)
    assert np.amin(mahal_dist) > 0
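    # The squared Mahalanobis distance of a sample x is
    # d^2(x) = (x - mu)^T Sigma^{-1} (x - mu); with a positive-definite
    # fitted covariance it is strictly positive for any x != mu, which is
    # why every distance computed above is expected to be > 0.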

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    cov = EmpiricalCovariance()
    cov.fit(X_1d)
    assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(empirical_covariance(X_1d)), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0)

    # test with one sample
    # Create X with 1 sample and 5 features
    X_1sample = np.arange(5).reshape(1, 5)
    cov = EmpiricalCovariance()
    assert_warns(UserWarning, cov.fit, X_1sample)
    assert_array_almost_equal(cov.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test integer type
    X_integer = np.asarray([[0, 1], [1, 0]])
    result = np.asarray([[0.25, -0.25], [-0.25, 0.25]])
    assert_array_almost_equal(empirical_covariance(X_integer), result)

    # test centered case
    cov = EmpiricalCovariance(assume_centered=True)
    cov.fit(X)
    assert_array_equal(cov.location_, np.zeros(X.shape[1]))


def test_shrunk_covariance():
    # Tests ShrunkCovariance module on a simple dataset.
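    # shrunk_covariance blends the empirical covariance with a scaled
    # identity: (1 - shrinkage) * cov + shrinkage * mu * np.identity(p),
    # where mu = np.trace(cov) / p, so shrinkage=0 must reproduce the
    # empirical covariance exactly (checked below).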
    # compare shrunk covariance obtained from data and from MLE estimate
    cov = ShrunkCovariance(shrinkage=0.5)
    cov.fit(X)
    assert_array_almost_equal(
        shrunk_covariance(empirical_covariance(X), shrinkage=0.5),
        cov.covariance_, 4)

    # same test with shrinkage not provided
    cov = ShrunkCovariance()
    cov.fit(X)
    assert_array_almost_equal(
        shrunk_covariance(empirical_covariance(X)), cov.covariance_, 4)

    # same test with shrinkage = 0 (<==> empirical_covariance)
    cov = ShrunkCovariance(shrinkage=0.)
    cov.fit(X)
    assert_array_almost_equal(empirical_covariance(X), cov.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    cov = ShrunkCovariance(shrinkage=0.3)
    cov.fit(X_1d)
    assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    cov = ShrunkCovariance(shrinkage=0.5, store_precision=False)
    cov.fit(X)
    assert(cov.precision_ is None)


def test_ledoit_wolf():
    # Tests LedoitWolf module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_

    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
                                              block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    lw = LedoitWolf()
    assert_warns(UserWarning, lw.fit, X_1sample)
    assert_array_almost_equal(lw.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)


def _naive_ledoit_wolf_shrinkage(X):
    # A simple implementation of the formulas from Ledoit & Wolf

    # The computation below follows the formulas from
    # "O. Ledoit and M. Wolf, A Well-Conditioned Estimator for
    # Large-Dimensional Covariance Matrices"
    # beta and delta are given at the beginning of section 3.2
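    # A sketch of the quantities computed below, in the paper's notation:
    # mu = trace(S) / p is the average eigenvalue of the empirical
    # covariance S, delta^2 = ||S - mu * I||_F^2 / p measures its dispersion
    # around the target mu * I, beta^2 estimates the error of S itself, and
    # the shrinkage intensity is min(beta^2, delta^2) / delta^2.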
    n_samples, n_features = X.shape
    emp_cov = empirical_covariance(X, assume_centered=False)
    mu = np.trace(emp_cov) / n_features
    delta_ = emp_cov.copy()
    delta_.flat[::n_features + 1] -= mu
    delta = (delta_ ** 2).sum() / n_features
    X2 = X ** 2
    beta_ = 1. / (n_features * n_samples) \
        * np.sum(np.dot(X2.T, X2) / n_samples - emp_cov ** 2)

    beta = min(beta_, delta)
    shrinkage = beta / delta
    return shrinkage


def test_ledoit_wolf_small():
    # Compare our blocked implementation to the naive implementation
    X_small = X[:, :4]
    lw = LedoitWolf()
    lw.fit(X_small)
    shrinkage_ = lw.shrinkage_

    assert_almost_equal(shrinkage_, _naive_ledoit_wolf_shrinkage(X_small))


def test_ledoit_wolf_large():
    # test that ledoit_wolf doesn't error on data that is wider than block_size
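    # The blocked implementation splits the features into chunks of
    # block_size when accumulating the shrinkage sums, purely to bound
    # memory use; the resulting estimate should not depend on block_size,
    # which is what the second fit below verifies.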
    rng = np.random.RandomState(0)
    # use a number of features that is larger than the block-size
    X = rng.normal(size=(10, 20))
    lw = LedoitWolf(block_size=10).fit(X)
    # check that covariance is about diagonal (random normal noise)
    assert_almost_equal(lw.covariance_, np.eye(20), 0)
    cov = lw.covariance_

    # check that the result is consistent with not splitting data into blocks.
    lw = LedoitWolf(block_size=25).fit(X)
    assert_almost_equal(lw.covariance_, cov)


def test_oas():
    # Tests OAS module on a simple dataset.
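    # OAS (Chen et al., "Shrinkage Algorithms for MMSE Covariance
    # Estimation", 2010) shrinks toward the same target mu * I as
    # Ledoit-Wolf but uses a closed-form intensity derived under a Gaussian
    # assumption; its output should therefore match a ShrunkCovariance
    # fitted with the same intensity, which is checked below.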
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0:1]
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert(oa.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert(oa.precision_ is None)

@@ -0,0 +1,45 @@
"""
Testing for Elliptic Envelope algorithm (sklearn.covariance.elliptic_envelope).
"""

import numpy as np
import pytest

from sklearn.covariance import EllipticEnvelope
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.exceptions import NotFittedError


def test_elliptic_envelope():
    rnd = np.random.RandomState(0)
    X = rnd.randn(100, 10)
    clf = EllipticEnvelope(contamination=0.1)
    with pytest.raises(NotFittedError):
        clf.predict(X)
    with pytest.raises(NotFittedError):
        clf.decision_function(X)
    clf.fit(X)
    y_pred = clf.predict(X)
    scores = clf.score_samples(X)
    decisions = clf.decision_function(X)

    assert_array_almost_equal(
        scores, -clf.mahalanobis(X))
    assert_array_almost_equal(clf.mahalanobis(X), clf.dist_)
    assert_almost_equal(clf.score(X, np.ones(100)),
                        (100 - y_pred[y_pred == -1].size) / 100.)
    assert(sum(y_pred == -1) == sum(decisions < 0))


def test_score_samples():
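    # For sklearn outlier detectors, decision_function(X) is defined as
    # score_samples(X) - offset_, where offset_ is set from the
    # contamination level; the identity below should therefore hold
    # whatever contamination value is used.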
    X_train = [[1, 1], [1, 2], [2, 1]]
    clf1 = EllipticEnvelope(contamination=0.2).fit(X_train)
    clf2 = EllipticEnvelope().fit(X_train)
    assert_array_equal(clf1.score_samples([[2., 2.]]),
                       clf1.decision_function([[2., 2.]]) + clf1.offset_)
    assert_array_equal(clf2.score_samples([[2., 2.]]),
                       clf2.decision_function([[2., 2.]]) + clf2.offset_)
    assert_array_equal(clf1.score_samples([[2., 2.]]),
                       clf2.score_samples([[2., 2.]]))

@@ -0,0 +1,150 @@
""" Test the graphical_lasso module.
"""
import sys

import numpy as np
from scipy import linalg

from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_less

from sklearn.covariance import (graphical_lasso, GraphicalLasso,
                                GraphicalLassoCV, empirical_covariance)
from sklearn.datasets import make_sparse_spd_matrix
from io import StringIO
from sklearn.utils import check_random_state
from sklearn import datasets
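
# graphical_lasso estimates a sparse precision matrix Theta by minimizing,
# up to constants,
#     tr(S @ Theta) - log det(Theta) + alpha * sum_{i != j} |Theta_ij|
# over positive-definite Theta, where S is the empirical covariance.
# The diagonal is not penalized, which is why the hard-coded R reference
# solutions below are generated with penalize.diagonal=FALSE.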


def test_graphical_lasso(random_state=0):
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = dict()
        icovs = dict()
        for method in ('cd', 'lars'):
            cov_, icov_, costs = graphical_lasso(emp_cov, return_costs=True,
                                                 alpha=alpha, mode=method)
            covs[method] = cov_
            icovs[method] = icov_
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease (doesn't hold if alpha == 0)
            if not alpha == 0:
                assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator
    model = GraphicalLasso(alpha=.25).fit(X)
    model.score(X)
    assert_array_almost_equal(model.covariance_, covs['cd'], decimal=4)
    assert_array_almost_equal(model.covariance_, covs['lars'], decimal=4)

    # For a centered matrix, assume_centered could be chosen True or False
    # Check that this returns indeed the same result for centered data
    Z = X - X.mean(0)
    precs = list()
    for assume_centered in (False, True):
        prec_ = GraphicalLasso(
            assume_centered=assume_centered).fit(Z).precision_
        precs.append(prec_)
    assert_array_almost_equal(precs[0], precs[1])


def test_graphical_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    cov_R = np.array([
        [0.68112222, 0.0000000, 0.265820, 0.02464314],
        [0.00000000, 0.1887129, 0.000000, 0.00000000],
        [0.26582000, 0.0000000, 3.095503, 0.28697200],
        [0.02464314, 0.0000000, 0.286972, 0.57713289]
    ])
    icov_R = np.array([
        [1.5190747, 0.000000, -0.1304475, 0.0000000],
        [0.0000000, 5.299055, 0.0000000, 0.0000000],
        [-0.1304475, 0.000000, 0.3498624, -0.1683946],
        [0.0000000, 0.000000, -0.1683946, 1.8164353]
    ])
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=1.0, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)


def test_graph_lasso_2D():
    # Hard-coded solution from Python skggm package
    # obtained by calling `quic(emp_cov, lam=.1, tol=1e-8)`
    cov_skggm = np.array([[3.09550269, 1.186972],
                          [1.186972, 0.57713289]])

    icov_skggm = np.array([[1.52836773, -3.14334831],
                           [-3.14334831, 8.19753385]])
    X = datasets.load_iris().data[:, 2:]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=.1, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_skggm)
        assert_array_almost_equal(icov, icov_skggm)


def test_graphical_lasso_iris_singular():
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    indices = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222]
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5]
    ])
    X = datasets.load_iris().data[indices, :]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=0.01, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)


def test_graphical_lasso_cv(random_state=1):
    # Sample data from a sparse multivariate normal
    dim = 5
    n_samples = 6
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.96,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    # Capture stdout, to smoke test the verbose mode
    orig_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # We need verbose very high so that Parallel prints on stdout
        GraphicalLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = orig_stdout

    # Smoke test with specified alphas
    GraphicalLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)

@@ -0,0 +1,168 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Gael Varoquaux <gael.varoquaux@normalesup.org>
#         Virgile Fritsch <virgile.fritsch@inria.fr>
#
# License: BSD 3 clause

import itertools

import numpy as np

from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raise_message
from sklearn.utils._testing import assert_warns_message

from sklearn import datasets
from sklearn.covariance import empirical_covariance, MinCovDet
from sklearn.covariance import fast_mcd

X = datasets.load_iris().data
X_1d = X[:, 0]
n_samples, n_features = X.shape


def test_mcd():
    # Tests the FastMCD algorithm implementation
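    # MCD searches for the subset of h observations whose empirical
    # covariance has the smallest determinant, then derives robust location
    # and scatter estimates from that subset, so a minority of planted
    # outliers should not distort the fit in the scenarios below.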
    # Small data set
    # test without outliers (random independent normal data)
    launch_mcd_on_dataset(100, 5, 0, 0.01, 0.1, 80)
    # test with a contaminated data set (medium contamination)
    launch_mcd_on_dataset(100, 5, 20, 0.01, 0.01, 70)
    # test with a contaminated data set (strong contamination)
    launch_mcd_on_dataset(100, 5, 40, 0.1, 0.1, 50)

    # Medium data set
    launch_mcd_on_dataset(1000, 5, 450, 0.1, 0.1, 540)

    # Large data set
    launch_mcd_on_dataset(1700, 5, 800, 0.1, 0.1, 870)

    # 1D data set
    launch_mcd_on_dataset(500, 1, 100, 0.001, 0.001, 350)


def test_fast_mcd_on_invalid_input():
    X = np.arange(100)
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         fast_mcd, X)


def test_mcd_class_on_invalid_input():
    X = np.arange(100)
    mcd = MinCovDet()
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         mcd.fit, X)


def launch_mcd_on_dataset(n_samples, n_features, n_outliers, tol_loc, tol_cov,
                          tol_support):

    rand_gen = np.random.RandomState(0)
    data = rand_gen.randn(n_samples, n_features)
    # add some outliers
    outliers_index = rand_gen.permutation(n_samples)[:n_outliers]
    outliers_offset = 10. * \
        (rand_gen.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[outliers_index] += outliers_offset
    inliers_mask = np.ones(n_samples).astype(bool)
    inliers_mask[outliers_index] = False

    pure_data = data[inliers_mask]
    # compute MCD by fitting an object
    mcd_fit = MinCovDet(random_state=rand_gen).fit(data)
    T = mcd_fit.location_
    S = mcd_fit.covariance_
    H = mcd_fit.support_
    # compare with the estimates learnt from the inliers
    error_location = np.mean((pure_data.mean(0) - T) ** 2)
    assert(error_location < tol_loc)
    error_cov = np.mean((empirical_covariance(pure_data) - S) ** 2)
    assert(error_cov < tol_cov)
    assert(np.sum(H) >= tol_support)
    assert_array_almost_equal(mcd_fit.mahalanobis(data), mcd_fit.dist_)


def test_mcd_issue1127():
    # Check that the code does not break with X.shape = (3, 1)
    # (i.e. n_support = n_samples)
    rnd = np.random.RandomState(0)
    X = rnd.normal(size=(3, 1))
    mcd = MinCovDet()
    mcd.fit(X)


def test_mcd_issue3367():
    # Check that MCD completes when the covariance matrix is singular,
    # i.e. one of the rows and columns is all zeros
    rand_gen = np.random.RandomState(0)

    # Think of these as the values for X and Y -> 10 values between -5 and 5
    data_values = np.linspace(-5, 5, 10).tolist()
    # Get the cartesian product of all possible coordinate pairs from above set
    data = np.array(list(itertools.product(data_values, data_values)))

    # Add a third column that's all zeros to make our data a set of points
    # within a plane, which means that the covariance matrix will be singular
    data = np.hstack((data, np.zeros((data.shape[0], 1))))

    # The line below would raise an exception if the singular covariance
    # matrix were mishandled. As a further test, since we have points in XYZ,
    # the principal components (Eigenvectors) of these directly relate to the
    # geometry of the points. Since it's a plane, we should be able to test
    # that the Eigenvector that corresponds to the smallest Eigenvalue is the
    # plane normal, specifically [0, 0, 1], since everything is in the XY plane
    # (as I've set it up above). To do this one would start by:
    #
    #     evals, evecs = np.linalg.eigh(mcd_fit.covariance_)
    #     normal = evecs[:, np.argmin(evals)]
    #
    # After which we need to assert that our `normal` is equal to [0, 0, 1].
    # Do note that there is floating point error associated with this, so it's
    # best to subtract the two and then compare some small tolerance (e.g.
    # 1e-12).
    MinCovDet(random_state=rand_gen).fit(data)


def test_mcd_support_covariance_is_zero():
    # Check that MCD raises a ValueError with an informative message when the
    # covariance of the support data is equal to 0.
    X_1 = np.array([0.5, 0.1, 0.1, 0.1, 0.957, 0.1, 0.1, 0.1, 0.4285, 0.1])
    X_1 = X_1.reshape(-1, 1)
    X_2 = np.array([0.5, 0.3, 0.3, 0.3, 0.957, 0.3, 0.3, 0.3, 0.4285, 0.3])
    X_2 = X_2.reshape(-1, 1)
    msg = ('The covariance matrix of the support data is equal to 0, try to '
           'increase support_fraction')
    for X in [X_1, X_2]:
        assert_raise_message(ValueError, msg, MinCovDet().fit, X)


def test_mcd_increasing_det_warning():
    # Check that a warning is raised if we observe increasing determinants
    # during the c_step. In theory the sequence of determinants should be
    # decreasing. Increasing determinants are likely due to ill-conditioned
    # covariance matrices that result in poor precision matrices.

    X = [[5.1, 3.5, 1.4, 0.2],
         [4.9, 3.0, 1.4, 0.2],
         [4.7, 3.2, 1.3, 0.2],
         [4.6, 3.1, 1.5, 0.2],
         [5.0, 3.6, 1.4, 0.2],
         [4.6, 3.4, 1.4, 0.3],
         [5.0, 3.4, 1.5, 0.2],
         [4.4, 2.9, 1.4, 0.2],
         [4.9, 3.1, 1.5, 0.1],
         [5.4, 3.7, 1.5, 0.2],
         [4.8, 3.4, 1.6, 0.2],
         [4.8, 3.0, 1.4, 0.1],
         [4.3, 3.0, 1.1, 0.1],
         [5.1, 3.5, 1.4, 0.3],
         [5.7, 3.8, 1.7, 0.3],
         [5.4, 3.4, 1.7, 0.2],
         [4.6, 3.6, 1.0, 0.2],
         [5.0, 3.0, 1.6, 0.2],
         [5.2, 3.5, 1.5, 0.2]]

    mcd = MinCovDet(random_state=1)
    assert_warns_message(RuntimeWarning,
                         "Determinant has increased",
                         mcd.fit, X)