Uploaded Test files
parent f584ad9d97 / commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
venv/Lib/site-packages/sklearn/svm/__init__.py (new file, 24 lines)
@@ -0,0 +1,24 @@
"""
The :mod:`sklearn.svm` module includes Support Vector Machine algorithms.
"""

# See http://scikit-learn.sourceforge.net/modules/svm.html for complete
# documentation.

# Author: Fabian Pedregosa <fabian.pedregosa@inria.fr> with help from
#         the scikit-learn community. LibSVM and LibLinear are copyright
#         of their respective owners.
# License: BSD 3 clause (C) INRIA 2010

from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, \
    LinearSVR
from ._bounds import l1_min_c

__all__ = ['LinearSVC',
           'LinearSVR',
           'NuSVC',
           'NuSVR',
           'OneClassSVM',
           'SVC',
           'SVR',
           'l1_min_c']
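A minimal usage sketch of the public API exported by this `__init__.py` (illustrative only; assumes scikit-learn and NumPy are importable, and the toy data is made up):

import numpy as np
from sklearn.svm import SVC, l1_min_c   # public names re-exported above

X = np.array([[-2.0, -1.0], [-1.0, -1.0], [1.0, 1.0], [2.0, 1.0]])
y = np.array([0, 0, 1, 1])

clf = SVC(kernel='rbf', gamma='scale').fit(X, y)
print(clf.predict([[1.5, 1.0]]))        # [1] for this toy data
print(clf.n_support_)                   # support-vector count per class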
11 binary files not shown.
venv/Lib/site-packages/sklearn/svm/_base.py (new file, 986 lines)
@@ -0,0 +1,986 @@
import numpy as np
import scipy.sparse as sp
import warnings
from abc import ABCMeta, abstractmethod

# mypy error: error: Module 'sklearn.svm' has no attribute '_libsvm'
# (and same for other imports)
from . import _libsvm as libsvm  # type: ignore
from . import _liblinear as liblinear  # type: ignore
from . import _libsvm_sparse as libsvm_sparse  # type: ignore
from ..base import BaseEstimator, ClassifierMixin
from ..preprocessing import LabelEncoder
from ..utils.multiclass import _ovr_decision_function
from ..utils import check_array, check_random_state
from ..utils import column_or_1d
from ..utils import compute_class_weight
from ..utils.extmath import safe_sparse_dot
from ..utils.validation import check_is_fitted, _check_large_sparse
from ..utils.validation import _num_samples
from ..utils.validation import _check_sample_weight, check_consistent_length
from ..utils.multiclass import check_classification_targets
from ..exceptions import ConvergenceWarning
from ..exceptions import NotFittedError


LIBSVM_IMPL = ['c_svc', 'nu_svc', 'one_class', 'epsilon_svr', 'nu_svr']


def _one_vs_one_coef(dual_coef, n_support, support_vectors):
    """Generate primal coefficients from dual coefficients
    for the one-vs-one multi class LibSVM in the case
    of a linear kernel."""

    # get 1vs1 weights for all n*(n-1) classifiers.
    # this is somewhat messy.
    # shape of dual_coef_ is nSV * (n_classes -1)
    # see docs for details
    n_class = dual_coef.shape[0] + 1

    # XXX we could do preallocation of coef but
    # would have to take care in the sparse case
    coef = []
    sv_locs = np.cumsum(np.hstack([[0], n_support]))
    for class1 in range(n_class):
        # SVs for class1:
        sv1 = support_vectors[sv_locs[class1]:sv_locs[class1 + 1], :]
        for class2 in range(class1 + 1, n_class):
            # SVs for class2:
            sv2 = support_vectors[sv_locs[class2]:sv_locs[class2 + 1], :]

            # dual coef for class1 SVs:
            alpha1 = dual_coef[class2 - 1, sv_locs[class1]:sv_locs[class1 + 1]]
            # dual coef for class2 SVs:
            alpha2 = dual_coef[class1, sv_locs[class2]:sv_locs[class2 + 1]]
            # build weight for class1 vs class2

            coef.append(safe_sparse_dot(alpha1, sv1)
                        + safe_sparse_dot(alpha2, sv2))
    return coef
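To see the slicing scheme `_one_vs_one_coef` relies on, here is a small self-contained sketch (hypothetical support-vector counts for three classes):

import numpy as np

n_support = np.array([2, 1, 2])             # SVs per class (3 classes)
sv_locs = np.cumsum(np.hstack([[0], n_support]))
print(sv_locs)                              # [0 2 3 5]
n_class = len(n_support)
for class1 in range(n_class):
    for class2 in range(class1 + 1, n_class):
        print((class1, class2),
              'sv1 rows', (sv_locs[class1], sv_locs[class1 + 1]),
              'sv2 rows', (sv_locs[class2], sv_locs[class2 + 1]))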
class BaseLibSVM(BaseEstimator, metaclass=ABCMeta):
    """Base class for estimators that use libsvm as backing library

    This implements support vector machine classification and regression.

    Parameter documentation is in the derived `SVC` class.
    """

    # The order of these must match the integer values in LibSVM.
    # XXX These are actually the same in the dense case. Need to factor
    # this out.
    _sparse_kernels = ["linear", "poly", "rbf", "sigmoid", "precomputed"]

    @abstractmethod
    def __init__(self, kernel, degree, gamma, coef0,
                 tol, C, nu, epsilon, shrinking, probability, cache_size,
                 class_weight, verbose, max_iter, random_state):

        if self._impl not in LIBSVM_IMPL:
            raise ValueError("impl should be one of %s, %s was given" % (
                LIBSVM_IMPL, self._impl))

        if gamma == 0:
            msg = ("The gamma value of 0.0 is invalid. Use 'auto' to set"
                   " gamma to a value of 1 / n_features.")
            raise ValueError(msg)

        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.tol = tol
        self.C = C
        self.nu = nu
        self.epsilon = epsilon
        self.shrinking = shrinking
        self.probability = probability
        self.cache_size = cache_size
        self.class_weight = class_weight
        self.verbose = verbose
        self.max_iter = max_iter
        self.random_state = random_state

    @property
    def _pairwise(self):
        # Used by cross_val_score.
        return self.kernel == "precomputed"
    def fit(self, X, y, sample_weight=None):
        """Fit the SVM model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features) \
                or (n_samples, n_samples)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.
            For kernel="precomputed", the expected shape of X is
            (n_samples, n_samples).

        y : array-like of shape (n_samples,)
            Target values (class labels in classification, real numbers in
            regression).

        sample_weight : array-like of shape (n_samples,), default=None
            Per-sample weights. Rescale C per sample. Higher weights
            force the classifier to put more emphasis on these points.

        Returns
        -------
        self : object

        Notes
        -----
        If X and y are not C-ordered and contiguous arrays of np.float64 and
        X is not a scipy.sparse.csr_matrix, X and/or y may be copied.

        If X is a dense array, then the other methods will not support sparse
        matrices as input.
        """

        rnd = check_random_state(self.random_state)

        sparse = sp.isspmatrix(X)
        if sparse and self.kernel == "precomputed":
            raise TypeError("Sparse precomputed kernels are not supported.")
        self._sparse = sparse and not callable(self.kernel)

        if hasattr(self, 'decision_function_shape'):
            if self.decision_function_shape not in ('ovr', 'ovo'):
                raise ValueError(
                    f"decision_function_shape must be either 'ovr' or 'ovo', "
                    f"got {self.decision_function_shape}."
                )

        if callable(self.kernel):
            check_consistent_length(X, y)
        else:
            X, y = self._validate_data(X, y, dtype=np.float64,
                                       order='C', accept_sparse='csr',
                                       accept_large_sparse=False)

        y = self._validate_targets(y)

        sample_weight = np.asarray([]
                                   if sample_weight is None
                                   else sample_weight, dtype=np.float64)
        solver_type = LIBSVM_IMPL.index(self._impl)

        # input validation
        n_samples = _num_samples(X)
        if solver_type != 2 and n_samples != y.shape[0]:
            raise ValueError("X and y have incompatible shapes.\n" +
                             "X has %s samples, but y has %s." %
                             (n_samples, y.shape[0]))

        if self.kernel == "precomputed" and n_samples != X.shape[1]:
            raise ValueError("Precomputed matrix must be a square matrix."
                             " Input is a {}x{} matrix."
                             .format(X.shape[0], X.shape[1]))

        if sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples:
            raise ValueError("sample_weight and X have incompatible shapes: "
                             "%r vs %r\n"
                             "Note: Sparse matrices cannot be indexed w/"
                             "boolean masks (use `indices=True` in CV)."
                             % (sample_weight.shape, X.shape))

        kernel = 'precomputed' if callable(self.kernel) else self.kernel

        if kernel == 'precomputed':
            # unused but needs to be a float for cython code that ignores
            # it anyway
            self._gamma = 0.
        elif isinstance(self.gamma, str):
            if self.gamma == 'scale':
                # var = E[X^2] - E[X]^2 if sparse
                X_var = ((X.multiply(X)).mean() - (X.mean()) ** 2
                         if sparse else X.var())
                self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0
            elif self.gamma == 'auto':
                self._gamma = 1.0 / X.shape[1]
            else:
                raise ValueError(
                    "When 'gamma' is a string, it should be either 'scale' or "
                    "'auto'. Got '{}' instead.".format(self.gamma)
                )
        else:
            self._gamma = self.gamma

        fit = self._sparse_fit if self._sparse else self._dense_fit
        if self.verbose:
            print('[LibSVM]', end='')

        seed = rnd.randint(np.iinfo('i').max)
        fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
        # see comment on the other call to np.iinfo in this file

        self.shape_fit_ = X.shape if hasattr(X, "shape") else (n_samples, )

        # In binary case, we need to flip the sign of coef, intercept and
        # decision function. Use self._intercept_ and self._dual_coef_
        # internally.
        self._intercept_ = self.intercept_.copy()
        self._dual_coef_ = self.dual_coef_
        if self._impl in ['c_svc', 'nu_svc'] and len(self.classes_) == 2:
            self.intercept_ *= -1
            self.dual_coef_ = -self.dual_coef_

        return self
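A sketch of what the gamma='scale' branch in `fit` resolves to; fitting with the equivalent explicit value gives the same decision function (illustrative random data, assumes scikit-learn is installed):

import numpy as np
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
y = rng.randint(0, 2, 20)

gamma_scale = 1.0 / (X.shape[1] * X.var())      # what 'scale' computes above
clf_a = SVC(gamma='scale').fit(X, y)
clf_b = SVC(gamma=gamma_scale).fit(X, y)
print(np.allclose(clf_a.decision_function(X),
                  clf_b.decision_function(X)))  # True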
    def _validate_targets(self, y):
        """Validation of y and class_weight.

        Default implementation for SVR and one-class; overridden in BaseSVC.
        """
        # XXX this is ugly.
        # Regression models should not have a class_weight_ attribute.
        self.class_weight_ = np.empty(0)
        return column_or_1d(y, warn=True).astype(np.float64, copy=False)

    def _warn_from_fit_status(self):
        assert self.fit_status_ in (0, 1)
        if self.fit_status_ == 1:
            warnings.warn('Solver terminated early (max_iter=%i).'
                          ' Consider pre-processing your data with'
                          ' StandardScaler or MinMaxScaler.'
                          % self.max_iter, ConvergenceWarning)

    def _dense_fit(self, X, y, sample_weight, solver_type, kernel,
                   random_seed):
        if callable(self.kernel):
            # you must store a reference to X to compute the kernel in predict
            # TODO: add keyword copy to copy on demand
            self.__Xfit = X
            X = self._compute_kernel(X)

            if X.shape[0] != X.shape[1]:
                raise ValueError("X.shape[0] should be equal to X.shape[1]")

        libsvm.set_verbosity_wrap(self.verbose)

        # we don't pass **self.get_params() to allow subclasses to
        # add other parameters to __init__
        self.support_, self.support_vectors_, self._n_support, \
            self.dual_coef_, self.intercept_, self._probA, \
            self._probB, self.fit_status_ = libsvm.fit(
                X, y,
                svm_type=solver_type, sample_weight=sample_weight,
                class_weight=self.class_weight_, kernel=kernel, C=self.C,
                nu=self.nu, probability=self.probability, degree=self.degree,
                shrinking=self.shrinking, tol=self.tol,
                cache_size=self.cache_size, coef0=self.coef0,
                gamma=self._gamma, epsilon=self.epsilon,
                max_iter=self.max_iter, random_seed=random_seed)

        self._warn_from_fit_status()

    def _sparse_fit(self, X, y, sample_weight, solver_type, kernel,
                    random_seed):
        X.data = np.asarray(X.data, dtype=np.float64, order='C')
        X.sort_indices()

        kernel_type = self._sparse_kernels.index(kernel)

        libsvm_sparse.set_verbosity_wrap(self.verbose)

        self.support_, self.support_vectors_, dual_coef_data, \
            self.intercept_, self._n_support, \
            self._probA, self._probB, self.fit_status_ = \
            libsvm_sparse.libsvm_sparse_train(
                X.shape[1], X.data, X.indices, X.indptr, y, solver_type,
                kernel_type, self.degree, self._gamma, self.coef0, self.tol,
                self.C, self.class_weight_,
                sample_weight, self.nu, self.cache_size, self.epsilon,
                int(self.shrinking), int(self.probability), self.max_iter,
                random_seed)

        self._warn_from_fit_status()

        if hasattr(self, "classes_"):
            n_class = len(self.classes_) - 1
        else:  # regression
            n_class = 1
        n_SV = self.support_vectors_.shape[0]

        dual_coef_indices = np.tile(np.arange(n_SV), n_class)
        if not n_SV:
            self.dual_coef_ = sp.csr_matrix([])
        else:
            dual_coef_indptr = np.arange(0, dual_coef_indices.size + 1,
                                         dual_coef_indices.size / n_class)
            self.dual_coef_ = sp.csr_matrix(
                (dual_coef_data, dual_coef_indices, dual_coef_indptr),
                (n_class, n_SV))
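The CSR triple `(data, indices, indptr)` used to assemble `dual_coef_` above can be reproduced standalone (hypothetical values; same construction, including the float-valued indptr that scipy casts internally):

import numpy as np
from scipy import sparse as sp

n_class, n_SV = 2, 3                       # hypothetical sizes
dual_coef_data = np.array([0.5, -0.25, 1.0, -1.0, 0.75, -0.5])
dual_coef_indices = np.tile(np.arange(n_SV), n_class)          # [0 1 2 0 1 2]
dual_coef_indptr = np.arange(0, dual_coef_indices.size + 1,
                             dual_coef_indices.size / n_class)  # [0. 3. 6.]
dual_coef = sp.csr_matrix((dual_coef_data, dual_coef_indices,
                           dual_coef_indptr), (n_class, n_SV))
print(dual_coef.toarray())                 # 2 x 3 dense view of the rows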
    def predict(self, X):
        """Perform regression on samples in X.

        For a one-class model, +1 (inlier) or -1 (outlier) is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
        """
        X = self._validate_for_predict(X)
        predict = self._sparse_predict if self._sparse else self._dense_predict
        return predict(X)

    def _dense_predict(self, X):
        X = self._compute_kernel(X)
        if X.ndim == 1:
            X = check_array(X, order='C', accept_large_sparse=False)

        kernel = self.kernel
        if callable(self.kernel):
            kernel = 'precomputed'
            if X.shape[1] != self.shape_fit_[0]:
                raise ValueError("X.shape[1] = %d should be equal to %d, "
                                 "the number of samples at training time" %
                                 (X.shape[1], self.shape_fit_[0]))

        svm_type = LIBSVM_IMPL.index(self._impl)

        return libsvm.predict(
            X, self.support_, self.support_vectors_, self._n_support,
            self._dual_coef_, self._intercept_,
            self._probA, self._probB, svm_type=svm_type, kernel=kernel,
            degree=self.degree, coef0=self.coef0, gamma=self._gamma,
            cache_size=self.cache_size)

    def _sparse_predict(self, X):
        # Precondition: X is a csr_matrix of dtype np.float64.
        kernel = self.kernel
        if callable(kernel):
            kernel = 'precomputed'

        kernel_type = self._sparse_kernels.index(kernel)

        C = 0.0  # C is not useful here

        return libsvm_sparse.libsvm_sparse_predict(
            X.data, X.indices, X.indptr,
            self.support_vectors_.data,
            self.support_vectors_.indices,
            self.support_vectors_.indptr,
            self._dual_coef_.data, self._intercept_,
            LIBSVM_IMPL.index(self._impl), kernel_type,
            self.degree, self._gamma, self.coef0, self.tol,
            C, self.class_weight_,
            self.nu, self.epsilon, self.shrinking,
            self.probability, self._n_support,
            self._probA, self._probB)
    def _compute_kernel(self, X):
        """Return the data transformed by a callable kernel"""
        if callable(self.kernel):
            # in the case of precomputed kernel given as a function, we
            # have to compute explicitly the kernel matrix
            kernel = self.kernel(X, self.__Xfit)
            if sp.issparse(kernel):
                kernel = kernel.toarray()
            X = np.asarray(kernel, dtype=np.float64, order='C')
        return X

    def _decision_function(self, X):
        """Evaluates the decision function for the samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        dec : array-like of shape (n_samples, n_class * (n_class-1) / 2)
            Returns the decision function of the sample for each class
            in the model.
        """
        # NOTE: _validate_for_predict contains check for is_fitted
        # hence must be placed before any other attributes are used.
        X = self._validate_for_predict(X)
        X = self._compute_kernel(X)

        if self._sparse:
            dec_func = self._sparse_decision_function(X)
        else:
            dec_func = self._dense_decision_function(X)

        # In binary case, we need to flip the sign of coef, intercept and
        # decision function.
        if self._impl in ['c_svc', 'nu_svc'] and len(self.classes_) == 2:
            return -dec_func.ravel()

        return dec_func
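A sketch of the callable-kernel path that `_compute_kernel` supports: the estimator stores the training X and evaluates `kernel(X_new, X_train)` at prediction time (illustrative data):

import numpy as np
from sklearn.svm import SVC

def linear_kernel(A, B):
    # Gram matrix between the rows of A and B
    return np.dot(A, B.T)

X = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])
y = np.array([0, 0, 1, 1])

clf = SVC(kernel=linear_kernel).fit(X, y)   # X is kept to build K(X', X)
print(clf.predict(X))                       # kernel matrix computed on the fly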
    def _dense_decision_function(self, X):
        X = check_array(X, dtype=np.float64, order="C",
                        accept_large_sparse=False)

        kernel = self.kernel
        if callable(kernel):
            kernel = 'precomputed'

        return libsvm.decision_function(
            X, self.support_, self.support_vectors_, self._n_support,
            self._dual_coef_, self._intercept_,
            self._probA, self._probB,
            svm_type=LIBSVM_IMPL.index(self._impl),
            kernel=kernel, degree=self.degree, cache_size=self.cache_size,
            coef0=self.coef0, gamma=self._gamma)

    def _sparse_decision_function(self, X):
        X.data = np.asarray(X.data, dtype=np.float64, order='C')

        kernel = self.kernel
        if callable(kernel):
            kernel = 'precomputed'

        kernel_type = self._sparse_kernels.index(kernel)

        return libsvm_sparse.libsvm_sparse_decision_function(
            X.data, X.indices, X.indptr,
            self.support_vectors_.data,
            self.support_vectors_.indices,
            self.support_vectors_.indptr,
            self._dual_coef_.data, self._intercept_,
            LIBSVM_IMPL.index(self._impl), kernel_type,
            self.degree, self._gamma, self.coef0, self.tol,
            self.C, self.class_weight_,
            self.nu, self.epsilon, self.shrinking,
            self.probability, self._n_support,
            self._probA, self._probB)
    def _validate_for_predict(self, X):
        check_is_fitted(self)

        if not callable(self.kernel):
            X = check_array(X, accept_sparse='csr', dtype=np.float64,
                            order="C", accept_large_sparse=False)

        if self._sparse and not sp.isspmatrix(X):
            X = sp.csr_matrix(X)
        if self._sparse:
            X.sort_indices()

        if sp.issparse(X) and not self._sparse and not callable(self.kernel):
            raise ValueError(
                "cannot use sparse input in %r trained on dense data"
                % type(self).__name__)

        if self.kernel == "precomputed":
            if X.shape[1] != self.shape_fit_[0]:
                raise ValueError("X.shape[1] = %d should be equal to %d, "
                                 "the number of samples at training time" %
                                 (X.shape[1], self.shape_fit_[0]))
        elif not callable(self.kernel) and X.shape[1] != self.shape_fit_[1]:
            raise ValueError("X.shape[1] = %d should be equal to %d, "
                             "the number of features at training time" %
                             (X.shape[1], self.shape_fit_[1]))
        return X

    @property
    def coef_(self):
        if self.kernel != 'linear':
            raise AttributeError('coef_ is only available when using a '
                                 'linear kernel')

        coef = self._get_coef()

        # coef_ being a read-only property, it's better to mark the value as
        # immutable to avoid hiding potential bugs for the unsuspecting user.
        if sp.issparse(coef):
            # sparse matrices do not have global flags
            coef.data.flags.writeable = False
        else:
            # regular dense array
            coef.flags.writeable = False
        return coef

    def _get_coef(self):
        return safe_sparse_dot(self._dual_coef_, self.support_vectors_)

    @property
    def n_support_(self):
        try:
            check_is_fitted(self)
        except NotFittedError:
            raise AttributeError

        svm_type = LIBSVM_IMPL.index(self._impl)
        if svm_type in (0, 1):
            return self._n_support
        else:
            # SVR and OneClass
            # _n_support has size 2, we make it size 1
            return np.array([self._n_support[0]])
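A short sketch of the `coef_` property contract above: it is only defined for linear kernels, and the returned array is marked read-only (illustrative data):

import numpy as np
from sklearn.svm import SVC

X = np.array([[-2.0, -1.0], [-1.0, -1.0], [1.0, 1.0], [2.0, 1.0]])
y = np.array([0, 0, 1, 1])

clf = SVC(kernel='linear').fit(X, y)
print(clf.coef_)                        # dual_coef_ @ support_vectors_
try:
    clf.coef_[0, 0] = 0.0               # array is marked non-writeable above
except ValueError as exc:
    print(exc)                          # assignment destination is read-only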
class BaseSVC(ClassifierMixin, BaseLibSVM, metaclass=ABCMeta):
    """ABC for LibSVM-based classifiers."""

    @abstractmethod
    def __init__(self, kernel, degree, gamma, coef0, tol, C, nu,
                 shrinking, probability, cache_size, class_weight, verbose,
                 max_iter, decision_function_shape, random_state,
                 break_ties):
        self.decision_function_shape = decision_function_shape
        self.break_ties = break_ties
        super().__init__(
            kernel=kernel, degree=degree, gamma=gamma,
            coef0=coef0, tol=tol, C=C, nu=nu, epsilon=0., shrinking=shrinking,
            probability=probability, cache_size=cache_size,
            class_weight=class_weight, verbose=verbose, max_iter=max_iter,
            random_state=random_state)

    def _validate_targets(self, y):
        y_ = column_or_1d(y, warn=True)
        check_classification_targets(y)
        cls, y = np.unique(y_, return_inverse=True)
        self.class_weight_ = compute_class_weight(self.class_weight,
                                                  classes=cls, y=y_)
        if len(cls) < 2:
            raise ValueError(
                "The number of classes has to be greater than one; got %d"
                " class" % len(cls))

        self.classes_ = cls

        return np.asarray(y, dtype=np.float64, order='C')
    def decision_function(self, X):
        """Evaluates the decision function for the samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        dec : ndarray of shape (n_samples, n_classes * (n_classes-1) / 2)
            Returns the decision function of the sample for each class
            in the model.
            If decision_function_shape='ovr', the shape is (n_samples,
            n_classes).

        Notes
        -----
        If decision_function_shape='ovo', the function values are proportional
        to the distance of the samples X to the separating hyperplane. If the
        exact distances are required, divide the function values by the norm
        of the weight vector (``coef_``). See also `this question
        <https://stats.stackexchange.com/questions/14876/
        interpreting-distance-from-hyperplane-in-svm>`_ for further details.
        If decision_function_shape='ovr', the decision function is a monotonic
        transformation of the ovo decision function.
        """
        dec = self._decision_function(X)
        if self.decision_function_shape == 'ovr' and len(self.classes_) > 2:
            return _ovr_decision_function(dec < 0, -dec, len(self.classes_))
        return dec
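A sketch contrasting the two `decision_function_shape` values documented above (illustrative random data with four classes):

import numpy as np
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(40, 4)
y = rng.randint(0, 4, 40)          # four classes

ovo = SVC(decision_function_shape='ovo').fit(X, y).decision_function(X)
ovr = SVC(decision_function_shape='ovr').fit(X, y).decision_function(X)
print(ovo.shape)   # (40, 6): n_classes * (n_classes - 1) / 2 pairwise scores
print(ovr.shape)   # (40, 4): one monotonically transformed score per class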
    def predict(self, X):
        """Perform classification on samples in X.

        For a one-class model, +1 or -1 is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \
                (n_samples_test, n_samples_train)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Class labels for samples in X.
        """
        check_is_fitted(self)
        if self.break_ties and self.decision_function_shape == 'ovo':
            raise ValueError("break_ties must be False when "
                             "decision_function_shape is 'ovo'")

        if (self.break_ties
                and self.decision_function_shape == 'ovr'
                and len(self.classes_) > 2):
            y = np.argmax(self.decision_function(X), axis=1)
        else:
            y = super().predict(X)
        return self.classes_.take(np.asarray(y, dtype=np.intp))
    # Hacky way of getting predict_proba to raise an AttributeError when
    # probability=False using properties. Do not use this in new code; when
    # probabilities are not available depending on a setting, introduce two
    # estimators.
    def _check_proba(self):
        if not self.probability:
            raise AttributeError("predict_proba is not available when "
                                 "probability=False")
        if self._impl not in ('c_svc', 'nu_svc'):
            raise AttributeError("predict_proba only implemented for SVC"
                                 " and NuSVC")

    @property
    def predict_proba(self):
        """Compute probabilities of possible outcomes for samples in X.

        The model needs to have probability information computed at training
        time: fit with attribute `probability` set to True.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        T : ndarray of shape (n_samples, n_classes)
            Returns the probability of the sample for each class in
            the model. The columns correspond to the classes in sorted
            order, as they appear in the attribute :term:`classes_`.

        Notes
        -----
        The probability model is created using cross validation, so
        the results can be slightly different than those obtained by
        predict. Also, it will produce meaningless results on very small
        datasets.
        """
        self._check_proba()
        return self._predict_proba

    def _predict_proba(self, X):
        X = self._validate_for_predict(X)
        if self.probA_.size == 0 or self.probB_.size == 0:
            raise NotFittedError("predict_proba is not available when fitted "
                                 "with probability=False")
        pred_proba = (self._sparse_predict_proba
                      if self._sparse else self._dense_predict_proba)
        return pred_proba(X)
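A usage sketch of the probability machinery above: `probability=True` at fit time enables `predict_proba`; without it, merely accessing the attribute raises, via the property's call to `_check_proba` (illustrative data):

import numpy as np
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
y = rng.randint(0, 2, 50)

clf = SVC(probability=True, random_state=0).fit(X, y)    # enables Platt scaling
proba = clf.predict_proba(X[:3])
print(proba.shape, np.allclose(proba.sum(axis=1), 1.0))  # (3, 2) True

clf_noproba = SVC(probability=False).fit(X, y)
try:
    clf_noproba.predict_proba
except AttributeError as exc:
    print(exc)   # raised by the property via _check_proba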
    @property
    def predict_log_proba(self):
        """Compute log probabilities of possible outcomes for samples in X.

        The model needs to have probability information computed at training
        time: fit with attribute `probability` set to True.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or \
                (n_samples_test, n_samples_train)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        T : ndarray of shape (n_samples, n_classes)
            Returns the log-probabilities of the sample for each class in
            the model. The columns correspond to the classes in sorted
            order, as they appear in the attribute :term:`classes_`.

        Notes
        -----
        The probability model is created using cross validation, so
        the results can be slightly different than those obtained by
        predict. Also, it will produce meaningless results on very small
        datasets.
        """
        self._check_proba()
        return self._predict_log_proba

    def _predict_log_proba(self, X):
        return np.log(self.predict_proba(X))
    def _dense_predict_proba(self, X):
        X = self._compute_kernel(X)

        kernel = self.kernel
        if callable(kernel):
            kernel = 'precomputed'

        svm_type = LIBSVM_IMPL.index(self._impl)
        pprob = libsvm.predict_proba(
            X, self.support_, self.support_vectors_, self._n_support,
            self._dual_coef_, self._intercept_,
            self._probA, self._probB,
            svm_type=svm_type, kernel=kernel, degree=self.degree,
            cache_size=self.cache_size, coef0=self.coef0, gamma=self._gamma)

        return pprob

    def _sparse_predict_proba(self, X):
        X.data = np.asarray(X.data, dtype=np.float64, order='C')

        kernel = self.kernel
        if callable(kernel):
            kernel = 'precomputed'

        kernel_type = self._sparse_kernels.index(kernel)

        return libsvm_sparse.libsvm_sparse_predict_proba(
            X.data, X.indices, X.indptr,
            self.support_vectors_.data,
            self.support_vectors_.indices,
            self.support_vectors_.indptr,
            self._dual_coef_.data, self._intercept_,
            LIBSVM_IMPL.index(self._impl), kernel_type,
            self.degree, self._gamma, self.coef0, self.tol,
            self.C, self.class_weight_,
            self.nu, self.epsilon, self.shrinking,
            self.probability, self._n_support,
            self._probA, self._probB)
    def _get_coef(self):
        if self.dual_coef_.shape[0] == 1:
            # binary classifier
            coef = safe_sparse_dot(self.dual_coef_, self.support_vectors_)
        else:
            # 1vs1 classifier
            coef = _one_vs_one_coef(self.dual_coef_, self._n_support,
                                    self.support_vectors_)
            if sp.issparse(coef[0]):
                coef = sp.vstack(coef).tocsr()
            else:
                coef = np.vstack(coef)

        return coef

    @property
    def probA_(self):
        return self._probA

    @property
    def probB_(self):
        return self._probB
def _get_liblinear_solver_type(multi_class, penalty, loss, dual):
    """Find the liblinear magic number for the solver.

    This number depends on the values of the following attributes:
      - multi_class
      - penalty
      - loss
      - dual

    The same number is also internally used by LibLinear to determine
    which solver to use.
    """
    # nested dicts containing level 1: available loss functions,
    # level 2: available penalties for the given loss function,
    # level 3: whether the dual solver is available for the specified
    # combination of loss function and penalty
    _solver_type_dict = {
        'logistic_regression': {
            'l1': {False: 6},
            'l2': {False: 0, True: 7}},
        'hinge': {
            'l2': {True: 3}},
        'squared_hinge': {
            'l1': {False: 5},
            'l2': {False: 2, True: 1}},
        'epsilon_insensitive': {
            'l2': {True: 13}},
        'squared_epsilon_insensitive': {
            'l2': {False: 11, True: 12}},
        'crammer_singer': 4
    }

    if multi_class == 'crammer_singer':
        return _solver_type_dict[multi_class]
    elif multi_class != 'ovr':
        raise ValueError("`multi_class` must be one of `ovr`, "
                         "`crammer_singer`, got %r" % multi_class)

    _solver_pen = _solver_type_dict.get(loss, None)
    if _solver_pen is None:
        error_string = ("loss='%s' is not supported" % loss)
    else:
        _solver_dual = _solver_pen.get(penalty, None)
        if _solver_dual is None:
            error_string = ("The combination of penalty='%s' "
                            "and loss='%s' is not supported"
                            % (penalty, loss))
        else:
            solver_num = _solver_dual.get(dual, None)
            if solver_num is None:
                error_string = ("The combination of penalty='%s' and "
                                "loss='%s' is not supported when dual=%s"
                                % (penalty, loss, dual))
            else:
                return solver_num
    raise ValueError('Unsupported set of arguments: %s, '
                     'Parameters: penalty=%r, loss=%r, dual=%r'
                     % (error_string, penalty, loss, dual))
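The solver lookup can be reproduced with a plain nested-dict excerpt of the table above (a sketch, not an import of the private helper); for example, LinearSVC's defaults (squared_hinge, l2, dual=True) map to solver 1:

# excerpt of the (loss -> penalty -> dual -> solver id) table above
solver_table = {
    'squared_hinge': {'l1': {False: 5}, 'l2': {False: 2, True: 1}},
    'hinge': {'l2': {True: 3}},
}
print(solver_table['squared_hinge']['l2'][True])   # 1
print(solver_table['hinge']['l2'][True])           # 3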
def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
                   penalty, dual, verbose, max_iter, tol,
                   random_state=None, multi_class='ovr',
                   loss='logistic_regression', epsilon=0.1,
                   sample_weight=None):
    """Used by Logistic Regression (and CV) and LinearSVC/LinearSVR.

    Preprocessing is done in this function before supplying it to liblinear.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like of shape (n_samples,)
        Target vector relative to X.

    C : float
        Inverse of regularization strength. The lower the C, the stronger
        the penalization.

    fit_intercept : bool
        Whether or not to fit the intercept, that is to add an intercept
        term to the decision function.

    intercept_scaling : float
        LibLinear internally penalizes the intercept and this term is subject
        to regularization just like the other terms of the feature vector.
        In order to avoid this, one should increase the intercept_scaling
        such that the feature vector becomes [x, intercept_scaling].

    class_weight : dict or 'balanced', default=None
        Weights associated with classes in the form ``{class_label: weight}``.
        If not given, all classes are supposed to have weight one. For
        multi-output problems, a list of dicts can be provided in the same
        order as the columns of y.

        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``.

    penalty : {'l1', 'l2'}
        The norm of the penalty used in regularization.

    dual : bool
        Dual or primal formulation.

    verbose : int
        Set verbose to any positive number for verbosity.

    max_iter : int
        Number of iterations.

    tol : float
        Stopping condition.

    random_state : int or RandomState instance, default=None
        Controls the pseudo random number generation for shuffling the data.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    multi_class : {'ovr', 'crammer_singer'}, default='ovr'
        `ovr` trains n_classes one-vs-rest classifiers, while
        `crammer_singer` optimizes a joint objective over all classes.
        While `crammer_singer` is interesting from a theoretical perspective
        as it is consistent, it is seldom used in practice, rarely leads to
        better accuracy, and is more expensive to compute.
        If `crammer_singer` is chosen, the options loss, penalty and dual
        will be ignored.

    loss : {'logistic_regression', 'hinge', 'squared_hinge', \
            'epsilon_insensitive', 'squared_epsilon_insensitive'}, \
            default='logistic_regression'
        The loss function used to fit the model.

    epsilon : float, default=0.1
        Epsilon parameter in the epsilon-insensitive loss function. Note
        that the value of this parameter depends on the scale of the target
        variable y. If unsure, set epsilon=0.

    sample_weight : array-like of shape (n_samples,), default=None
        Weights assigned to each sample.

    Returns
    -------
    coef_ : ndarray of shape (n_classes, n_features)
        The coefficient matrix obtained by minimizing the objective function.

    intercept_ : float
        The intercept term added to the decision function.

    n_iter_ : int
        Maximum number of iterations run across all classes.
    """
    if loss not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:
        enc = LabelEncoder()
        y_ind = enc.fit_transform(y)
        classes_ = enc.classes_
        if len(classes_) < 2:
            raise ValueError("This solver needs samples of at least 2 classes"
                             " in the data, but the data contains only one"
                             " class: %r" % classes_[0])

        class_weight_ = compute_class_weight(class_weight, classes=classes_,
                                             y=y)
    else:
        class_weight_ = np.empty(0, dtype=np.float64)
        y_ind = y
    liblinear.set_verbosity_wrap(verbose)
    rnd = check_random_state(random_state)
    if verbose:
        print('[LibLinear]', end='')

    # LinearSVC breaks when intercept_scaling is <= 0
    bias = -1.0
    if fit_intercept:
        if intercept_scaling <= 0:
            raise ValueError("Intercept scaling is %r but needs to be greater "
                             "than 0. To disable fitting an intercept,"
                             " set fit_intercept=False." % intercept_scaling)
        else:
            bias = intercept_scaling

    libsvm.set_verbosity_wrap(verbose)
    libsvm_sparse.set_verbosity_wrap(verbose)
    liblinear.set_verbosity_wrap(verbose)

    # Liblinear doesn't support 64bit sparse matrix indices yet
    if sp.issparse(X):
        _check_large_sparse(X)

    # LibLinear wants targets as doubles, even for classification
    y_ind = np.asarray(y_ind, dtype=np.float64).ravel()
    y_ind = np.require(y_ind, requirements="W")

    sample_weight = _check_sample_weight(sample_weight, X,
                                         dtype=np.float64)

    solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
    raw_coef_, n_iter_ = liblinear.train_wrap(
        X, y_ind, sp.isspmatrix(X), solver_type, tol, bias, C,
        class_weight_, max_iter, rnd.randint(np.iinfo('i').max),
        epsilon, sample_weight)
    # Regarding rnd.randint(..) in the above signature:
    # seed for srand in range [0..INT_MAX); due to limitations in Numpy
    # on 32-bit platforms, we can't get to the UINT_MAX limit that
    # srand supports
    n_iter_ = max(n_iter_)
    if n_iter_ >= max_iter:
        warnings.warn("Liblinear failed to converge, increase "
                      "the number of iterations.", ConvergenceWarning)

    if fit_intercept:
        coef_ = raw_coef_[:, :-1]
        intercept_ = intercept_scaling * raw_coef_[:, -1]
    else:
        coef_ = raw_coef_
        intercept_ = 0.

    return coef_, intercept_, n_iter_
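`_fit_liblinear` is private; the supported way to exercise it is through LinearSVC (or LinearSVR / LogisticRegression), which forwards to it. A minimal sketch with illustrative data:

import numpy as np
from sklearn.svm import LinearSVC

rng = np.random.RandomState(0)
X = rng.randn(60, 5)
y = rng.randint(0, 2, 60)

clf = LinearSVC(C=1.0, max_iter=1000).fit(X, y)   # calls _fit_liblinear
print(clf.coef_.shape, clf.intercept_.shape, clf.n_iter_)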
venv/Lib/site-packages/sklearn/svm/_bounds.py (new file, 74 lines)
@@ -0,0 +1,74 @@
"""Determination of parameter bounds"""
# Author: Paolo Losi
# License: BSD 3 clause

import numpy as np

from ..preprocessing import LabelBinarizer
from ..utils.validation import check_consistent_length, check_array
from ..utils.validation import _deprecate_positional_args
from ..utils.extmath import safe_sparse_dot


@_deprecate_positional_args
def l1_min_c(X, y, *, loss='squared_hinge', fit_intercept=True,
             intercept_scaling=1.0):
    """
    Return the lowest bound for C such that for C in (l1_min_C, infinity)
    the model is guaranteed not to be empty. This applies to l1 penalized
    classifiers, such as LinearSVC with penalty='l1' and
    linear_model.LogisticRegression with penalty='l1'.

    This value is valid if the class_weight parameter in fit() is not set.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like of shape (n_samples,)
        Target vector relative to X.

    loss : {'squared_hinge', 'log'}, default='squared_hinge'
        Specifies the loss function.
        With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).
        With 'log' it is the loss of logistic regression models.

    fit_intercept : bool, default=True
        Specifies if the intercept should be fitted by the model.
        It must match the fit() method parameter.

    intercept_scaling : float, default=1.0
        When fit_intercept is True, the instance vector x becomes
        [x, intercept_scaling], i.e. a "synthetic" feature with a constant
        value equal to intercept_scaling is appended to the instance vector.
        It must match the fit() method parameter.

    Returns
    -------
    l1_min_c : float
        Minimum value for C.
    """
    if loss not in ('squared_hinge', 'log'):
        raise ValueError('loss type not in ("squared_hinge", "log")')

    X = check_array(X, accept_sparse='csc')
    check_consistent_length(X, y)

    Y = LabelBinarizer(neg_label=-1).fit_transform(y).T
    # maximum absolute value over classes and features
    den = np.max(np.abs(safe_sparse_dot(Y, X)))
    if fit_intercept:
        bias = np.full((np.size(y), 1), intercept_scaling,
                       dtype=np.array(intercept_scaling).dtype)
        den = max(den, abs(np.dot(Y, bias)).max())

    if den == 0.0:
        raise ValueError('Ill-posed l1_min_c calculation: l1 will always '
                         'select zero coefficients for this data')
    if loss == 'squared_hinge':
        return 0.5 / den
    else:  # loss == 'log':
        return 2.0 / den
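A usage sketch of `l1_min_c` mirroring the guarantee stated in the docstring: at C = l1_min_c all coefficients are zero, and just above it some become nonzero (illustrative data):

import numpy as np
from sklearn.svm import l1_min_c, LinearSVC

X = np.array([[-1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [1.0, 1.0]])
y = np.array([0, 1, 1, 1])

min_c = l1_min_c(X, y, loss='squared_hinge')
clf = LinearSVC(penalty='l1', dual=False, C=min_c).fit(X, y)
print((clf.coef_ == 0).all())            # True: at the bound, all weights are 0
clf.set_params(C=min_c * 1.01).fit(X, y)
print((clf.coef_ != 0).any())            # True: just above it, a weight is live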
venv/Lib/site-packages/sklearn/svm/_classes.py (new file, 1373 lines)
Diff suppressed because it is too large.
venv/Lib/site-packages/sklearn/svm/_liblinear.cp36-win32.pyd (new binary file, not shown)
venv/Lib/site-packages/sklearn/svm/_libsvm.cp36-win32.pyd (new binary file, not shown)
venv/Lib/site-packages/sklearn/svm/_libsvm_sparse.cp36-win32.pyd (new binary file, not shown)
venv/Lib/site-packages/sklearn/svm/base.py (new file, 18 lines)
@@ -0,0 +1,18 @@

# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _base  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.svm.base'
correct_import_path = 'sklearn.svm'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_base, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
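The shim above relies on module-level `__getattr__` (PEP 562), emulated via `Pep562` on Python < 3.7. A standalone sketch of the forwarding pattern, using `math` as a stand-in for the real implementation module:

# deprecated_shim.py (hypothetical file illustrating the pattern)
import warnings

import math as _impl   # stand-in for the real implementation module


def __getattr__(name):
    # Only called when `name` is not found in this module (PEP 562).
    warnings.warn("deprecated_shim is deprecated; import from the new "
                  "location instead", FutureWarning)
    return getattr(_impl, name)

# elsewhere:
#   import deprecated_shim
#   deprecated_shim.sqrt(4.0)   # forwards to math.sqrt, with a warning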
venv/Lib/site-packages/sklearn/svm/bounds.py (new file, 18 lines)
@@ -0,0 +1,18 @@

# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _bounds  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.svm.bounds'
correct_import_path = 'sklearn.svm'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_bounds, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
venv/Lib/site-packages/sklearn/svm/classes.py (new file, 18 lines)
@@ -0,0 +1,18 @@

# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _classes  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.svm.classes'
correct_import_path = 'sklearn.svm'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_classes, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
venv/Lib/site-packages/sklearn/svm/liblinear.py (new file, 18 lines)
@@ -0,0 +1,18 @@

# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _liblinear  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.svm.liblinear'
correct_import_path = 'sklearn.svm'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_liblinear, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
venv/Lib/site-packages/sklearn/svm/libsvm.py (new file, 18 lines)
@@ -0,0 +1,18 @@

# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _libsvm  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.svm.libsvm'
correct_import_path = 'sklearn.svm'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_libsvm, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
venv/Lib/site-packages/sklearn/svm/libsvm_sparse.py (new file, 18 lines)
@@ -0,0 +1,18 @@

# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _libsvm_sparse  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.svm.libsvm_sparse'
correct_import_path = 'sklearn.svm'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_libsvm_sparse, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
venv/Lib/site-packages/sklearn/svm/setup.py (new file, 98 lines)
@@ -0,0 +1,98 @@
import os
from os.path import join
import numpy


def configuration(parent_package='', top_path=None):
    from numpy.distutils.misc_util import Configuration

    config = Configuration('svm', parent_package, top_path)

    config.add_subpackage('tests')

    # Section LibSVM

    # we compile both libsvm and libsvm_sparse
    config.add_library('libsvm-skl',
                       sources=[join('src', 'libsvm', 'libsvm_template.cpp')],
                       depends=[join('src', 'libsvm', 'svm.cpp'),
                                join('src', 'libsvm', 'svm.h'),
                                join('src', 'newrand', 'newrand.h')],
                       # Force C++ linking in case gcc is picked up instead
                       # of g++ under windows with some versions of MinGW
                       extra_link_args=['-lstdc++'],
                       # Use C++11 to use the random number generator fix
                       extra_compiler_args=['-std=c++11'],
                       )

    libsvm_sources = ['_libsvm.pyx']
    libsvm_depends = [join('src', 'libsvm', 'libsvm_helper.c'),
                      join('src', 'libsvm', 'libsvm_template.cpp'),
                      join('src', 'libsvm', 'svm.cpp'),
                      join('src', 'libsvm', 'svm.h'),
                      join('src', 'newrand', 'newrand.h')]

    config.add_extension('_libsvm',
                         sources=libsvm_sources,
                         include_dirs=[numpy.get_include(),
                                       join('src', 'libsvm'),
                                       join('src', 'newrand')],
                         libraries=['libsvm-skl'],
                         depends=libsvm_depends,
                         )

    # liblinear module
    libraries = []
    if os.name == 'posix':
        libraries.append('m')

    # precompile liblinear to use C++11 flag
    config.add_library('liblinear-skl',
                       sources=[join('src', 'liblinear', 'linear.cpp'),
                                join('src', 'liblinear', 'tron.cpp')],
                       depends=[join('src', 'liblinear', 'linear.h'),
                                join('src', 'liblinear', 'tron.h'),
                                join('src', 'newrand', 'newrand.h')],
                       # Force C++ linking in case gcc is picked up instead
                       # of g++ under windows with some versions of MinGW
                       extra_link_args=['-lstdc++'],
                       # Use C++11 to use the random number generator fix
                       extra_compiler_args=['-std=c++11'],
                       )

    liblinear_sources = ['_liblinear.pyx']
    liblinear_depends = [join('src', 'liblinear', '*.h'),
                         join('src', 'newrand', 'newrand.h'),
                         join('src', 'liblinear', 'liblinear_helper.c')]

    config.add_extension('_liblinear',
                         sources=liblinear_sources,
                         libraries=['liblinear-skl'] + libraries,
                         include_dirs=[join('.', 'src', 'liblinear'),
                                       join('.', 'src', 'newrand'),
                                       join('..', 'utils'),
                                       numpy.get_include()],
                         depends=liblinear_depends,
                         # extra_compile_args=['-O0 -fno-inline'],
                         )

    # end liblinear module

    # this should go *after* libsvm-skl
    libsvm_sparse_sources = ['_libsvm_sparse.pyx']
    config.add_extension('_libsvm_sparse', libraries=['libsvm-skl'],
                         sources=libsvm_sparse_sources,
                         include_dirs=[numpy.get_include(),
                                       join("src", "libsvm"),
                                       join("src", "newrand")],
                         depends=[join("src", "libsvm", "svm.h"),
                                  join('src', 'newrand', 'newrand.h'),
                                  join("src", "libsvm",
                                       "libsvm_sparse_helper.c")])

    return config


if __name__ == '__main__':
    from numpy.distutils.core import setup
    setup(**configuration(top_path='').todict())
venv/Lib/site-packages/sklearn/svm/tests/__init__.py (new file, empty)
4 binary files not shown.
venv/Lib/site-packages/sklearn/svm/tests/test_bounds.py (new file, 76 lines)
@@ -0,0 +1,76 @@
import numpy as np
from scipy import sparse as sp

import pytest

from sklearn.svm._bounds import l1_min_c
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

from sklearn.utils._testing import assert_raise_message


dense_X = [[-1, 0], [0, 1], [1, 1], [1, 1]]
sparse_X = sp.csr_matrix(dense_X)

Y1 = [0, 1, 1, 1]
Y2 = [2, 1, 0, 0]


@pytest.mark.parametrize('loss', ['squared_hinge', 'log'])
@pytest.mark.parametrize('X_label', ['sparse', 'dense'])
@pytest.mark.parametrize('Y_label', ['two-classes', 'multi-class'])
@pytest.mark.parametrize('intercept_label', ['no-intercept', 'fit-intercept'])
def test_l1_min_c(loss, X_label, Y_label, intercept_label):
    Xs = {'sparse': sparse_X, 'dense': dense_X}
    Ys = {'two-classes': Y1, 'multi-class': Y2}
    intercepts = {'no-intercept': {'fit_intercept': False},
                  'fit-intercept': {'fit_intercept': True,
                                    'intercept_scaling': 10}}

    X = Xs[X_label]
    Y = Ys[Y_label]
    intercept_params = intercepts[intercept_label]
    check_l1_min_c(X, Y, loss, **intercept_params)


def test_l1_min_c_l2_loss():
    # loss='l2' should raise ValueError
    assert_raise_message(ValueError, "loss type not in",
                         l1_min_c, dense_X, Y1, loss="l2")


def check_l1_min_c(X, y, loss, fit_intercept=True, intercept_scaling=None):
    min_c = l1_min_c(X, y, loss=loss, fit_intercept=fit_intercept,
                     intercept_scaling=intercept_scaling)

    clf = {
        'log': LogisticRegression(penalty='l1', solver='liblinear'),
        'squared_hinge': LinearSVC(loss='squared_hinge',
                                   penalty='l1', dual=False),
    }[loss]

    clf.fit_intercept = fit_intercept
    clf.intercept_scaling = intercept_scaling

    clf.C = min_c
    clf.fit(X, y)
    assert (np.asarray(clf.coef_) == 0).all()
    assert (np.asarray(clf.intercept_) == 0).all()

    clf.C = min_c * 1.01
    clf.fit(X, y)
    assert ((np.asarray(clf.coef_) != 0).any() or
            (np.asarray(clf.intercept_) != 0).any())


def test_ill_posed_min_c():
    X = [[0, 0], [0, 0]]
    y = [0, 1]
    with pytest.raises(ValueError):
        l1_min_c(X, y)


def test_unsupported_loss():
    with pytest.raises(ValueError):
        l1_min_c(dense_X, Y1, loss='l1')
362
venv/Lib/site-packages/sklearn/svm/tests/test_sparse.py
Normal file
362
venv/Lib/site-packages/sklearn/svm/tests/test_sparse.py
Normal file
|
@ -0,0 +1,362 @@
|
|||
import pytest
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_almost_equal, assert_array_equal
|
||||
from scipy import sparse
|
||||
|
||||
from sklearn import datasets, svm, linear_model, base
|
||||
from sklearn.datasets import make_classification, load_digits, make_blobs
|
||||
from sklearn.svm.tests import test_svm
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.utils.extmath import safe_sparse_dot
|
||||
from sklearn.utils._testing import (assert_warns,
|
||||
assert_raise_message, ignore_warnings,
|
||||
skip_if_32bit)
|
||||
|
||||
|
||||
# test sample 1
|
||||
X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
|
||||
X_sp = sparse.lil_matrix(X)
|
||||
Y = [1, 1, 1, 2, 2, 2]
|
||||
T = np.array([[-1, -1], [2, 2], [3, 2]])
|
||||
true_result = [1, 2, 2]
|
||||
|
||||
# test sample 2
|
||||
X2 = np.array([[0, 0, 0], [1, 1, 1], [2, 0, 0, ],
|
||||
[0, 0, 2], [3, 3, 3]])
|
||||
X2_sp = sparse.dok_matrix(X2)
|
||||
Y2 = [1, 2, 2, 2, 3]
|
||||
T2 = np.array([[-1, -1, -1], [1, 1, 1], [2, 2, 2]])
|
||||
true_result2 = [1, 2, 3]
|
||||
|
||||
|
||||
iris = datasets.load_iris()
|
||||
# permute
|
||||
rng = np.random.RandomState(0)
|
||||
perm = rng.permutation(iris.target.size)
|
||||
iris.data = iris.data[perm]
|
||||
iris.target = iris.target[perm]
|
||||
# sparsify
|
||||
iris.data = sparse.csr_matrix(iris.data)
|
||||
|
||||
|
||||
def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test):
|
||||
dense_svm.fit(X_train.toarray(), y_train)
|
||||
if sparse.isspmatrix(X_test):
|
||||
X_test_dense = X_test.toarray()
|
||||
else:
|
||||
X_test_dense = X_test
|
||||
sparse_svm.fit(X_train, y_train)
|
||||
assert sparse.issparse(sparse_svm.support_vectors_)
|
||||
assert sparse.issparse(sparse_svm.dual_coef_)
|
||||
assert_array_almost_equal(dense_svm.support_vectors_,
|
||||
sparse_svm.support_vectors_.toarray())
|
||||
assert_array_almost_equal(dense_svm.dual_coef_,
|
||||
sparse_svm.dual_coef_.toarray())
|
||||
if dense_svm.kernel == "linear":
|
||||
assert sparse.issparse(sparse_svm.coef_)
|
||||
assert_array_almost_equal(dense_svm.coef_, sparse_svm.coef_.toarray())
|
||||
assert_array_almost_equal(dense_svm.support_, sparse_svm.support_)
|
||||
assert_array_almost_equal(dense_svm.predict(X_test_dense),
|
||||
sparse_svm.predict(X_test))
|
||||
assert_array_almost_equal(dense_svm.decision_function(X_test_dense),
|
||||
sparse_svm.decision_function(X_test))
|
||||
assert_array_almost_equal(dense_svm.decision_function(X_test_dense),
|
||||
sparse_svm.decision_function(X_test_dense))
|
||||
if isinstance(dense_svm, svm.OneClassSVM):
|
||||
msg = "cannot use sparse input in 'OneClassSVM' trained on dense data"
|
||||
else:
|
||||
assert_array_almost_equal(dense_svm.predict_proba(X_test_dense),
|
||||
sparse_svm.predict_proba(X_test), 4)
|
||||
msg = "cannot use sparse input in 'SVC' trained on dense data"
|
||||
if sparse.isspmatrix(X_test):
|
||||
assert_raise_message(ValueError, msg, dense_svm.predict, X_test)


@skip_if_32bit
def test_svc():
    """Check that sparse SVC gives the same result as SVC"""
    # many class dataset:
    X_blobs, y_blobs = make_blobs(n_samples=100, centers=10, random_state=0)
    X_blobs = sparse.csr_matrix(X_blobs)

    datasets = [[X_sp, Y, T], [X2_sp, Y2, T2],
                [X_blobs[:80], y_blobs[:80], X_blobs[80:]],
                [iris.data, iris.target, iris.data]]
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    for dataset in datasets:
        for kernel in kernels:
            clf = svm.SVC(gamma=1, kernel=kernel, probability=True,
                          random_state=0, decision_function_shape='ovo')
            sp_clf = svm.SVC(gamma=1, kernel=kernel, probability=True,
                             random_state=0, decision_function_shape='ovo')
            check_svm_model_equal(clf, sp_clf, *dataset)


def test_unsorted_indices():
    # test that the result with sorted and unsorted indices in csr is the same
    # we use a subset of digits as iris, blobs or make_classification didn't
    # show the problem
    X, y = load_digits(return_X_y=True)
    X_test = sparse.csr_matrix(X[50:100])
    X, y = X[:50], y[:50]

    X_sparse = sparse.csr_matrix(X)
    coef_dense = svm.SVC(kernel='linear', probability=True,
                         random_state=0).fit(X, y).coef_
    sparse_svc = svm.SVC(kernel='linear', probability=True,
                         random_state=0).fit(X_sparse, y)
    coef_sorted = sparse_svc.coef_
    # make sure dense and sparse SVM give the same result
    assert_array_almost_equal(coef_dense, coef_sorted.toarray())

    # reverse each row's indices
    def scramble_indices(X):
        new_data = []
        new_indices = []
        for i in range(1, len(X.indptr)):
            row_slice = slice(*X.indptr[i - 1: i + 1])
            new_data.extend(X.data[row_slice][::-1])
            new_indices.extend(X.indices[row_slice][::-1])
        return sparse.csr_matrix((new_data, new_indices, X.indptr),
                                 shape=X.shape)

    X_sparse_unsorted = scramble_indices(X_sparse)
    X_test_unsorted = scramble_indices(X_test)

    assert not X_sparse_unsorted.has_sorted_indices
    assert not X_test_unsorted.has_sorted_indices

    unsorted_svc = svm.SVC(kernel='linear', probability=True,
                           random_state=0).fit(X_sparse_unsorted, y)
    coef_unsorted = unsorted_svc.coef_
    # make sure unsorted indices give same result
    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())
    assert_array_almost_equal(sparse_svc.predict_proba(X_test_unsorted),
                              sparse_svc.predict_proba(X_test))
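

# The scramble_indices helper above exploits the CSR layout directly: row i of
# a CSR matrix is stored as data[indptr[i]:indptr[i + 1]], with matching
# column numbers in indices[indptr[i]:indptr[i + 1]]. Reversing both slices
# per row changes the storage order but not the logical matrix. A minimal
# sketch of this (illustrative only, not part of the test suite):
def _csr_row_reversal_sketch():
    M = sparse.csr_matrix(np.array([[1, 0, 2], [0, 3, 4]]))
    # M is stored as indptr [0, 2, 4], indices [0, 2, 1, 2], data [1, 2, 3, 4]
    # Reversing each row's slice gives indices [2, 0, 2, 1] and data
    # [2, 1, 4, 3]: per-row order is now unsorted, content is unchanged.
    scrambled = sparse.csr_matrix(
        (np.array([2, 1, 4, 3]), np.array([2, 0, 2, 1]), M.indptr),
        shape=M.shape)
    assert not scrambled.has_sorted_indices
    assert (scrambled.toarray() == M.toarray()).all()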


def test_svc_with_custom_kernel():
    def kfunc(x, y):
        return safe_sparse_dot(x, y.T)
    clf_lin = svm.SVC(kernel='linear').fit(X_sp, Y)
    clf_mylin = svm.SVC(kernel=kfunc).fit(X_sp, Y)
    assert_array_equal(clf_lin.predict(X_sp), clf_mylin.predict(X_sp))
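

# A callable kernel receives the raw training/test matrices, which may be
# sparse here, so it must build the Gram matrix in a sparse-safe way;
# safe_sparse_dot handles both dense and sparse inputs. A hypothetical
# non-linear variant (illustrative sketch only; _quadratic_kernel is not
# used by any test):
def _quadratic_kernel(x, y):
    # densify the Gram matrix so the elementwise power is well defined
    gram = safe_sparse_dot(x, y.T, dense_output=True)
    return (gram + 1) ** 2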


def test_svc_iris():
    # Test the sparse SVC with the iris dataset
    for k in ('linear', 'poly', 'rbf'):
        sp_clf = svm.SVC(kernel=k).fit(iris.data, iris.target)
        clf = svm.SVC(kernel=k).fit(iris.data.toarray(),
                                    iris.target)

        assert_array_almost_equal(clf.support_vectors_,
                                  sp_clf.support_vectors_.toarray())
        assert_array_almost_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray())
        assert_array_almost_equal(
            clf.predict(iris.data.toarray()), sp_clf.predict(iris.data))
        if k == 'linear':
            assert_array_almost_equal(clf.coef_, sp_clf.coef_.toarray())


def test_sparse_decision_function():
    # Test decision_function

    # Sanity check, test that decision_function implemented in python
    # returns the same as the one in libsvm

    # multi class:
    svc = svm.SVC(kernel='linear', C=0.1, decision_function_shape='ovo')
    clf = svc.fit(iris.data, iris.target)

    dec = safe_sparse_dot(iris.data, clf.coef_.T) + clf.intercept_

    assert_array_almost_equal(dec, clf.decision_function(iris.data))

    # binary:
    clf.fit(X, Y)
    dec = np.dot(X, clf.coef_.T) + clf.intercept_
    prediction = clf.predict(X)
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
    assert_array_almost_equal(
        prediction,
        clf.classes_[(clf.decision_function(X) > 0).astype(int).ravel()])
    expected = np.array([-1., -0.66, -1., 0.66, 1., 1.])
    assert_array_almost_equal(clf.decision_function(X), expected, 2)
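

# With decision_function_shape='ovo' and n classes, decision_function returns
# n * (n - 1) / 2 columns, one per class pair; for the 3 iris classes that is
# 3 columns, which is why the linear expansion X @ coef_.T + intercept_ above
# matches it column for column.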


def test_error():
    # Test that it gives proper exception on deficient input
    # impossible value of C
    with pytest.raises(ValueError):
        svm.SVC(C=-1).fit(X, Y)

    # impossible value of nu
    clf = svm.NuSVC(nu=0.0)
    with pytest.raises(ValueError):
        clf.fit(X_sp, Y)

    Y2 = Y[:-1]  # wrong dimensions for labels
    with pytest.raises(ValueError):
        clf.fit(X_sp, Y2)

    clf = svm.SVC()
    clf.fit(X_sp, Y)
    assert_array_equal(clf.predict(T), true_result)


def test_linearsvc():
    # Similar to test_SVC
    clf = svm.LinearSVC(random_state=0).fit(X, Y)
    sp_clf = svm.LinearSVC(random_state=0).fit(X_sp, Y)

    assert sp_clf.fit_intercept

    assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=4)
    assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=4)

    assert_array_almost_equal(clf.predict(X), sp_clf.predict(X_sp))

    clf.fit(X2, Y2)
    sp_clf.fit(X2_sp, Y2)

    assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=4)
    assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=4)


def test_linearsvc_iris():
    # Test the sparse LinearSVC with the iris dataset

    sp_clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target)
    clf = svm.LinearSVC(random_state=0).fit(iris.data.toarray(), iris.target)

    assert clf.fit_intercept == sp_clf.fit_intercept

    assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=1)
    assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=1)
    assert_array_almost_equal(
        clf.predict(iris.data.toarray()), sp_clf.predict(iris.data))

    # check decision_function
    pred = np.argmax(sp_clf.decision_function(iris.data), 1)
    assert_array_almost_equal(pred, clf.predict(iris.data.toarray()))

    # sparsify the coefficients on both models and check that they still
    # produce the same results
    clf.sparsify()
    assert_array_equal(pred, clf.predict(iris.data))
    sp_clf.sparsify()
    assert_array_equal(pred, sp_clf.predict(iris.data))


def test_weight():
    # Test class weights
    X_, y_ = make_classification(n_samples=200, n_features=100,
                                 weights=[0.833, 0.167], random_state=0)

    X_ = sparse.csr_matrix(X_)
    for clf in (linear_model.LogisticRegression(),
                svm.LinearSVC(random_state=0),
                svm.SVC()):
        clf.set_params(class_weight={0: 5})
        clf.fit(X_[:180], y_[:180])
        y_pred = clf.predict(X_[180:])
        assert np.sum(y_pred == y_[180:]) >= 11
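

# class_weight={0: 5} multiplies the regularization parameter C by 5 for
# samples of class 0, biasing the decision boundary towards the majority
# class created by weights=[0.833, 0.167] above; class_weight='balanced'
# would instead weight classes inversely proportional to their frequencies.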


def test_sample_weights():
    # Test weights on individual samples
    clf = svm.SVC()
    clf.fit(X_sp, Y)
    assert_array_equal(clf.predict([X[2]]), [1.])

    sample_weight = [.1] * 3 + [10] * 3
    clf.fit(X_sp, Y, sample_weight=sample_weight)
    assert_array_equal(clf.predict([X[2]]), [2.])
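

# sample_weight rescales C per sample: down-weighting the three class-1
# samples to 0.1 and up-weighting the three class-2 samples to 10 moves the
# margin enough that X[2] = [-1, -2], originally predicted as class 1, flips
# to class 2.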


def test_sparse_liblinear_intercept_handling():
    # Test that sparse liblinear honours intercept_scaling param
    test_svm.test_dense_liblinear_intercept_handling(svm.LinearSVC)


@pytest.mark.parametrize("datasets_index", range(4))
@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf", "sigmoid"])
@skip_if_32bit
def test_sparse_oneclasssvm(datasets_index, kernel):
    # Check that sparse OneClassSVM gives the same result as dense OneClassSVM
    # many class dataset:
    X_blobs, _ = make_blobs(n_samples=100, centers=10, random_state=0)
    X_blobs = sparse.csr_matrix(X_blobs)
    datasets = [[X_sp, None, T], [X2_sp, None, T2],
                [X_blobs[:80], None, X_blobs[80:]],
                [iris.data, None, iris.data]]
    dataset = datasets[datasets_index]
    clf = svm.OneClassSVM(gamma=1, kernel=kernel)
    sp_clf = svm.OneClassSVM(gamma=1, kernel=kernel)
    check_svm_model_equal(clf, sp_clf, *dataset)


def test_sparse_realdata():
    # Test on a subset from the 20newsgroups dataset.
    # This catches some bugs if input is not correctly converted into
    # sparse format or weights are not correctly initialized.

    data = np.array([0.03771744, 0.1003567, 0.01174647, 0.027069])
    indices = np.array([6, 5, 35, 31])
    indptr = np.array(
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4])
    X = sparse.csr_matrix((data, indices, indptr))
    y = np.array(
        [1., 0., 2., 2., 1., 1., 1., 2., 2., 0., 1., 2., 2.,
         0., 2., 0., 3., 0., 3., 0., 1., 1., 3., 2., 3., 2.,
         0., 3., 1., 0., 2., 1., 2., 0., 1., 0., 2., 3., 1.,
         3., 0., 1., 0., 0., 2., 0., 1., 2., 2., 2., 3., 2.,
         0., 3., 2., 1., 2., 3., 2., 2., 0., 1., 0., 1., 2.,
         3., 0., 0., 2., 2., 1., 3., 1., 1., 0., 1., 2., 1.,
         1., 3.])

    clf = svm.SVC(kernel='linear').fit(X.toarray(), y)
    sp_clf = svm.SVC(kernel='linear').fit(sparse.coo_matrix(X), y)

    assert_array_equal(clf.support_vectors_, sp_clf.support_vectors_.toarray())
    assert_array_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray())
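

# In the (data, indices, indptr) triple above, row i of the matrix holds the
# values data[indptr[i]:indptr[i + 1]] in the columns listed in
# indices[indptr[i]:indptr[i + 1]]; with 81 indptr entries and only 4 stored
# values this builds an extremely sparse 80-row matrix, matching the 80
# labels in y.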


def test_sparse_svc_clone_with_callable_kernel():
    # Test that the "dense_fit" is called even though we use sparse input
    # meaning that everything works fine.
    a = svm.SVC(C=1, kernel=lambda x, y: x * y.T,
                probability=True, random_state=0)
    b = base.clone(a)

    b.fit(X_sp, Y)
    pred = b.predict(X_sp)
    b.predict_proba(X_sp)

    dense_svm = svm.SVC(C=1, kernel=lambda x, y: np.dot(x, y.T),
                        probability=True, random_state=0)
    pred_dense = dense_svm.fit(X, Y).predict(X)
    assert_array_equal(pred_dense, pred)
    # b.decision_function(X_sp)  # XXX : should be supported
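

# Note the two kernel lambdas: for scipy sparse matrices the * operator is
# matrix multiplication, so x * y.T is the Gram matrix, while for dense
# ndarrays * would be elementwise, hence the dense model uses np.dot instead.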


def test_timeout():
    sp = svm.SVC(C=1, kernel=lambda x, y: x * y.T,
                 probability=True, random_state=0, max_iter=1)

    assert_warns(ConvergenceWarning, sp.fit, X_sp, Y)


def test_consistent_proba():
    a = svm.SVC(probability=True, max_iter=1, random_state=0)
    with ignore_warnings(category=ConvergenceWarning):
        proba_1 = a.fit(X, Y).predict_proba(X)
    a = svm.SVC(probability=True, max_iter=1, random_state=0)
    with ignore_warnings(category=ConvergenceWarning):
        proba_2 = a.fit(X, Y).predict_proba(X)
    assert_array_almost_equal(proba_1, proba_2)
1290
venv/Lib/site-packages/sklearn/svm/tests/test_svm.py
Normal file
File diff suppressed because it is too large