Fixed database typo and removed unnecessary class identifier.

Batuhan Berk Başoğlu 2020-10-14 10:10:37 -04:00
parent 00ad49a143
commit 45fb349a7d
5098 changed files with 952558 additions and 85 deletions


@@ -0,0 +1,249 @@
"""
=====================================
Sparse matrices (:mod:`scipy.sparse`)
=====================================
.. currentmodule:: scipy.sparse
SciPy 2-D sparse matrix package for numeric data.
Contents
========
Sparse matrix classes
---------------------
.. autosummary::
:toctree: generated/
bsr_matrix - Block Sparse Row matrix
coo_matrix - A sparse matrix in COOrdinate format
csc_matrix - Compressed Sparse Column matrix
csr_matrix - Compressed Sparse Row matrix
dia_matrix - Sparse matrix with DIAgonal storage
dok_matrix - Dictionary Of Keys based sparse matrix
lil_matrix - Row-based list of lists sparse matrix
spmatrix - Sparse matrix base class
Functions
---------
Building sparse matrices:
.. autosummary::
:toctree: generated/
eye - Sparse MxN matrix whose k-th diagonal is all ones
identity - Identity matrix in sparse format
kron - Kronecker product of two sparse matrices
kronsum - Kronecker sum of sparse matrices
diags - Return a sparse matrix from diagonals
spdiags - Return a sparse matrix from diagonals
block_diag - Build a block diagonal sparse matrix
tril - Lower triangular portion of a matrix in sparse format
triu - Upper triangular portion of a matrix in sparse format
bmat - Build a sparse matrix from sparse sub-blocks
hstack - Stack sparse matrices horizontally (column wise)
vstack - Stack sparse matrices vertically (row wise)
rand - Random values in a given shape (uniformly distributed)
random - Random values in a given shape (custom distribution via ``data_rvs``)
Save and load sparse matrices:
.. autosummary::
:toctree: generated/
save_npz - Save a sparse matrix to a file using ``.npz`` format.
load_npz - Load a sparse matrix from a file using ``.npz`` format.
Sparse matrix tools:
.. autosummary::
:toctree: generated/
find - Return the indices and values of the nonzero elements of a matrix
Identifying sparse matrices:
.. autosummary::
:toctree: generated/
issparse
isspmatrix
isspmatrix_csc
isspmatrix_csr
isspmatrix_bsr
isspmatrix_lil
isspmatrix_dok
isspmatrix_coo
isspmatrix_dia
Submodules
----------
.. autosummary::
csgraph - Compressed sparse graph routines
linalg - Sparse linear algebra routines
Exceptions
----------
.. autosummary::
:toctree: generated/
SparseEfficiencyWarning
SparseWarning
Usage information
=================
There are seven available sparse matrix types:
1. csc_matrix: Compressed Sparse Column format
2. csr_matrix: Compressed Sparse Row format
3. bsr_matrix: Block Sparse Row format
4. lil_matrix: List of Lists format
5. dok_matrix: Dictionary of Keys format
6. coo_matrix: COOrdinate format (aka IJV, triplet format)
7. dia_matrix: DIAgonal format
To construct a matrix efficiently, use either dok_matrix or lil_matrix.
The lil_matrix class supports basic slicing and fancy indexing with a
similar syntax to NumPy arrays. As illustrated below, the COO format
may also be used to efficiently construct matrices. Despite their
similarity to NumPy arrays, using NumPy functions directly on these
matrices is **strongly discouraged**, because NumPy may not properly convert
them for computations, leading to unexpected (and incorrect) results. If you
do want to apply a NumPy function to these matrices, first check whether SciPy
has its own implementation for the given sparse matrix class, or **convert the
sparse matrix to a NumPy array** (e.g., using the `toarray()` method of the
class) before applying the function.
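For example (a small sketch; any NumPy function can be applied the same
way once the matrix has been converted):
>>> import numpy as np
>>> from scipy.sparse import csr_matrix
>>> S = csr_matrix([[1, 2, 0], [0, 0, 3], [4, 0, 5]])
>>> np.sum(S.toarray())
15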
To perform manipulations such as multiplication or inversion, first
convert the matrix to either CSC or CSR format. The lil_matrix format is
row-based, so conversion to CSR is efficient, whereas conversion to CSC
is less so.
All conversions among the CSR, CSC, and COO formats are efficient,
linear-time operations.
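For example, a quick round-trip between formats (illustrative sketch):
>>> from scipy.sparse import lil_matrix
>>> A = lil_matrix((3, 3))
>>> A[0, 0] = 1
>>> A.tocsr().getformat()
'csr'
>>> A.tocsr().tocoo().getformat()
'coo'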
Matrix vector product
---------------------
To compute a product between a sparse matrix and a vector, simply use
the matrix `dot` method, as described in its docstring:
>>> import numpy as np
>>> from scipy.sparse import csr_matrix
>>> A = csr_matrix([[1, 2, 0], [0, 0, 3], [4, 0, 5]])
>>> v = np.array([1, 0, -1])
>>> A.dot(v)
array([ 1, -3, -1], dtype=int64)
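The same product can also be written with the ``@`` operator:
>>> A @ v
array([ 1, -3, -1], dtype=int64)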
.. warning:: As of NumPy 1.7, `np.dot` is not aware of sparse matrices;
therefore, using it will result in unexpected results or errors.
The corresponding dense array should be obtained first instead:
>>> np.dot(A.toarray(), v)
array([ 1, -3, -1], dtype=int64)
but then all the performance advantages would be lost.
The CSR format is especially suitable for fast matrix vector products.
Example 1
---------
Construct a 1000x1000 lil_matrix and add some values to it:
>>> from scipy.sparse import lil_matrix
>>> from scipy.sparse.linalg import spsolve
>>> from numpy.linalg import solve, norm
>>> from numpy.random import rand
>>> A = lil_matrix((1000, 1000))
>>> A[0, :100] = rand(100)
>>> A[1, 100:200] = A[0, :100]
>>> A.setdiag(rand(1000))
Now convert it to CSR format and solve A x = b for x:
>>> A = A.tocsr()
>>> b = rand(1000)
>>> x = spsolve(A, b)
Convert it to a dense matrix and solve, and check that the result
is the same:
>>> x_ = solve(A.toarray(), b)
Now we can compute the norm of the error with:
>>> err = norm(x-x_)
>>> err < 1e-10
True
It should be small :)
Example 2
---------
Construct a matrix in COO format:
>>> from scipy import sparse
>>> from numpy import array
>>> I = array([0,3,1,0])
>>> J = array([0,3,1,2])
>>> V = array([4,5,7,9])
>>> A = sparse.coo_matrix((V,(I,J)),shape=(4,4))
Notice that the indices do not need to be sorted.
Duplicate (i,j) entries are summed when converting to CSR or CSC.
>>> I = array([0,0,1,3,1,0,0])
>>> J = array([0,2,1,3,1,0,0])
>>> V = array([1,1,1,1,1,1,1])
>>> B = sparse.coo_matrix((V,(I,J)),shape=(4,4)).tocsr()
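The dense form shows the summed duplicates (values follow from the
arrays above):
>>> B.toarray()
array([[3, 0, 1, 0],
       [0, 2, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 1]])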
This is useful for constructing finite-element stiffness and mass matrices.
Further details
---------------
CSR column indices are not necessarily sorted. Likewise for CSC row
indices. Use the .sorted_indices() and .sort_indices() methods when
sorted indices are required (e.g., when passing data to other libraries).
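For example (a minimal sketch, constructing a CSR matrix whose column
indices start out unsorted):
>>> from scipy.sparse import csr_matrix
>>> A = csr_matrix(([2, 1], [1, 0], [0, 2]), shape=(1, 2))
>>> A.indices.tolist()
[1, 0]
>>> A.sort_indices()
>>> A.indices.tolist()
[0, 1]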
"""
# Original code by Travis Oliphant.
# Modified and extended by Ed Schofield, Robert Cimrman,
# Nathan Bell, and Jake Vanderplas.
import warnings as _warnings
from .base import *
from .csr import *
from .csc import *
from .lil import *
from .dok import *
from .coo import *
from .dia import *
from .bsr import *
from .construct import *
from .extract import *
from ._matrix_io import *
# For backward compatibility with v0.19.
from . import csgraph
__all__ = [s for s in dir() if not s.startswith('_')]
# Filter PendingDeprecationWarning for np.matrix introduced with numpy 1.15
_warnings.filterwarnings('ignore', message='the matrix subclass is not the recommended way')
from scipy._lib._testutils import PytestTester
test = PytestTester(__name__)
del PytestTester


@@ -0,0 +1,367 @@
"""Indexing mixin for sparse matrix classes.
"""
import numpy as np
from .sputils import isintlike
try:
INT_TYPES = (int, long, np.integer)
except NameError:
# long is not defined in Python 3
INT_TYPES = (int, np.integer)
def _broadcast_arrays(a, b):
"""
Same as np.broadcast_arrays(a, b) but old writeability rules.
NumPy >= 1.17.0 transitions broadcast_arrays to return
read-only arrays. Set writeability explicitly to avoid warnings.
Retain the old writeability rules, as our Cython code assumes
the old behavior.
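For example (illustrative):
>>> a = np.arange(3)
>>> b = np.ones((2, 1))
>>> x, y = _broadcast_arrays(a, b)
>>> x.shape, x.flags.writeable
((2, 3), True)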
"""
x, y = np.broadcast_arrays(a, b)
x.flags.writeable = a.flags.writeable
y.flags.writeable = b.flags.writeable
return x, y
class IndexMixin(object):
"""
This class provides common dispatching and validation logic for indexing.
"""
def __getitem__(self, key):
row, col = self._validate_indices(key)
# Dispatch to specialized methods.
if isinstance(row, INT_TYPES):
if isinstance(col, INT_TYPES):
return self._get_intXint(row, col)
elif isinstance(col, slice):
return self._get_intXslice(row, col)
elif col.ndim == 1:
return self._get_intXarray(row, col)
raise IndexError('index results in >2 dimensions')
elif isinstance(row, slice):
if isinstance(col, INT_TYPES):
return self._get_sliceXint(row, col)
elif isinstance(col, slice):
if row == slice(None) and row == col:
return self.copy()
return self._get_sliceXslice(row, col)
elif col.ndim == 1:
return self._get_sliceXarray(row, col)
raise IndexError('index results in >2 dimensions')
elif row.ndim == 1:
if isinstance(col, INT_TYPES):
return self._get_arrayXint(row, col)
elif isinstance(col, slice):
return self._get_arrayXslice(row, col)
else: # row.ndim == 2
if isinstance(col, INT_TYPES):
return self._get_arrayXint(row, col)
elif isinstance(col, slice):
raise IndexError('index results in >2 dimensions')
elif row.shape[1] == 1 and (col.ndim == 1 or col.shape[0] == 1):
# special case for outer indexing
return self._get_columnXarray(row[:,0], col.ravel())
# The only remaining case is inner (fancy) indexing
row, col = _broadcast_arrays(row, col)
if row.shape != col.shape:
raise IndexError('number of row and column indices differ')
if row.size == 0:
return self.__class__(np.atleast_2d(row).shape, dtype=self.dtype)
return self._get_arrayXarray(row, col)
def __setitem__(self, key, x):
row, col = self._validate_indices(key)
if isinstance(row, INT_TYPES) and isinstance(col, INT_TYPES):
x = np.asarray(x, dtype=self.dtype)
if x.size != 1:
raise ValueError('Trying to assign a sequence to an item')
self._set_intXint(row, col, x.flat[0])
return
if isinstance(row, slice):
row = np.arange(*row.indices(self.shape[0]))[:, None]
else:
row = np.atleast_1d(row)
if isinstance(col, slice):
col = np.arange(*col.indices(self.shape[1]))[None, :]
if row.ndim == 1:
row = row[:, None]
else:
col = np.atleast_1d(col)
i, j = _broadcast_arrays(row, col)
if i.shape != j.shape:
raise IndexError('number of row and column indices differ')
from .base import isspmatrix
if isspmatrix(x):
if i.ndim == 1:
# Inner indexing, so treat them like row vectors.
i = i[None]
j = j[None]
broadcast_row = x.shape[0] == 1 and i.shape[0] != 1
broadcast_col = x.shape[1] == 1 and i.shape[1] != 1
if not ((broadcast_row or x.shape[0] == i.shape[0]) and
(broadcast_col or x.shape[1] == i.shape[1])):
raise ValueError('shape mismatch in assignment')
if x.size == 0:
return
x = x.tocoo(copy=True)
x.sum_duplicates()
self._set_arrayXarray_sparse(i, j, x)
else:
# Make x and i into the same shape
x = np.asarray(x, dtype=self.dtype)
x, _ = _broadcast_arrays(x, i)
if x.size == 0:
return
x = x.reshape(i.shape)
self._set_arrayXarray(i, j, x)
def _validate_indices(self, key):
M, N = self.shape
row, col = _unpack_index(key)
if isintlike(row):
row = int(row)
if row < -M or row >= M:
raise IndexError('row index (%d) out of range' % row)
if row < 0:
row += M
elif not isinstance(row, slice):
row = self._asindices(row, M)
if isintlike(col):
col = int(col)
if col < -N or col >= N:
raise IndexError('column index (%d) out of range' % col)
if col < 0:
col += N
elif not isinstance(col, slice):
col = self._asindices(col, N)
return row, col
def _asindices(self, idx, length):
"""Convert `idx` to a valid index for an axis with a given length.
Subclasses that need special validation can override this method.
"""
try:
x = np.asarray(idx)
except (ValueError, TypeError, MemoryError):
raise IndexError('invalid index')
if x.ndim not in (1, 2):
raise IndexError('Index dimension must be <= 2')
if x.size == 0:
return x
# Check bounds
max_indx = x.max()
if max_indx >= length:
raise IndexError('index (%d) out of range' % max_indx)
min_indx = x.min()
if min_indx < 0:
if min_indx < -length:
raise IndexError('index (%d) out of range' % min_indx)
if x is idx or not x.flags.owndata:
x = x.copy()
x[x < 0] += length
return x
def getrow(self, i):
"""Return a copy of row i of the matrix, as a (1 x n) row vector.
"""
M, N = self.shape
i = int(i)
if i < -M or i >= M:
raise IndexError('index (%d) out of range' % i)
if i < 0:
i += M
return self._get_intXslice(i, slice(None))
def getcol(self, i):
"""Return a copy of column i of the matrix, as a (m x 1) column vector.
"""
M, N = self.shape
i = int(i)
if i < -N or i >= N:
raise IndexError('index (%d) out of range' % i)
if i < 0:
i += N
return self._get_sliceXint(slice(None), i)
def _get_intXint(self, row, col):
raise NotImplementedError()
def _get_intXarray(self, row, col):
raise NotImplementedError()
def _get_intXslice(self, row, col):
raise NotImplementedError()
def _get_sliceXint(self, row, col):
raise NotImplementedError()
def _get_sliceXslice(self, row, col):
raise NotImplementedError()
def _get_sliceXarray(self, row, col):
raise NotImplementedError()
def _get_arrayXint(self, row, col):
raise NotImplementedError()
def _get_arrayXslice(self, row, col):
raise NotImplementedError()
def _get_columnXarray(self, row, col):
raise NotImplementedError()
def _get_arrayXarray(self, row, col):
raise NotImplementedError()
def _set_intXint(self, row, col, x):
raise NotImplementedError()
def _set_arrayXarray(self, row, col, x):
raise NotImplementedError()
def _set_arrayXarray_sparse(self, row, col, x):
# Fall back to densifying x
x = np.asarray(x.toarray(), dtype=self.dtype)
x, _ = _broadcast_arrays(x, row)
self._set_arrayXarray(row, col, x)
def _unpack_index(index):
""" Parse index. Always return a tuple of the form (row, col).
Valid type for row/col is integer, slice, or array of integers.
"""
# First, check if indexing with single boolean matrix.
from .base import spmatrix, isspmatrix
if (isinstance(index, (spmatrix, np.ndarray)) and
index.ndim == 2 and index.dtype.kind == 'b'):
return index.nonzero()
# Parse any ellipses.
index = _check_ellipsis(index)
# Next, parse the tuple or object
if isinstance(index, tuple):
if len(index) == 2:
row, col = index
elif len(index) == 1:
row, col = index[0], slice(None)
else:
raise IndexError('invalid number of indices')
else:
idx = _compatible_boolean_index(index)
if idx is None:
row, col = index, slice(None)
elif idx.ndim < 2:
return _boolean_index_to_array(idx), slice(None)
elif idx.ndim == 2:
return idx.nonzero()
# Next, check for validity and transform the index as needed.
if isspmatrix(row) or isspmatrix(col):
# Supporting sparse boolean indexing with both row and col does
# not work because spmatrix.ndim is always 2.
raise IndexError(
'Indexing with sparse matrices is not supported '
'except boolean indexing where matrix and index '
'are equal shapes.')
bool_row = _compatible_boolean_index(row)
bool_col = _compatible_boolean_index(col)
if bool_row is not None:
row = _boolean_index_to_array(bool_row)
if bool_col is not None:
col = _boolean_index_to_array(bool_col)
return row, col
def _check_ellipsis(index):
"""Process indices with Ellipsis. Returns modified index."""
if index is Ellipsis:
return (slice(None), slice(None))
if not isinstance(index, tuple):
return index
# TODO: Deprecate this multiple-ellipsis handling,
# as numpy no longer supports it.
# Find first ellipsis.
for j, v in enumerate(index):
if v is Ellipsis:
first_ellipsis = j
break
else:
return index
# Try to expand it using shortcuts for common cases
if len(index) == 1:
return (slice(None), slice(None))
if len(index) == 2:
if first_ellipsis == 0:
if index[1] is Ellipsis:
return (slice(None), slice(None))
return (slice(None), index[1])
return (index[0], slice(None))
# Expand it using a general-purpose algorithm
tail = []
for v in index[first_ellipsis+1:]:
if v is not Ellipsis:
tail.append(v)
nd = first_ellipsis + len(tail)
nslice = max(0, 2 - nd)
return index[:first_ellipsis] + (slice(None),)*nslice + tuple(tail)
def _maybe_bool_ndarray(idx):
"""Returns a compatible array if elements are boolean.
"""
idx = np.asanyarray(idx)
if idx.dtype.kind == 'b':
return idx
return None
def _first_element_bool(idx, max_dim=2):
"""Returns True if first element of the incompatible
array type is boolean.
"""
if max_dim < 1:
return None
try:
first = next(iter(idx), None)
except TypeError:
return None
if isinstance(first, bool):
return True
return _first_element_bool(first, max_dim-1)
def _compatible_boolean_index(idx):
"""Returns a boolean index array that can be converted to
integer array. Returns None if no such array exists.
"""
# Presence of attribute `ndim` indicates a compatible array type.
if hasattr(idx, 'ndim') or _first_element_bool(idx):
return _maybe_bool_ndarray(idx)
return None
def _boolean_index_to_array(idx):
if idx.ndim > 1:
raise IndexError('invalid index shape')
return np.where(idx)[0]


@@ -0,0 +1,149 @@
import numpy as np
import scipy.sparse
__all__ = ['save_npz', 'load_npz']
# Make loading safe vs. malicious input
PICKLE_KWARGS = dict(allow_pickle=False)
def save_npz(file, matrix, compressed=True):
""" Save a sparse matrix to a file using ``.npz`` format.
Parameters
----------
file : str or file-like object
Either the file name (string) or an open file (file-like object)
where the data will be saved. If file is a string, the ``.npz``
extension will be appended to the file name if it is not already
there.
matrix : spmatrix (format: ``csc``, ``csr``, ``bsr``, ``dia`` or ``coo``)
The sparse matrix to save.
compressed : bool, optional
Allow compressing the file. Default: True
See Also
--------
scipy.sparse.load_npz: Load a sparse matrix from a file using ``.npz`` format.
numpy.savez: Save several arrays into a ``.npz`` archive.
numpy.savez_compressed : Save several arrays into a compressed ``.npz`` archive.
Examples
--------
Store sparse matrix to disk, and load it again:
>>> import numpy as np
>>> import scipy.sparse
>>> sparse_matrix = scipy.sparse.csc_matrix(np.array([[0, 0, 3], [4, 0, 0]]))
>>> sparse_matrix
<2x3 sparse matrix of type '<class 'numpy.int64'>'
with 2 stored elements in Compressed Sparse Column format>
>>> sparse_matrix.todense()
matrix([[0, 0, 3],
[4, 0, 0]], dtype=int64)
>>> scipy.sparse.save_npz('/tmp/sparse_matrix.npz', sparse_matrix)
>>> sparse_matrix = scipy.sparse.load_npz('/tmp/sparse_matrix.npz')
>>> sparse_matrix
<2x3 sparse matrix of type '<class 'numpy.int64'>'
with 2 stored elements in Compressed Sparse Column format>
>>> sparse_matrix.todense()
matrix([[0, 0, 3],
[4, 0, 0]], dtype=int64)
"""
arrays_dict = {}
if matrix.format in ('csc', 'csr', 'bsr'):
arrays_dict.update(indices=matrix.indices, indptr=matrix.indptr)
elif matrix.format == 'dia':
arrays_dict.update(offsets=matrix.offsets)
elif matrix.format == 'coo':
arrays_dict.update(row=matrix.row, col=matrix.col)
else:
raise NotImplementedError('Save is not implemented for sparse matrix of format {}.'.format(matrix.format))
arrays_dict.update(
format=matrix.format.encode('ascii'),
shape=matrix.shape,
data=matrix.data
)
if compressed:
np.savez_compressed(file, **arrays_dict)
else:
np.savez(file, **arrays_dict)
def load_npz(file):
""" Load a sparse matrix from a file using ``.npz`` format.
Parameters
----------
file : str or file-like object
Either the file name (string) or an open file (file-like object)
from which the data will be loaded.
Returns
-------
result : csc_matrix, csr_matrix, bsr_matrix, dia_matrix or coo_matrix
A sparse matrix containing the loaded data.
Raises
------
IOError
If the input file does not exist or cannot be read.
See Also
--------
scipy.sparse.save_npz: Save a sparse matrix to a file using ``.npz`` format.
numpy.load: Load several arrays from a ``.npz`` archive.
Examples
--------
Store sparse matrix to disk, and load it again:
>>> import numpy as np
>>> import scipy.sparse
>>> sparse_matrix = scipy.sparse.csc_matrix(np.array([[0, 0, 3], [4, 0, 0]]))
>>> sparse_matrix
<2x3 sparse matrix of type '<class 'numpy.int64'>'
with 2 stored elements in Compressed Sparse Column format>
>>> sparse_matrix.todense()
matrix([[0, 0, 3],
[4, 0, 0]], dtype=int64)
>>> scipy.sparse.save_npz('/tmp/sparse_matrix.npz', sparse_matrix)
>>> sparse_matrix = scipy.sparse.load_npz('/tmp/sparse_matrix.npz')
>>> sparse_matrix
<2x3 sparse matrix of type '<class 'numpy.int64'>'
with 2 stored elements in Compressed Sparse Column format>
>>> sparse_matrix.todense()
matrix([[0, 0, 3],
[4, 0, 0]], dtype=int64)
"""
with np.load(file, **PICKLE_KWARGS) as loaded:
try:
matrix_format = loaded['format']
except KeyError:
raise ValueError('The file {} does not contain a sparse matrix.'.format(file))
matrix_format = matrix_format.item()
if not isinstance(matrix_format, str):
# Play safe with Python 2 vs 3 backward compatibility;
# files saved with SciPy < 1.0.0 may contain unicode or bytes.
matrix_format = matrix_format.decode('ascii')
try:
cls = getattr(scipy.sparse, '{}_matrix'.format(matrix_format))
except AttributeError:
raise ValueError('Unknown matrix format "{}"'.format(matrix_format))
if matrix_format in ('csc', 'csr', 'bsr'):
return cls((loaded['data'], loaded['indices'], loaded['indptr']), shape=loaded['shape'])
elif matrix_format == 'dia':
return cls((loaded['data'], loaded['offsets']), shape=loaded['shape'])
elif matrix_format == 'coo':
return cls((loaded['data'], (loaded['row'], loaded['col'])), shape=loaded['shape'])
else:
raise NotImplementedError('Load is not implemented for '
'sparse matrix of format {}.'.format(matrix_format))

File diff suppressed because it is too large


@@ -0,0 +1,715 @@
"""Compressed Block Sparse Row matrix format"""
__docformat__ = "restructuredtext en"
__all__ = ['bsr_matrix', 'isspmatrix_bsr']
from warnings import warn
import numpy as np
from .data import _data_matrix, _minmax_mixin
from .compressed import _cs_matrix
from .base import isspmatrix, _formats, spmatrix
from .sputils import (isshape, getdtype, to_native, upcast, get_index_dtype,
check_shape)
from . import _sparsetools
from ._sparsetools import (bsr_matvec, bsr_matvecs, csr_matmat_maxnnz,
bsr_matmat, bsr_transpose, bsr_sort_indices,
bsr_tocsr)
class bsr_matrix(_cs_matrix, _minmax_mixin):
"""Block Sparse Row matrix
This can be instantiated in several ways:
bsr_matrix(D, [blocksize=(R,C)])
where D is a dense matrix or 2-D ndarray.
bsr_matrix(S, [blocksize=(R,C)])
with another sparse matrix S (equivalent to S.tobsr())
bsr_matrix((M, N), [blocksize=(R,C), dtype])
to construct an empty matrix with shape (M, N)
dtype is optional, defaulting to dtype='d'.
bsr_matrix((data, ij), [blocksize=(R,C), shape=(M, N)])
where ``data`` and ``ij`` satisfy ``a[ij[0, k], ij[1, k]] = data[k]``
bsr_matrix((data, indices, indptr), [shape=(M, N)])
is the standard BSR representation where the block column
indices for row i are stored in ``indices[indptr[i]:indptr[i+1]]``
and their corresponding block values are stored in
``data[ indptr[i]: indptr[i+1] ]``. If the shape parameter is not
supplied, the matrix dimensions are inferred from the index arrays.
Attributes
----------
dtype : dtype
Data type of the matrix
shape : 2-tuple
Shape of the matrix
ndim : int
Number of dimensions (this is always 2)
nnz
Number of stored values, including explicit zeros
data
Data array of the matrix
indices
BSR format index array
indptr
BSR format index pointer array
blocksize
Block size of the matrix
has_sorted_indices
Whether indices are sorted
Notes
-----
Sparse matrices can be used in arithmetic operations: they support
addition, subtraction, multiplication, division, and matrix power.
**Summary of BSR format**
The Block Compressed Row (BSR) format is very similar to the Compressed
Sparse Row (CSR) format. BSR is appropriate for sparse matrices with dense
submatrices, as in the last example below. Block matrices often arise in
vector-valued finite element discretizations. In such cases, BSR is
considerably more efficient than CSR and CSC for many sparse arithmetic
operations.
**Blocksize**
The blocksize (R,C) must evenly divide the shape of the matrix (M,N).
That is, R and C must satisfy the relationship ``M % R = 0`` and
``N % C = 0``.
If no blocksize is specified, a simple heuristic is applied to determine
an appropriate blocksize.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import bsr_matrix
>>> bsr_matrix((3, 4), dtype=np.int8).toarray()
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=int8)
>>> row = np.array([0, 0, 1, 2, 2, 2])
>>> col = np.array([0, 2, 2, 0, 1, 2])
>>> data = np.array([1, 2, 3, 4, 5, 6])
>>> bsr_matrix((data, (row, col)), shape=(3, 3)).toarray()
array([[1, 0, 2],
[0, 0, 3],
[4, 5, 6]])
>>> indptr = np.array([0, 2, 3, 6])
>>> indices = np.array([0, 2, 2, 0, 1, 2])
>>> data = np.array([1, 2, 3, 4, 5, 6]).repeat(4).reshape(6, 2, 2)
>>> bsr_matrix((data,indices,indptr), shape=(6, 6)).toarray()
array([[1, 1, 0, 0, 2, 2],
[1, 1, 0, 0, 2, 2],
[0, 0, 0, 0, 3, 3],
[0, 0, 0, 0, 3, 3],
[4, 4, 5, 5, 6, 6],
[4, 4, 5, 5, 6, 6]])
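A blocksize may also be given explicitly; it must evenly divide the
matrix shape (small sketch):
>>> bsr_matrix((6, 6), blocksize=(2, 3), dtype=np.int8).blocksize
(2, 3)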
"""
format = 'bsr'
def __init__(self, arg1, shape=None, dtype=None, copy=False, blocksize=None):
_data_matrix.__init__(self)
if isspmatrix(arg1):
if isspmatrix_bsr(arg1) and copy:
arg1 = arg1.copy()
else:
arg1 = arg1.tobsr(blocksize=blocksize)
self._set_self(arg1)
elif isinstance(arg1,tuple):
if isshape(arg1):
# it's a tuple of matrix dimensions (M,N)
self._shape = check_shape(arg1)
M,N = self.shape
# process blocksize
if blocksize is None:
blocksize = (1,1)
else:
if not isshape(blocksize):
raise ValueError('invalid blocksize=%s' % blocksize)
blocksize = tuple(blocksize)
self.data = np.zeros((0,) + blocksize, getdtype(dtype, default=float))
R,C = blocksize
if (M % R) != 0 or (N % C) != 0:
raise ValueError('shape must be multiple of blocksize')
# Select index dtype large enough to pass array and
# scalar parameters to sparsetools
idx_dtype = get_index_dtype(maxval=max(M//R, N//C, R, C))
self.indices = np.zeros(0, dtype=idx_dtype)
self.indptr = np.zeros(M//R + 1, dtype=idx_dtype)
elif len(arg1) == 2:
# (data,(row,col)) format
from .coo import coo_matrix
self._set_self(coo_matrix(arg1, dtype=dtype).tobsr(blocksize=blocksize))
elif len(arg1) == 3:
# (data,indices,indptr) format
(data, indices, indptr) = arg1
# Select index dtype large enough to pass array and
# scalar parameters to sparsetools
maxval = 1
if shape is not None:
maxval = max(shape)
if blocksize is not None:
maxval = max(maxval, max(blocksize))
idx_dtype = get_index_dtype((indices, indptr), maxval=maxval, check_contents=True)
self.indices = np.array(indices, copy=copy, dtype=idx_dtype)
self.indptr = np.array(indptr, copy=copy, dtype=idx_dtype)
self.data = np.array(data, copy=copy, dtype=getdtype(dtype, data))
else:
raise ValueError('unrecognized bsr_matrix constructor usage')
else:
# must be dense
try:
arg1 = np.asarray(arg1)
except Exception:
raise ValueError("unrecognized form for"
" %s_matrix constructor" % self.format)
from .coo import coo_matrix
arg1 = coo_matrix(arg1, dtype=dtype).tobsr(blocksize=blocksize)
self._set_self(arg1)
if shape is not None:
self._shape = check_shape(shape)
else:
if self.shape is None:
# shape not already set, try to infer dimensions
try:
M = len(self.indptr) - 1
N = self.indices.max() + 1
except Exception:
raise ValueError('unable to infer matrix dimensions')
else:
R,C = self.blocksize
self._shape = check_shape((M*R,N*C))
if self.shape is None:
if shape is None:
# TODO infer shape here
raise ValueError('need to infer shape')
else:
self._shape = check_shape(shape)
if dtype is not None:
self.data = self.data.astype(dtype, copy=False)
self.check_format(full_check=False)
def check_format(self, full_check=True):
"""check whether the matrix format is valid
Parameters
----------
full_check : bool, optional
If True (default), run a rigorous check, O(N) operations.
If False, run a basic check, O(1) operations.
"""
M,N = self.shape
R,C = self.blocksize
# index arrays should have integer data types
if self.indptr.dtype.kind != 'i':
warn("indptr array has non-integer dtype (%s)"
% self.indptr.dtype.name)
if self.indices.dtype.kind != 'i':
warn("indices array has non-integer dtype (%s)"
% self.indices.dtype.name)
idx_dtype = get_index_dtype((self.indices, self.indptr))
self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
self.indices = np.asarray(self.indices, dtype=idx_dtype)
self.data = to_native(self.data)
# check array shapes
if self.indices.ndim != 1 or self.indptr.ndim != 1:
raise ValueError("indices, and indptr should be 1-D")
if self.data.ndim != 3:
raise ValueError("data should be 3-D")
# check index pointer
if (len(self.indptr) != M//R + 1):
raise ValueError("index pointer size (%d) should be (%d)" %
(len(self.indptr), M//R + 1))
if (self.indptr[0] != 0):
raise ValueError("index pointer should start with 0")
# check index and data arrays
if (len(self.indices) != len(self.data)):
raise ValueError("indices and data should have the same size")
if (self.indptr[-1] > len(self.indices)):
raise ValueError("Last value of index pointer should be less than "
"the size of index and data arrays")
self.prune()
if full_check:
# check format validity (more expensive)
if self.nnz > 0:
if self.indices.max() >= N//C:
raise ValueError("column index values must be < %d (now max %d)" % (N//C, self.indices.max()))
if self.indices.min() < 0:
raise ValueError("column index values must be >= 0")
if np.diff(self.indptr).min() < 0:
raise ValueError("index pointer values must form a "
"non-decreasing sequence")
# if not self.has_sorted_indices():
# warn('Indices were not in sorted order. Sorting indices.')
# self.sort_indices(check_first=False)
def _get_blocksize(self):
return self.data.shape[1:]
blocksize = property(fget=_get_blocksize)
def getnnz(self, axis=None):
if axis is not None:
raise NotImplementedError("getnnz over an axis is not implemented "
"for BSR format")
R,C = self.blocksize
return int(self.indptr[-1] * R * C)
getnnz.__doc__ = spmatrix.getnnz.__doc__
def __repr__(self):
format = _formats[self.getformat()][1]
return ("<%dx%d sparse matrix of type '%s'\n"
"\twith %d stored elements (blocksize = %dx%d) in %s format>" %
(self.shape + (self.dtype.type, self.nnz) + self.blocksize +
(format,)))
def diagonal(self, k=0):
rows, cols = self.shape
if k <= -rows or k >= cols:
return np.empty(0, dtype=self.data.dtype)
R, C = self.blocksize
y = np.zeros(min(rows + min(k, 0), cols - max(k, 0)),
dtype=upcast(self.dtype))
_sparsetools.bsr_diagonal(k, rows // R, cols // C, R, C,
self.indptr, self.indices,
np.ravel(self.data), y)
return y
diagonal.__doc__ = spmatrix.diagonal.__doc__
##########################
# NotImplemented methods #
##########################
def __getitem__(self,key):
raise NotImplementedError
def __setitem__(self,key,val):
raise NotImplementedError
######################
# Arithmetic methods #
######################
@np.deprecate(message="BSR matvec is deprecated in SciPy 0.19.0. "
"Use * operator instead.")
def matvec(self, other):
"""Multiply matrix by vector."""
return self * other
@np.deprecate(message="BSR matmat is deprecated in SciPy 0.19.0. "
"Use * operator instead.")
def matmat(self, other):
"""Multiply this sparse matrix by other matrix."""
return self * other
def _add_dense(self, other):
return self.tocoo(copy=False)._add_dense(other)
def _mul_vector(self, other):
M,N = self.shape
R,C = self.blocksize
result = np.zeros(self.shape[0], dtype=upcast(self.dtype, other.dtype))
bsr_matvec(M//R, N//C, R, C,
self.indptr, self.indices, self.data.ravel(),
other, result)
return result
def _mul_multivector(self,other):
R,C = self.blocksize
M,N = self.shape
n_vecs = other.shape[1] # number of column vectors
result = np.zeros((M,n_vecs), dtype=upcast(self.dtype,other.dtype))
bsr_matvecs(M//R, N//C, n_vecs, R, C,
self.indptr, self.indices, self.data.ravel(),
other.ravel(), result.ravel())
return result
def _mul_sparse_matrix(self, other):
M, K1 = self.shape
K2, N = other.shape
R,n = self.blocksize
# convert to this format
if isspmatrix_bsr(other):
C = other.blocksize[1]
else:
C = 1
from .csr import isspmatrix_csr
if isspmatrix_csr(other) and n == 1:
other = other.tobsr(blocksize=(n,C), copy=False) # lightweight conversion
else:
other = other.tobsr(blocksize=(n,C))
idx_dtype = get_index_dtype((self.indptr, self.indices,
other.indptr, other.indices))
bnnz = csr_matmat_maxnnz(M//R, N//C,
self.indptr.astype(idx_dtype),
self.indices.astype(idx_dtype),
other.indptr.astype(idx_dtype),
other.indices.astype(idx_dtype))
idx_dtype = get_index_dtype((self.indptr, self.indices,
other.indptr, other.indices),
maxval=bnnz)
indptr = np.empty(self.indptr.shape, dtype=idx_dtype)
indices = np.empty(bnnz, dtype=idx_dtype)
data = np.empty(R*C*bnnz, dtype=upcast(self.dtype,other.dtype))
bsr_matmat(bnnz, M//R, N//C, R, C, n,
self.indptr.astype(idx_dtype),
self.indices.astype(idx_dtype),
np.ravel(self.data),
other.indptr.astype(idx_dtype),
other.indices.astype(idx_dtype),
np.ravel(other.data),
indptr,
indices,
data)
data = data.reshape(-1,R,C)
# TODO eliminate zeros
return bsr_matrix((data,indices,indptr),shape=(M,N),blocksize=(R,C))
######################
# Conversion methods #
######################
def tobsr(self, blocksize=None, copy=False):
"""Convert this matrix into Block Sparse Row Format.
With copy=False, the data/indices may be shared between this
matrix and the resultant bsr_matrix.
If blocksize=(R, C) is provided, it will be used for determining
block size of the bsr_matrix.
"""
if blocksize not in [None, self.blocksize]:
return self.tocsr().tobsr(blocksize=blocksize)
if copy:
return self.copy()
else:
return self
def tocsr(self, copy=False):
M, N = self.shape
R, C = self.blocksize
nnz = self.nnz
idx_dtype = get_index_dtype((self.indptr, self.indices),
maxval=max(nnz, N))
indptr = np.empty(M + 1, dtype=idx_dtype)
indices = np.empty(nnz, dtype=idx_dtype)
data = np.empty(nnz, dtype=upcast(self.dtype))
bsr_tocsr(M // R, # n_brow
N // C, # n_bcol
R, C,
self.indptr.astype(idx_dtype, copy=False),
self.indices.astype(idx_dtype, copy=False),
self.data,
indptr,
indices,
data)
from .csr import csr_matrix
return csr_matrix((data, indices, indptr), shape=self.shape)
tocsr.__doc__ = spmatrix.tocsr.__doc__
def tocsc(self, copy=False):
return self.tocsr(copy=False).tocsc(copy=copy)
tocsc.__doc__ = spmatrix.tocsc.__doc__
def tocoo(self, copy=True):
"""Convert this matrix to COOrdinate format.
When copy=False the data array will be shared between
this matrix and the resultant coo_matrix.
"""
M,N = self.shape
R,C = self.blocksize
indptr_diff = np.diff(self.indptr)
if indptr_diff.dtype.itemsize > np.dtype(np.intp).itemsize:
# Check for potential overflow
indptr_diff_limited = indptr_diff.astype(np.intp)
if np.any(indptr_diff_limited != indptr_diff):
raise ValueError("Matrix too big to convert")
indptr_diff = indptr_diff_limited
row = (R * np.arange(M//R)).repeat(indptr_diff)
row = row.repeat(R*C).reshape(-1,R,C)
row += np.tile(np.arange(R).reshape(-1,1), (1,C))
row = row.reshape(-1)
col = (C * self.indices).repeat(R*C).reshape(-1,R,C)
col += np.tile(np.arange(C), (R,1))
col = col.reshape(-1)
data = self.data.reshape(-1)
if copy:
data = data.copy()
from .coo import coo_matrix
return coo_matrix((data,(row,col)), shape=self.shape)
def toarray(self, order=None, out=None):
return self.tocoo(copy=False).toarray(order=order, out=out)
toarray.__doc__ = spmatrix.toarray.__doc__
def transpose(self, axes=None, copy=False):
if axes is not None:
raise ValueError(("Sparse matrices do not support "
"an 'axes' parameter because swapping "
"dimensions is the only logical permutation."))
R, C = self.blocksize
M, N = self.shape
NBLK = self.nnz//(R*C)
if self.nnz == 0:
return bsr_matrix((N, M), blocksize=(C, R),
dtype=self.dtype, copy=copy)
indptr = np.empty(N//C + 1, dtype=self.indptr.dtype)
indices = np.empty(NBLK, dtype=self.indices.dtype)
data = np.empty((NBLK, C, R), dtype=self.data.dtype)
bsr_transpose(M//R, N//C, R, C,
self.indptr, self.indices, self.data.ravel(),
indptr, indices, data.ravel())
return bsr_matrix((data, indices, indptr),
shape=(N, M), copy=copy)
transpose.__doc__ = spmatrix.transpose.__doc__
##############################################################
# methods that examine or modify the internal data structure #
##############################################################
def eliminate_zeros(self):
"""Remove zero elements in-place."""
if not self.nnz:
return # nothing to do
R,C = self.blocksize
M,N = self.shape
mask = (self.data != 0).reshape(-1,R*C).sum(axis=1) # nonzero blocks
nonzero_blocks = mask.nonzero()[0]
self.data[:len(nonzero_blocks)] = self.data[nonzero_blocks]
# modifies self.indptr and self.indices *in place*
_sparsetools.csr_eliminate_zeros(M//R, N//C, self.indptr,
self.indices, mask)
self.prune()
def sum_duplicates(self):
"""Eliminate duplicate matrix entries by adding them together
The is an *in place* operation
"""
if self.has_canonical_format:
return
self.sort_indices()
R, C = self.blocksize
M, N = self.shape
# port of _sparsetools.csr_sum_duplicates
n_row = M // R
nnz = 0
row_end = 0
for i in range(n_row):
jj = row_end
row_end = self.indptr[i+1]
while jj < row_end:
j = self.indices[jj]
x = self.data[jj]
jj += 1
while jj < row_end and self.indices[jj] == j:
x += self.data[jj]
jj += 1
self.indices[nnz] = j
self.data[nnz] = x
nnz += 1
self.indptr[i+1] = nnz
self.prune() # nnz may have changed
self.has_canonical_format = True
def sort_indices(self):
"""Sort the indices of this matrix *in place*
"""
if self.has_sorted_indices:
return
R,C = self.blocksize
M,N = self.shape
bsr_sort_indices(M//R, N//C, R, C, self.indptr, self.indices, self.data.ravel())
self.has_sorted_indices = True
def prune(self):
""" Remove empty space after all non-zero elements.
"""
R,C = self.blocksize
M,N = self.shape
if len(self.indptr) != M//R + 1:
raise ValueError("index pointer has invalid length")
bnnz = self.indptr[-1]
if len(self.indices) < bnnz:
raise ValueError("indices array has too few elements")
if len(self.data) < bnnz:
raise ValueError("data array has too few elements")
self.data = self.data[:bnnz]
self.indices = self.indices[:bnnz]
# utility functions
def _binopt(self, other, op, in_shape=None, out_shape=None):
"""Apply the binary operation fn to two sparse matrices."""
# Ideally we'd take the GCDs of the blocksize dimensions
# and explode self and other to match.
other = self.__class__(other, blocksize=self.blocksize)
# e.g. bsr_plus_bsr, etc.
fn = getattr(_sparsetools, self.format + op + self.format)
R,C = self.blocksize
max_bnnz = len(self.data) + len(other.data)
idx_dtype = get_index_dtype((self.indptr, self.indices,
other.indptr, other.indices),
maxval=max_bnnz)
indptr = np.empty(self.indptr.shape, dtype=idx_dtype)
indices = np.empty(max_bnnz, dtype=idx_dtype)
bool_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
if op in bool_ops:
data = np.empty(R*C*max_bnnz, dtype=np.bool_)
else:
data = np.empty(R*C*max_bnnz, dtype=upcast(self.dtype,other.dtype))
fn(self.shape[0]//R, self.shape[1]//C, R, C,
self.indptr.astype(idx_dtype),
self.indices.astype(idx_dtype),
self.data,
other.indptr.astype(idx_dtype),
other.indices.astype(idx_dtype),
np.ravel(other.data),
indptr,
indices,
data)
actual_bnnz = indptr[-1]
indices = indices[:actual_bnnz]
data = data[:R*C*actual_bnnz]
if actual_bnnz < max_bnnz/2:
indices = indices.copy()
data = data.copy()
data = data.reshape(-1,R,C)
return self.__class__((data, indices, indptr), shape=self.shape)
# needed by _data_matrix
def _with_data(self,data,copy=True):
"""Returns a matrix with the same sparsity structure as self,
but with different data. By default the structure arrays
(i.e. .indptr and .indices) are copied.
"""
if copy:
return self.__class__((data,self.indices.copy(),self.indptr.copy()),
shape=self.shape,dtype=data.dtype)
else:
return self.__class__((data,self.indices,self.indptr),
shape=self.shape,dtype=data.dtype)
# # these functions are used by the parent class
# # to remove redundancy between bsc_matrix and bsr_matrix
# def _swap(self,x):
# """swap the members of x if this is a column-oriented matrix
# """
# return (x[0],x[1])
def isspmatrix_bsr(x):
"""Is x of a bsr_matrix type?
Parameters
----------
x
object to check for being a bsr matrix
Returns
-------
bool
True if x is a bsr matrix, False otherwise
Examples
--------
>>> from scipy.sparse import bsr_matrix, isspmatrix_bsr
>>> isspmatrix_bsr(bsr_matrix([[5]]))
True
>>> from scipy.sparse import bsr_matrix, csr_matrix, isspmatrix_bsr
>>> isspmatrix_bsr(csr_matrix([[5]]))
False
"""
return isinstance(x, bsr_matrix)

File diff suppressed because it is too large


@@ -0,0 +1,843 @@
"""Functions to construct sparse matrices
"""
__docformat__ = "restructuredtext en"
__all__ = ['spdiags', 'eye', 'identity', 'kron', 'kronsum',
'hstack', 'vstack', 'bmat', 'rand', 'random', 'diags', 'block_diag']
from functools import partial
import numpy as np
from scipy._lib._util import check_random_state, rng_integers
from .sputils import upcast, get_index_dtype, isscalarlike
from .csr import csr_matrix
from .csc import csc_matrix
from .bsr import bsr_matrix
from .coo import coo_matrix
from .dia import dia_matrix
from .base import issparse
def spdiags(data, diags, m, n, format=None):
"""
Return a sparse matrix from diagonals.
Parameters
----------
data : array_like
matrix diagonals stored row-wise
diags : diagonals to set
- k = 0 the main diagonal
- k > 0 the k-th upper diagonal
- k < 0 the k-th lower diagonal
m, n : int
shape of the result
format : str, optional
Format of the result. By default (format=None) an appropriate sparse
matrix format is returned. This choice is subject to change.
See Also
--------
diags : more convenient form of this function
dia_matrix : the sparse DIAgonal format.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import spdiags
>>> data = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
>>> diags = np.array([0, -1, 2])
>>> spdiags(data, diags, 4, 4).toarray()
array([[1, 0, 3, 0],
[1, 2, 0, 4],
[0, 2, 3, 0],
[0, 0, 3, 4]])
"""
return dia_matrix((data, diags), shape=(m,n)).asformat(format)
def diags(diagonals, offsets=0, shape=None, format=None, dtype=None):
"""
Construct a sparse matrix from diagonals.
Parameters
----------
diagonals : sequence of array_like
Sequence of arrays containing the matrix diagonals,
corresponding to `offsets`.
offsets : sequence of int or an int, optional
Diagonals to set:
- k = 0 the main diagonal (default)
- k > 0 the kth upper diagonal
- k < 0 the kth lower diagonal
shape : tuple of int, optional
Shape of the result. If omitted, a square matrix large enough
to contain the diagonals is returned.
format : {"dia", "csr", "csc", "lil", ...}, optional
Matrix format of the result. By default (format=None) an
appropriate sparse matrix format is returned. This choice is
subject to change.
dtype : dtype, optional
Data type of the matrix.
See Also
--------
spdiags : construct matrix from diagonals
Notes
-----
This function differs from `spdiags` in the way it handles
off-diagonals.
The result from `diags` is the sparse equivalent of::
np.diag(diagonals[0], offsets[0])
+ ...
+ np.diag(diagonals[k], offsets[k])
Repeated diagonal offsets are disallowed.
.. versionadded:: 0.11
Examples
--------
>>> from scipy.sparse import diags
>>> diagonals = [[1, 2, 3, 4], [1, 2, 3], [1, 2]]
>>> diags(diagonals, [0, -1, 2]).toarray()
array([[1, 0, 1, 0],
[1, 2, 0, 2],
[0, 2, 3, 0],
[0, 0, 3, 4]])
Broadcasting of scalars is supported (but shape needs to be
specified):
>>> diags([1, -2, 1], [-1, 0, 1], shape=(4, 4)).toarray()
array([[-2., 1., 0., 0.],
[ 1., -2., 1., 0.],
[ 0., 1., -2., 1.],
[ 0., 0., 1., -2.]])
If only one diagonal is wanted (as in `numpy.diag`), the following
works as well:
>>> diags([1, 2, 3], 1).toarray()
array([[ 0., 1., 0., 0.],
[ 0., 0., 2., 0.],
[ 0., 0., 0., 3.],
[ 0., 0., 0., 0.]])
"""
# if offsets is not a sequence, assume that there's only one diagonal
if isscalarlike(offsets):
# now check that there's actually only one diagonal
if len(diagonals) == 0 or isscalarlike(diagonals[0]):
diagonals = [np.atleast_1d(diagonals)]
else:
raise ValueError("Different number of diagonals and offsets.")
else:
diagonals = list(map(np.atleast_1d, diagonals))
offsets = np.atleast_1d(offsets)
# Basic check
if len(diagonals) != len(offsets):
raise ValueError("Different number of diagonals and offsets.")
# Determine shape, if omitted
if shape is None:
m = len(diagonals[0]) + abs(int(offsets[0]))
shape = (m, m)
# Determine data type, if omitted
if dtype is None:
dtype = np.common_type(*diagonals)
# Construct data array
m, n = shape
M = max([min(m + offset, n - offset) + max(0, offset)
for offset in offsets])
M = max(0, M)
data_arr = np.zeros((len(offsets), M), dtype=dtype)
K = min(m, n)
for j, diagonal in enumerate(diagonals):
offset = offsets[j]
k = max(0, offset)
length = min(m + offset, n - offset, K)
if length < 0:
raise ValueError("Offset %d (index %d) out of bounds" % (offset, j))
try:
data_arr[j, k:k+length] = diagonal[...,:length]
except ValueError:
if len(diagonal) != length and len(diagonal) != 1:
raise ValueError(
"Diagonal length (index %d: %d at offset %d) does not "
"agree with matrix size (%d, %d)." % (
j, len(diagonal), offset, m, n))
raise
return dia_matrix((data_arr, offsets), shape=(m, n)).asformat(format)
def identity(n, dtype='d', format=None):
"""Identity matrix in sparse format
Returns an identity matrix with shape (n,n) using a given
sparse format and dtype.
Parameters
----------
n : int
Shape of the identity matrix.
dtype : dtype, optional
Data type of the matrix
format : str, optional
Sparse format of the result, e.g., format="csr", etc.
Examples
--------
>>> from scipy.sparse import identity
>>> identity(3).toarray()
array([[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]])
>>> identity(3, dtype='int8', format='dia')
<3x3 sparse matrix of type '<class 'numpy.int8'>'
with 3 stored elements (1 diagonals) in DIAgonal format>
"""
return eye(n, n, dtype=dtype, format=format)
def eye(m, n=None, k=0, dtype=float, format=None):
"""Sparse matrix with ones on diagonal
Returns a sparse (m x n) matrix where the kth diagonal
is all ones and everything else is zeros.
Parameters
----------
m : int
Number of rows in the matrix.
n : int, optional
Number of columns. Default: `m`.
k : int, optional
Diagonal to place ones on. Default: 0 (main diagonal).
dtype : dtype, optional
Data type of the matrix.
format : str, optional
Sparse format of the result, e.g., format="csr", etc.
Examples
--------
>>> import numpy as np
>>> from scipy import sparse
>>> sparse.eye(3).toarray()
array([[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]])
>>> sparse.eye(3, dtype=np.int8)
<3x3 sparse matrix of type '<class 'numpy.int8'>'
with 3 stored elements (1 diagonals) in DIAgonal format>
"""
if n is None:
n = m
m,n = int(m),int(n)
if m == n and k == 0:
# fast branch for special formats
if format in ['csr', 'csc']:
idx_dtype = get_index_dtype(maxval=n)
indptr = np.arange(n+1, dtype=idx_dtype)
indices = np.arange(n, dtype=idx_dtype)
data = np.ones(n, dtype=dtype)
cls = {'csr': csr_matrix, 'csc': csc_matrix}[format]
return cls((data,indices,indptr),(n,n))
elif format == 'coo':
idx_dtype = get_index_dtype(maxval=n)
row = np.arange(n, dtype=idx_dtype)
col = np.arange(n, dtype=idx_dtype)
data = np.ones(n, dtype=dtype)
return coo_matrix((data,(row,col)),(n,n))
diags = np.ones((1, max(0, min(m + k, n))), dtype=dtype)
return spdiags(diags, k, m, n).asformat(format)
def kron(A, B, format=None):
"""kronecker product of sparse matrices A and B
Parameters
----------
A : sparse or dense matrix
first matrix of the product
B : sparse or dense matrix
second matrix of the product
format : str, optional
format of the result (e.g. "csr")
Returns
-------
kronecker product in a sparse matrix format
Examples
--------
>>> import numpy as np
>>> from scipy import sparse
>>> A = sparse.csr_matrix(np.array([[0, 2], [5, 0]]))
>>> B = sparse.csr_matrix(np.array([[1, 2], [3, 4]]))
>>> sparse.kron(A, B).toarray()
array([[ 0, 0, 2, 4],
[ 0, 0, 6, 8],
[ 5, 10, 0, 0],
[15, 20, 0, 0]])
>>> sparse.kron(A, [[1, 2], [3, 4]]).toarray()
array([[ 0, 0, 2, 4],
[ 0, 0, 6, 8],
[ 5, 10, 0, 0],
[15, 20, 0, 0]])
"""
B = coo_matrix(B)
if (format is None or format == "bsr") and 2*B.nnz >= B.shape[0] * B.shape[1]:
# B is fairly dense, use BSR
A = csr_matrix(A,copy=True)
output_shape = (A.shape[0]*B.shape[0], A.shape[1]*B.shape[1])
if A.nnz == 0 or B.nnz == 0:
# kronecker product is the zero matrix
return coo_matrix(output_shape)
B = B.toarray()
data = A.data.repeat(B.size).reshape(-1,B.shape[0],B.shape[1])
data = data * B
return bsr_matrix((data,A.indices,A.indptr), shape=output_shape)
else:
# use COO
A = coo_matrix(A)
output_shape = (A.shape[0]*B.shape[0], A.shape[1]*B.shape[1])
if A.nnz == 0 or B.nnz == 0:
# kronecker product is the zero matrix
return coo_matrix(output_shape)
# expand entries of a into blocks
row = A.row.repeat(B.nnz)
col = A.col.repeat(B.nnz)
data = A.data.repeat(B.nnz)
if max(A.shape[0]*B.shape[0], A.shape[1]*B.shape[1]) > np.iinfo('int32').max:
row = row.astype(np.int64)
col = col.astype(np.int64)
row *= B.shape[0]
col *= B.shape[1]
# increment block indices
row,col = row.reshape(-1,B.nnz),col.reshape(-1,B.nnz)
row += B.row
col += B.col
row,col = row.reshape(-1),col.reshape(-1)
# compute block entries
data = data.reshape(-1,B.nnz) * B.data
data = data.reshape(-1)
return coo_matrix((data,(row,col)), shape=output_shape).asformat(format)
def kronsum(A, B, format=None):
"""kronecker sum of sparse matrices A and B
Kronecker sum of two sparse matrices is a sum of two Kronecker
products kron(I_n,A) + kron(B,I_m) where A has shape (m,m)
and B has shape (n,n) and I_m and I_n are identity matrices
of shape (m,m) and (n,n), respectively.
Parameters
----------
A
square matrix
B
square matrix
format : str
format of the result (e.g. "csr")
Returns
-------
kronecker sum in a sparse matrix format
Examples
--------
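A small sketch following the definition above:
>>> from scipy import sparse
>>> A = sparse.csr_matrix([[0, 1], [1, 0]])
>>> B = sparse.csr_matrix([[2, 0], [0, 3]])
>>> sparse.kronsum(A, B).toarray()
array([[2, 1, 0, 0],
       [1, 2, 0, 0],
       [0, 0, 3, 1],
       [0, 0, 1, 3]])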
"""
A = coo_matrix(A)
B = coo_matrix(B)
if A.shape[0] != A.shape[1]:
raise ValueError('A is not square')
if B.shape[0] != B.shape[1]:
raise ValueError('B is not square')
dtype = upcast(A.dtype, B.dtype)
L = kron(eye(B.shape[0],dtype=dtype), A, format=format)
R = kron(B, eye(A.shape[0],dtype=dtype), format=format)
return (L+R).asformat(format) # since L + R is not always same format
def _compressed_sparse_stack(blocks, axis):
"""
Stacking fast path for CSR/CSC matrices
(i) vstack for CSR, (ii) hstack for CSC.
"""
other_axis = 1 if axis == 0 else 0
data = np.concatenate([b.data for b in blocks])
constant_dim = blocks[0].shape[other_axis]
idx_dtype = get_index_dtype(arrays=[b.indptr for b in blocks],
maxval=max(data.size, constant_dim))
indices = np.empty(data.size, dtype=idx_dtype)
indptr = np.empty(sum(b.shape[axis] for b in blocks) + 1, dtype=idx_dtype)
last_indptr = idx_dtype(0)
sum_dim = 0
sum_indices = 0
for b in blocks:
if b.shape[other_axis] != constant_dim:
raise ValueError('incompatible dimensions for axis %d' % other_axis)
indices[sum_indices:sum_indices+b.indices.size] = b.indices
sum_indices += b.indices.size
idxs = slice(sum_dim, sum_dim + b.shape[axis])
indptr[idxs] = b.indptr[:-1]
indptr[idxs] += last_indptr
sum_dim += b.shape[axis]
last_indptr += b.indptr[-1]
indptr[-1] = last_indptr
if axis == 0:
return csr_matrix((data, indices, indptr),
shape=(sum_dim, constant_dim))
else:
return csc_matrix((data, indices, indptr),
shape=(constant_dim, sum_dim))
def hstack(blocks, format=None, dtype=None):
"""
Stack sparse matrices horizontally (column wise)
Parameters
----------
blocks
sequence of sparse matrices with compatible shapes
format : str
sparse format of the result (e.g., "csr")
by default an appropriate sparse matrix format is returned.
This choice is subject to change.
dtype : dtype, optional
The data-type of the output matrix. If not given, the dtype is
determined from that of `blocks`.
See Also
--------
vstack : stack sparse matrices vertically (row wise)
Examples
--------
>>> from scipy.sparse import coo_matrix, hstack
>>> A = coo_matrix([[1, 2], [3, 4]])
>>> B = coo_matrix([[5], [6]])
>>> hstack([A,B]).toarray()
array([[1, 2, 5],
[3, 4, 6]])
"""
return bmat([blocks], format=format, dtype=dtype)
def vstack(blocks, format=None, dtype=None):
"""
Stack sparse matrices vertically (row wise)
Parameters
----------
blocks
sequence of sparse matrices with compatible shapes
format : str, optional
sparse format of the result (e.g., "csr")
by default an appropriate sparse matrix format is returned.
This choice is subject to change.
dtype : dtype, optional
The data-type of the output matrix. If not given, the dtype is
determined from that of `blocks`.
See Also
--------
hstack : stack sparse matrices horizontally (column wise)
Examples
--------
>>> from scipy.sparse import coo_matrix, vstack
>>> A = coo_matrix([[1, 2], [3, 4]])
>>> B = coo_matrix([[5, 6]])
>>> vstack([A, B]).toarray()
array([[1, 2],
[3, 4],
[5, 6]])
"""
return bmat([[b] for b in blocks], format=format, dtype=dtype)
def bmat(blocks, format=None, dtype=None):
"""
Build a sparse matrix from sparse sub-blocks
Parameters
----------
blocks : array_like
Grid of sparse matrices with compatible shapes.
An entry of None implies an all-zero matrix.
format : {'bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil'}, optional
The sparse format of the result (e.g. "csr"). By default an
appropriate sparse matrix format is returned.
This choice is subject to change.
dtype : dtype, optional
The data-type of the output matrix. If not given, the dtype is
determined from that of `blocks`.
Returns
-------
bmat : sparse matrix
See Also
--------
block_diag, diags
Examples
--------
>>> from scipy.sparse import coo_matrix, bmat
>>> A = coo_matrix([[1, 2], [3, 4]])
>>> B = coo_matrix([[5], [6]])
>>> C = coo_matrix([[7]])
>>> bmat([[A, B], [None, C]]).toarray()
array([[1, 2, 5],
[3, 4, 6],
[0, 0, 7]])
>>> bmat([[A, None], [None, C]]).toarray()
array([[1, 2, 0],
[3, 4, 0],
[0, 0, 7]])
"""
blocks = np.asarray(blocks, dtype='object')
if blocks.ndim != 2:
raise ValueError('blocks must be 2-D')
M,N = blocks.shape
# check for fast path cases
if (N == 1 and format in (None, 'csr') and all(isinstance(b, csr_matrix)
for b in blocks.flat)):
A = _compressed_sparse_stack(blocks[:,0], 0)
if dtype is not None:
A = A.astype(dtype)
return A
elif (M == 1 and format in (None, 'csc')
and all(isinstance(b, csc_matrix) for b in blocks.flat)):
A = _compressed_sparse_stack(blocks[0,:], 1)
if dtype is not None:
A = A.astype(dtype)
return A
block_mask = np.zeros(blocks.shape, dtype=bool)
brow_lengths = np.zeros(M, dtype=np.int64)
bcol_lengths = np.zeros(N, dtype=np.int64)
# convert everything to COO format
for i in range(M):
for j in range(N):
if blocks[i,j] is not None:
A = coo_matrix(blocks[i,j])
blocks[i,j] = A
block_mask[i,j] = True
if brow_lengths[i] == 0:
brow_lengths[i] = A.shape[0]
elif brow_lengths[i] != A.shape[0]:
msg = ('blocks[{i},:] has incompatible row dimensions. '
'Got blocks[{i},{j}].shape[0] == {got}, '
'expected {exp}.'.format(i=i, j=j,
exp=brow_lengths[i],
got=A.shape[0]))
raise ValueError(msg)
if bcol_lengths[j] == 0:
bcol_lengths[j] = A.shape[1]
elif bcol_lengths[j] != A.shape[1]:
msg = ('blocks[:,{j}] has incompatible column dimensions. '
'Got blocks[{i},{j}].shape[1] == {got}, '
'expected {exp}.'.format(i=i, j=j,
exp=bcol_lengths[j],
got=A.shape[1]))
raise ValueError(msg)
nnz = sum(block.nnz for block in blocks[block_mask])
if dtype is None:
all_dtypes = [blk.dtype for blk in blocks[block_mask]]
dtype = upcast(*all_dtypes) if all_dtypes else None
row_offsets = np.append(0, np.cumsum(brow_lengths))
col_offsets = np.append(0, np.cumsum(bcol_lengths))
shape = (row_offsets[-1], col_offsets[-1])
data = np.empty(nnz, dtype=dtype)
idx_dtype = get_index_dtype(maxval=max(shape))
row = np.empty(nnz, dtype=idx_dtype)
col = np.empty(nnz, dtype=idx_dtype)
nnz = 0
ii, jj = np.nonzero(block_mask)
for i, j in zip(ii, jj):
B = blocks[i, j]
idx = slice(nnz, nnz + B.nnz)
data[idx] = B.data
row[idx] = B.row + row_offsets[i]
col[idx] = B.col + col_offsets[j]
nnz += B.nnz
return coo_matrix((data, (row, col)), shape=shape).asformat(format)
def block_diag(mats, format=None, dtype=None):
"""
Build a block diagonal sparse matrix from provided matrices.
Parameters
----------
mats : sequence of matrices
Input matrices.
format : str, optional
The sparse format of the result (e.g., "csr"). If not given, the matrix
is returned in "coo" format.
dtype : dtype specifier, optional
The data-type of the output matrix. If not given, the dtype is
determined from that of `blocks`.
Returns
-------
res : sparse matrix
Notes
-----
.. versionadded:: 0.11.0
See Also
--------
bmat, diags
Examples
--------
>>> from scipy.sparse import coo_matrix, block_diag
>>> A = coo_matrix([[1, 2], [3, 4]])
>>> B = coo_matrix([[5], [6]])
>>> C = coo_matrix([[7]])
>>> block_diag((A, B, C)).toarray()
array([[1, 2, 0, 0],
[3, 4, 0, 0],
[0, 0, 5, 0],
[0, 0, 6, 0],
[0, 0, 0, 7]])
"""
nmat = len(mats)
rows = []
for ia, a in enumerate(mats):
row = [None]*nmat
if issparse(a):
row[ia] = a
else:
row[ia] = coo_matrix(a)
rows.append(row)
return bmat(rows, format=format, dtype=dtype)
def random(m, n, density=0.01, format='coo', dtype=None,
random_state=None, data_rvs=None):
"""Generate a sparse matrix of the given shape and density with randomly
distributed values.
Parameters
----------
m, n : int
shape of the matrix
density : real, optional
density of the generated matrix: density equal to one means a full
matrix, density of 0 means a matrix with no non-zero items.
format : str, optional
sparse matrix format.
dtype : dtype, optional
type of the returned matrix values.
random_state : {numpy.random.RandomState, int}, optional
Random number generator or random seed. If not given, the singleton
numpy.random will be used. This random state will be used
for sampling the sparsity structure, but not necessarily for sampling
the values of the structurally nonzero entries of the matrix.
data_rvs : callable, optional
Samples a requested number of random values.
This function should take a single argument specifying the length
of the ndarray that it will return. The structurally nonzero entries
of the sparse random matrix will be taken from the array sampled
by this function. By default, uniform [0, 1) random values will be
sampled using the same random state as is used for sampling
the sparsity structure.
Returns
-------
res : sparse matrix
Notes
-----
Floating-point types are sampled uniformly on [0, 1) by default; integer
and complex dtypes are also handled when no custom ``data_rvs`` is given.
Examples
--------
>>> from scipy.sparse import random
>>> from scipy import stats
>>> import numpy as np
>>> class CustomRandomState(np.random.RandomState):
... def randint(self, k):
... i = np.random.randint(k)
... return i - i % 2
>>> np.random.seed(12345)
>>> rs = CustomRandomState()
>>> rvs = stats.poisson(25, loc=10).rvs
>>> S = random(3, 4, density=0.25, random_state=rs, data_rvs=rvs)
>>> S.A
array([[ 36., 0., 33., 0.], # random
[ 0., 0., 0., 0.],
[ 0., 0., 36., 0.]])
>>> from scipy.sparse import random
>>> from scipy.stats import rv_continuous
>>> class CustomDistribution(rv_continuous):
... def _rvs(self, size=None, random_state=None):
... return random_state.randn(*size)
>>> X = CustomDistribution(seed=2906)
>>> Y = X() # get a frozen version of the distribution
>>> S = random(3, 4, density=0.25, random_state=2906, data_rvs=Y.rvs)
>>> S.A
array([[ 0. , 0. , 0. , 0. ],
[ 0.13569738, 1.9467163 , -0.81205367, 0. ],
[ 0. , 0. , 0. , 0. ]])
"""
if density < 0 or density > 1:
raise ValueError("density expected to be 0 <= density <= 1")
dtype = np.dtype(dtype)
mn = m * n
tp = np.intc
if mn > np.iinfo(tp).max:
tp = np.int64
if mn > np.iinfo(tp).max:
msg = """\
Trying to generate a random sparse matrix such that the product of dimensions is
greater than %d - this is not supported on this machine
"""
raise ValueError(msg % np.iinfo(tp).max)
# Number of non zero values
k = int(round(density * m * n))
random_state = check_random_state(random_state)
if data_rvs is None:
if np.issubdtype(dtype, np.integer):
def data_rvs(n):
return rng_integers(random_state,
np.iinfo(dtype).min,
np.iinfo(dtype).max,
n,
dtype=dtype)
elif np.issubdtype(dtype, np.complexfloating):
def data_rvs(n):
return (random_state.uniform(size=n) +
random_state.uniform(size=n) * 1j)
else:
data_rvs = partial(random_state.uniform, 0., 1.)
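# Draw k distinct flat indices, then decode them column-major into (i, j).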
ind = random_state.choice(mn, size=k, replace=False)
j = np.floor(ind * 1. / m).astype(tp, copy=False)
i = (ind - j * m).astype(tp, copy=False)
vals = data_rvs(k).astype(dtype, copy=False)
return coo_matrix((vals, (i, j)), shape=(m, n)).asformat(format,
copy=False)
def rand(m, n, density=0.01, format="coo", dtype=None, random_state=None):
"""Generate a sparse matrix of the given shape and density with uniformly
distributed values.
Parameters
----------
m, n : int
shape of the matrix
density : real, optional
density of the generated matrix: density equal to one means a full
matrix, density of 0 means a matrix with no non-zero items.
format : str, optional
sparse matrix format.
dtype : dtype, optional
type of the returned matrix values.
random_state : {numpy.random.RandomState, int, np.random.Generator}, optional
Random number generator or random seed. If not given, the singleton
numpy.random will be used.
Returns
-------
res : sparse matrix
Notes
-----
Only float types are supported for now.
See Also
--------
scipy.sparse.random : Similar function that allows a user-specified random
data source.
Examples
--------
>>> from scipy.sparse import rand
>>> matrix = rand(3, 4, density=0.25, format="csr", random_state=42)
>>> matrix
<3x4 sparse matrix of type '<class 'numpy.float64'>'
with 3 stored elements in Compressed Sparse Row format>
>>> matrix.todense()
matrix([[0.05641158, 0. , 0. , 0.65088847],
[0. , 0. , 0. , 0.14286682],
[0. , 0. , 0. , 0. ]])
"""
return random(m, n, density, format, dtype, random_state)

View file

@@ -0,0 +1,617 @@
""" A sparse matrix in COOrdinate or 'triplet' format"""
__docformat__ = "restructuredtext en"
__all__ = ['coo_matrix', 'isspmatrix_coo']
from warnings import warn
import numpy as np
from ._sparsetools import coo_tocsr, coo_todense, coo_matvec
from .base import isspmatrix, SparseEfficiencyWarning, spmatrix
from .data import _data_matrix, _minmax_mixin
from .sputils import (upcast, upcast_char, to_native, isshape, getdtype,
get_index_dtype, downcast_intp_index, check_shape,
check_reshape_kwargs, matrix)
import operator
class coo_matrix(_data_matrix, _minmax_mixin):
"""
A sparse matrix in COOrdinate format.
Also known as the 'ijv' or 'triplet' format.
This can be instantiated in several ways:
coo_matrix(D)
with a dense matrix D
coo_matrix(S)
with another sparse matrix S (equivalent to S.tocoo())
coo_matrix((M, N), [dtype])
to construct an empty matrix with shape (M, N)
dtype is optional, defaulting to dtype='d'.
coo_matrix((data, (i, j)), [shape=(M, N)])
to construct from three arrays:
1. data[:] the entries of the matrix, in any order
2. i[:] the row indices of the matrix entries
3. j[:] the column indices of the matrix entries
Where ``A[i[k], j[k]] = data[k]``. When shape is not
specified, it is inferred from the index arrays
Attributes
----------
dtype : dtype
Data type of the matrix
shape : 2-tuple
Shape of the matrix
ndim : int
Number of dimensions (this is always 2)
nnz
Number of stored values, including explicit zeros
data
COO format data array of the matrix
row
COO format row index array of the matrix
col
COO format column index array of the matrix
Notes
-----
Sparse matrices can be used in arithmetic operations: they support
addition, subtraction, multiplication, division, and matrix power.
Advantages of the COO format
- facilitates fast conversion among sparse formats
- permits duplicate entries (see example)
- very fast conversion to and from CSR/CSC formats
Disadvantages of the COO format
- does not directly support:
+ arithmetic operations
+ slicing
Intended Usage
- COO is a fast format for constructing sparse matrices
- Once a matrix has been constructed, convert to CSR or
CSC format for fast arithmetic and matrix vector operations
- By default when converting to CSR or CSC format, duplicate (i,j)
entries will be summed together. This facilitates efficient
construction of finite element matrices and the like. (see example)
Examples
--------
>>> # Constructing an empty matrix
>>> import numpy as np
>>> from scipy.sparse import coo_matrix
>>> coo_matrix((3, 4), dtype=np.int8).toarray()
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=int8)
>>> # Constructing a matrix using ijv format
>>> row = np.array([0, 3, 1, 0])
>>> col = np.array([0, 3, 1, 2])
>>> data = np.array([4, 5, 7, 9])
>>> coo_matrix((data, (row, col)), shape=(4, 4)).toarray()
array([[4, 0, 9, 0],
[0, 7, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 5]])
>>> # Constructing a matrix with duplicate indices
>>> row = np.array([0, 0, 1, 3, 1, 0, 0])
>>> col = np.array([0, 2, 1, 3, 1, 0, 0])
>>> data = np.array([1, 1, 1, 1, 1, 1, 1])
>>> coo = coo_matrix((data, (row, col)), shape=(4, 4))
>>> # Duplicate indices are maintained until implicitly or explicitly summed
>>> np.max(coo.data)
1
>>> coo.toarray()
array([[3, 0, 1, 0],
[0, 2, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
"""
format = 'coo'
def __init__(self, arg1, shape=None, dtype=None, copy=False):
_data_matrix.__init__(self)
if isinstance(arg1, tuple):
if isshape(arg1):
M, N = arg1
self._shape = check_shape((M, N))
idx_dtype = get_index_dtype(maxval=max(M, N))
self.row = np.array([], dtype=idx_dtype)
self.col = np.array([], dtype=idx_dtype)
self.data = np.array([], getdtype(dtype, default=float))
self.has_canonical_format = True
else:
try:
obj, (row, col) = arg1
except (TypeError, ValueError):
raise TypeError('invalid input format')
if shape is None:
if len(row) == 0 or len(col) == 0:
raise ValueError('cannot infer dimensions from zero '
'sized index arrays')
M = operator.index(np.max(row)) + 1
N = operator.index(np.max(col)) + 1
self._shape = check_shape((M, N))
else:
# Use 2 steps to ensure shape has length 2.
M, N = shape
self._shape = check_shape((M, N))
idx_dtype = get_index_dtype(maxval=max(self.shape))
self.row = np.array(row, copy=copy, dtype=idx_dtype)
self.col = np.array(col, copy=copy, dtype=idx_dtype)
self.data = np.array(obj, copy=copy)
self.has_canonical_format = False
else:
if isspmatrix(arg1):
if isspmatrix_coo(arg1) and copy:
self.row = arg1.row.copy()
self.col = arg1.col.copy()
self.data = arg1.data.copy()
self._shape = check_shape(arg1.shape)
else:
coo = arg1.tocoo()
self.row = coo.row
self.col = coo.col
self.data = coo.data
self._shape = check_shape(coo.shape)
self.has_canonical_format = False
else:
#dense argument
M = np.atleast_2d(np.asarray(arg1))
if M.ndim != 2:
raise TypeError('expected dimension <= 2 array or matrix')
self._shape = check_shape(M.shape)
if shape is not None:
if check_shape(shape) != self._shape:
raise ValueError('inconsistent shapes: %s != %s' %
(shape, self._shape))
self.row, self.col = M.nonzero()
self.data = M[self.row, self.col]
self.has_canonical_format = True
if dtype is not None:
self.data = self.data.astype(dtype, copy=False)
self._check()
def reshape(self, *args, **kwargs):
shape = check_shape(args, self.shape)
order, copy = check_reshape_kwargs(kwargs)
# Return early if reshape is not required
if shape == self.shape:
if copy:
return self.copy()
else:
return self
nrows, ncols = self.shape
if order == 'C':
# Upcast to avoid overflows: the coo_matrix constructor
# below will downcast the results to a smaller dtype, if
# possible.
dtype = get_index_dtype(maxval=(ncols * max(0, nrows - 1) + max(0, ncols - 1)))
flat_indices = np.multiply(ncols, self.row, dtype=dtype) + self.col
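# divmod by the new column count recovers (row, col) under the new shape.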
new_row, new_col = divmod(flat_indices, shape[1])
elif order == 'F':
dtype = get_index_dtype(maxval=(nrows * max(0, ncols - 1) + max(0, nrows - 1)))
flat_indices = np.multiply(nrows, self.col, dtype=dtype) + self.row
new_col, new_row = divmod(flat_indices, shape[0])
else:
raise ValueError("'order' must be 'C' or 'F'")
# Handle copy here rather than passing on to the constructor so that no
# copy will be made of new_row and new_col regardless
if copy:
new_data = self.data.copy()
else:
new_data = self.data
return coo_matrix((new_data, (new_row, new_col)),
shape=shape, copy=False)
reshape.__doc__ = spmatrix.reshape.__doc__
def getnnz(self, axis=None):
if axis is None:
nnz = len(self.data)
if nnz != len(self.row) or nnz != len(self.col):
raise ValueError('row, column, and data arrays must all be the '
'same length')
if self.data.ndim != 1 or self.row.ndim != 1 or \
self.col.ndim != 1:
raise ValueError('row, column, and data arrays must be 1-D')
return int(nnz)
if axis < 0:
axis += 2
if axis == 0:
return np.bincount(downcast_intp_index(self.col),
minlength=self.shape[1])
elif axis == 1:
return np.bincount(downcast_intp_index(self.row),
minlength=self.shape[0])
else:
raise ValueError('axis out of bounds')
getnnz.__doc__ = spmatrix.getnnz.__doc__
def _check(self):
""" Checks data structure for consistency """
# index arrays should have integer data types
if self.row.dtype.kind != 'i':
warn("row index array has non-integer dtype (%s) "
% self.row.dtype.name)
if self.col.dtype.kind != 'i':
warn("col index array has non-integer dtype (%s) "
% self.col.dtype.name)
idx_dtype = get_index_dtype(maxval=max(self.shape))
self.row = np.asarray(self.row, dtype=idx_dtype)
self.col = np.asarray(self.col, dtype=idx_dtype)
self.data = to_native(self.data)
if self.nnz > 0:
if self.row.max() >= self.shape[0]:
raise ValueError('row index exceeds matrix dimensions')
if self.col.max() >= self.shape[1]:
raise ValueError('column index exceeds matrix dimensions')
if self.row.min() < 0:
raise ValueError('negative row index found')
if self.col.min() < 0:
raise ValueError('negative column index found')
def transpose(self, axes=None, copy=False):
if axes is not None:
raise ValueError(("Sparse matrices do not support "
"an 'axes' parameter because swapping "
"dimensions is the only logical permutation."))
M, N = self.shape
return coo_matrix((self.data, (self.col, self.row)),
shape=(N, M), copy=copy)
transpose.__doc__ = spmatrix.transpose.__doc__
def resize(self, *shape):
shape = check_shape(shape)
new_M, new_N = shape
M, N = self.shape
if new_M < M or new_N < N:
mask = np.logical_and(self.row < new_M, self.col < new_N)
if not mask.all():
self.row = self.row[mask]
self.col = self.col[mask]
self.data = self.data[mask]
self._shape = shape
resize.__doc__ = spmatrix.resize.__doc__
def toarray(self, order=None, out=None):
"""See the docstring for `spmatrix.toarray`."""
B = self._process_toarray_args(order, out)
fortran = int(B.flags.f_contiguous)
if not fortran and not B.flags.c_contiguous:
raise ValueError("Output array must be C or F contiguous")
M,N = self.shape
coo_todense(M, N, self.nnz, self.row, self.col, self.data,
B.ravel('A'), fortran)
return B
def tocsc(self, copy=False):
"""Convert this matrix to Compressed Sparse Column format
Duplicate entries will be summed together.
Examples
--------
>>> from numpy import array
>>> from scipy.sparse import coo_matrix
>>> row = array([0, 0, 1, 3, 1, 0, 0])
>>> col = array([0, 2, 1, 3, 1, 0, 0])
>>> data = array([1, 1, 1, 1, 1, 1, 1])
>>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsc()
>>> A.toarray()
array([[3, 0, 1, 0],
[0, 2, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
"""
from .csc import csc_matrix
if self.nnz == 0:
return csc_matrix(self.shape, dtype=self.dtype)
else:
M,N = self.shape
idx_dtype = get_index_dtype((self.col, self.row),
maxval=max(self.nnz, M))
row = self.row.astype(idx_dtype, copy=False)
col = self.col.astype(idx_dtype, copy=False)
indptr = np.empty(N + 1, dtype=idx_dtype)
indices = np.empty_like(row, dtype=idx_dtype)
data = np.empty_like(self.data, dtype=upcast(self.dtype))
coo_tocsr(N, M, self.nnz, col, row, self.data,
indptr, indices, data)
x = csc_matrix((data, indices, indptr), shape=self.shape)
if not self.has_canonical_format:
x.sum_duplicates()
return x
def tocsr(self, copy=False):
"""Convert this matrix to Compressed Sparse Row format
Duplicate entries will be summed together.
Examples
--------
>>> from numpy import array
>>> from scipy.sparse import coo_matrix
>>> row = array([0, 0, 1, 3, 1, 0, 0])
>>> col = array([0, 2, 1, 3, 1, 0, 0])
>>> data = array([1, 1, 1, 1, 1, 1, 1])
>>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsr()
>>> A.toarray()
array([[3, 0, 1, 0],
[0, 2, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
"""
from .csr import csr_matrix
if self.nnz == 0:
return csr_matrix(self.shape, dtype=self.dtype)
else:
M,N = self.shape
idx_dtype = get_index_dtype((self.row, self.col),
maxval=max(self.nnz, N))
row = self.row.astype(idx_dtype, copy=False)
col = self.col.astype(idx_dtype, copy=False)
indptr = np.empty(M + 1, dtype=idx_dtype)
indices = np.empty_like(col, dtype=idx_dtype)
data = np.empty_like(self.data, dtype=upcast(self.dtype))
coo_tocsr(M, N, self.nnz, row, col, self.data,
indptr, indices, data)
x = csr_matrix((data, indices, indptr), shape=self.shape)
if not self.has_canonical_format:
x.sum_duplicates()
return x
def tocoo(self, copy=False):
if copy:
return self.copy()
else:
return self
tocoo.__doc__ = spmatrix.tocoo.__doc__
def todia(self, copy=False):
from .dia import dia_matrix
self.sum_duplicates()
ks = self.col - self.row # the diagonal for each nonzero
diags, diag_idx = np.unique(ks, return_inverse=True)
if len(diags) > 100:
# probably undesired, should todia() have a maxdiags parameter?
warn("Constructing a DIA matrix with %d diagonals "
"is inefficient" % len(diags), SparseEfficiencyWarning)
#initialize and fill in data array
if self.data.size == 0:
data = np.zeros((0, 0), dtype=self.dtype)
else:
data = np.zeros((len(diags), self.col.max()+1), dtype=self.dtype)
data[diag_idx, self.col] = self.data
return dia_matrix((data,diags), shape=self.shape)
todia.__doc__ = spmatrix.todia.__doc__
def todok(self, copy=False):
from .dok import dok_matrix
self.sum_duplicates()
dok = dok_matrix((self.shape), dtype=self.dtype)
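# Bulk-load ((row, col) -> value) pairs directly into the underlying dict.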
dok._update(zip(zip(self.row,self.col),self.data))
return dok
todok.__doc__ = spmatrix.todok.__doc__
def diagonal(self, k=0):
rows, cols = self.shape
if k <= -rows or k >= cols:
return np.empty(0, dtype=self.data.dtype)
diag = np.zeros(min(rows + min(k, 0), cols - max(k, 0)),
dtype=self.dtype)
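# Entries lying on the k-th diagonal satisfy row + k == col.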
diag_mask = (self.row + k) == self.col
if self.has_canonical_format:
row = self.row[diag_mask]
data = self.data[diag_mask]
else:
row, _, data = self._sum_duplicates(self.row[diag_mask],
self.col[diag_mask],
self.data[diag_mask])
diag[row + min(k, 0)] = data
return diag
diagonal.__doc__ = _data_matrix.diagonal.__doc__
def _setdiag(self, values, k):
M, N = self.shape
if values.ndim and not len(values):
return
idx_dtype = self.row.dtype
# Determine which triples to keep and where to put the new ones.
full_keep = self.col - self.row != k
if k < 0:
max_index = min(M+k, N)
if values.ndim:
max_index = min(max_index, len(values))
keep = np.logical_or(full_keep, self.col >= max_index)
new_row = np.arange(-k, -k + max_index, dtype=idx_dtype)
new_col = np.arange(max_index, dtype=idx_dtype)
else:
max_index = min(M, N-k)
if values.ndim:
max_index = min(max_index, len(values))
keep = np.logical_or(full_keep, self.row >= max_index)
new_row = np.arange(max_index, dtype=idx_dtype)
new_col = np.arange(k, k + max_index, dtype=idx_dtype)
# Define the array of data consisting of the entries to be added.
if values.ndim:
new_data = values[:max_index]
else:
new_data = np.empty(max_index, dtype=self.dtype)
new_data[:] = values
# Update the internal structure.
self.row = np.concatenate((self.row[keep], new_row))
self.col = np.concatenate((self.col[keep], new_col))
self.data = np.concatenate((self.data[keep], new_data))
self.has_canonical_format = False
# needed by _data_matrix
def _with_data(self,data,copy=True):
"""Returns a matrix with the same sparsity structure as self,
but with different data. By default the index arrays
(i.e. .row and .col) are copied.
"""
if copy:
return coo_matrix((data, (self.row.copy(), self.col.copy())),
shape=self.shape, dtype=data.dtype)
else:
return coo_matrix((data, (self.row, self.col)),
shape=self.shape, dtype=data.dtype)
def sum_duplicates(self):
"""Eliminate duplicate matrix entries by adding them together
This is an *in place* operation
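Examples
--------
A small sketch of the merge: two values stored at the same (i, j)
position collapse into their sum.
>>> import numpy as np
>>> from scipy.sparse import coo_matrix
>>> row = np.array([0, 0, 1])
>>> col = np.array([0, 0, 1])
>>> data = np.array([1, 2, 3])
>>> A = coo_matrix((data, (row, col)))
>>> A.sum_duplicates()
>>> A.toarray()
array([[3, 0],
       [0, 3]])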
"""
if self.has_canonical_format:
return
summed = self._sum_duplicates(self.row, self.col, self.data)
self.row, self.col, self.data = summed
self.has_canonical_format = True
def _sum_duplicates(self, row, col, data):
# Assumes (data, row, col) not in canonical format.
if len(data) == 0:
return row, col, data
order = np.lexsort((row, col))
row = row[order]
col = col[order]
data = data[order]
unique_mask = ((row[1:] != row[:-1]) |
(col[1:] != col[:-1]))
unique_mask = np.append(True, unique_mask)
row = row[unique_mask]
col = col[unique_mask]
unique_inds, = np.nonzero(unique_mask)
data = np.add.reduceat(data, unique_inds, dtype=self.dtype)
return row, col, data
def eliminate_zeros(self):
"""Remove zero entries from the matrix
This is an *in place* operation
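Examples
--------
A minimal sketch: one explicitly stored zero is dropped while the
remaining entries are untouched.
>>> import numpy as np
>>> from scipy.sparse import coo_matrix
>>> A = coo_matrix((np.array([1, 0, 2]),
...                 (np.array([0, 1, 1]), np.array([0, 0, 1]))))
>>> A.nnz
3
>>> A.eliminate_zeros()
>>> A.nnz
2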
"""
mask = self.data != 0
self.data = self.data[mask]
self.row = self.row[mask]
self.col = self.col[mask]
#######################
# Arithmetic handlers #
#######################
def _add_dense(self, other):
if other.shape != self.shape:
raise ValueError('Incompatible shapes.')
dtype = upcast_char(self.dtype.char, other.dtype.char)
result = np.array(other, dtype=dtype, copy=True)
fortran = int(result.flags.f_contiguous)
M, N = self.shape
coo_todense(M, N, self.nnz, self.row, self.col, self.data,
result.ravel('A'), fortran)
return matrix(result, copy=False)
def _mul_vector(self, other):
#output array
result = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
other.dtype.char))
coo_matvec(self.nnz, self.row, self.col, self.data, other, result)
return result
def _mul_multivector(self, other):
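# Compute A @ x for each column x of `other`; each product fills one row
# of the transposed result, which is transposed back before returning.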
result = np.zeros((other.shape[1], self.shape[0]),
dtype=upcast_char(self.dtype.char, other.dtype.char))
for i, col in enumerate(other.T):
coo_matvec(self.nnz, self.row, self.col, self.data, col, result[i])
return result.T.view(type=type(other))
def isspmatrix_coo(x):
"""Is x of coo_matrix type?
Parameters
----------
x
object to check for being a coo matrix
Returns
-------
bool
True if x is a coo matrix, False otherwise
Examples
--------
>>> from scipy.sparse import coo_matrix, isspmatrix_coo
>>> isspmatrix_coo(coo_matrix([[5]]))
True
>>> from scipy.sparse import coo_matrix, csr_matrix, isspmatrix_coo
>>> isspmatrix_coo(csr_matrix([[5]]))
False
"""
return isinstance(x, coo_matrix)

View file

@@ -0,0 +1,259 @@
"""Compressed Sparse Column matrix format"""
__docformat__ = "restructuredtext en"
__all__ = ['csc_matrix', 'isspmatrix_csc']
import numpy as np
from .base import spmatrix
from ._sparsetools import csc_tocsr, expandptr
from .sputils import upcast, get_index_dtype
from .compressed import _cs_matrix
class csc_matrix(_cs_matrix):
"""
Compressed Sparse Column matrix
This can be instantiated in several ways:
csc_matrix(D)
with a dense matrix or rank-2 ndarray D
csc_matrix(S)
with another sparse matrix S (equivalent to S.tocsc())
csc_matrix((M, N), [dtype])
to construct an empty matrix with shape (M, N)
dtype is optional, defaulting to dtype='d'.
csc_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
where ``data``, ``row_ind`` and ``col_ind`` satisfy the
relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
csc_matrix((data, indices, indptr), [shape=(M, N)])
is the standard CSC representation where the row indices for
column i are stored in ``indices[indptr[i]:indptr[i+1]]``
and their corresponding values are stored in
``data[indptr[i]:indptr[i+1]]``. If the shape parameter is
not supplied, the matrix dimensions are inferred from
the index arrays.
Attributes
----------
dtype : dtype
Data type of the matrix
shape : 2-tuple
Shape of the matrix
ndim : int
Number of dimensions (this is always 2)
nnz
Number of stored values, including explicit zeros
data
Data array of the matrix
indices
CSC format index array
indptr
CSC format index pointer array
has_sorted_indices
Whether indices are sorted
Notes
-----
Sparse matrices can be used in arithmetic operations: they support
addition, subtraction, multiplication, division, and matrix power.
Advantages of the CSC format
- efficient arithmetic operations CSC + CSC, CSC * CSC, etc.
- efficient column slicing
- fast matrix vector products (CSR, BSR may be faster)
Disadvantages of the CSC format
- slow row slicing operations (consider CSR)
- changes to the sparsity structure are expensive (consider LIL or DOK)
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import csc_matrix
>>> csc_matrix((3, 4), dtype=np.int8).toarray()
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=int8)
>>> row = np.array([0, 2, 2, 0, 1, 2])
>>> col = np.array([0, 0, 1, 2, 2, 2])
>>> data = np.array([1, 2, 3, 4, 5, 6])
>>> csc_matrix((data, (row, col)), shape=(3, 3)).toarray()
array([[1, 0, 4],
[0, 0, 5],
[2, 3, 6]])
>>> indptr = np.array([0, 2, 3, 6])
>>> indices = np.array([0, 2, 2, 0, 1, 2])
>>> data = np.array([1, 2, 3, 4, 5, 6])
>>> csc_matrix((data, indices, indptr), shape=(3, 3)).toarray()
array([[1, 0, 4],
[0, 0, 5],
[2, 3, 6]])
"""
format = 'csc'
def transpose(self, axes=None, copy=False):
if axes is not None:
raise ValueError(("Sparse matrices do not support "
"an 'axes' parameter because swapping "
"dimensions is the only logical permutation."))
M, N = self.shape
from .csr import csr_matrix
return csr_matrix((self.data, self.indices,
self.indptr), (N, M), copy=copy)
transpose.__doc__ = spmatrix.transpose.__doc__
def __iter__(self):
for r in self.tocsr():
yield r
def tocsc(self, copy=False):
if copy:
return self.copy()
else:
return self
tocsc.__doc__ = spmatrix.tocsc.__doc__
def tocsr(self, copy=False):
M,N = self.shape
idx_dtype = get_index_dtype((self.indptr, self.indices),
maxval=max(self.nnz, N))
indptr = np.empty(M + 1, dtype=idx_dtype)
indices = np.empty(self.nnz, dtype=idx_dtype)
data = np.empty(self.nnz, dtype=upcast(self.dtype))
csc_tocsr(M, N,
self.indptr.astype(idx_dtype),
self.indices.astype(idx_dtype),
self.data,
indptr,
indices,
data)
from .csr import csr_matrix
A = csr_matrix((data, indices, indptr), shape=self.shape, copy=False)
A.has_sorted_indices = True
return A
tocsr.__doc__ = spmatrix.tocsr.__doc__
def nonzero(self):
# CSC can't use _cs_matrix's .nonzero method because it
# returns the indices sorted for self transposed.
# Get row and col indices, from _cs_matrix.tocoo
major_dim, minor_dim = self._swap(self.shape)
minor_indices = self.indices
major_indices = np.empty(len(minor_indices), dtype=self.indices.dtype)
expandptr(major_dim, self.indptr, major_indices)
row, col = self._swap((major_indices, minor_indices))
# Remove explicit zeros
nz_mask = self.data != 0
row = row[nz_mask]
col = col[nz_mask]
# Sort them to be in C-style order
ind = np.argsort(row, kind='mergesort')
row = row[ind]
col = col[ind]
return row, col
nonzero.__doc__ = _cs_matrix.nonzero.__doc__
def getrow(self, i):
"""Returns a copy of row i of the matrix, as a (1 x n)
CSR matrix (row vector).
"""
M, N = self.shape
i = int(i)
if i < 0:
i += M
if i < 0 or i >= M:
raise IndexError('index (%d) out of range' % i)
return self._get_submatrix(minor=i).tocsr()
def getcol(self, i):
"""Returns a copy of column i of the matrix, as a (m x 1)
CSC matrix (column vector).
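Examples
--------
A short sketch with a small integer matrix.
>>> import numpy as np
>>> from scipy.sparse import csc_matrix
>>> A = csc_matrix(np.array([[1, 0], [0, 2], [3, 0]]))
>>> A.getcol(0).toarray()
array([[1],
       [0],
       [3]])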
"""
M, N = self.shape
i = int(i)
if i < 0:
i += N
if i < 0 or i >= N:
raise IndexError('index (%d) out of range' % i)
return self._get_submatrix(major=i, copy=True)
def _get_intXarray(self, row, col):
return self._major_index_fancy(col)._get_submatrix(minor=row)
def _get_intXslice(self, row, col):
if col.step in (1, None):
return self._get_submatrix(major=col, minor=row, copy=True)
return self._major_slice(col)._get_submatrix(minor=row)
def _get_sliceXint(self, row, col):
if row.step in (1, None):
return self._get_submatrix(major=col, minor=row, copy=True)
return self._get_submatrix(major=col)._minor_slice(row)
def _get_sliceXarray(self, row, col):
return self._major_index_fancy(col)._minor_slice(row)
def _get_arrayXint(self, row, col):
return self._get_submatrix(major=col)._minor_index_fancy(row)
def _get_arrayXslice(self, row, col):
return self._major_slice(col)._minor_index_fancy(row)
# these functions are used by the parent class (_cs_matrix)
# to remove redundancy between csc_matrix and csr_matrix
def _swap(self, x):
"""swap the members of x if this is a column-oriented matrix
"""
return x[1], x[0]
def isspmatrix_csc(x):
"""Is x of csc_matrix type?
Parameters
----------
x
object to check for being a csc matrix
Returns
-------
bool
True if x is a csc matrix, False otherwise
Examples
--------
>>> from scipy.sparse import csc_matrix, isspmatrix_csc
>>> isspmatrix_csc(csc_matrix([[5]]))
True
>>> from scipy.sparse import csc_matrix, csr_matrix, isspmatrix_csc
>>> isspmatrix_csc(csr_matrix([[5]]))
False
"""
return isinstance(x, csc_matrix)

View file

@@ -0,0 +1,195 @@
r"""
Compressed sparse graph routines (:mod:`scipy.sparse.csgraph`)
==============================================================
.. currentmodule:: scipy.sparse.csgraph
Fast graph algorithms based on sparse matrix representations.
Contents
--------
.. autosummary::
:toctree: generated/
connected_components -- determine connected components of a graph
laplacian -- compute the laplacian of a graph
shortest_path -- compute the shortest path between points on a positive graph
dijkstra -- use Dijkstra's algorithm for shortest path
floyd_warshall -- use the Floyd-Warshall algorithm for shortest path
bellman_ford -- use the Bellman-Ford algorithm for shortest path
johnson -- use Johnson's algorithm for shortest path
breadth_first_order -- compute a breadth-first order of nodes
depth_first_order -- compute a depth-first order of nodes
breadth_first_tree -- construct the breadth-first tree from a given node
depth_first_tree -- construct a depth-first tree from a given node
minimum_spanning_tree -- construct the minimum spanning tree of a graph
reverse_cuthill_mckee -- compute permutation for reverse Cuthill-McKee ordering
maximum_flow -- solve the maximum flow problem for a graph
maximum_bipartite_matching -- compute a maximum matching of a bipartite graph
structural_rank -- compute the structural rank of a graph
NegativeCycleError
.. autosummary::
:toctree: generated/
construct_dist_matrix
csgraph_from_dense
csgraph_from_masked
csgraph_masked_from_dense
csgraph_to_dense
csgraph_to_masked
reconstruct_path
Graph Representations
---------------------
This module uses graphs which are stored in a matrix format. A
graph with N nodes can be represented by an (N x N) adjacency matrix G.
If there is a connection from node i to node j, then G[i, j] = w, where
w is the weight of the connection. For nodes i and j which are
not connected, the value depends on the representation:
- for dense array representations, non-edges are represented by
G[i, j] = 0, infinity, or NaN.
- for dense masked representations (of type np.ma.MaskedArray), non-edges
are represented by masked values. This can be useful when graphs with
zero-weight edges are desired.
- for sparse array representations, non-edges are represented by
non-entries in the matrix. This sort of sparse representation also
allows for edges with zero weights.
As a concrete example, imagine that you would like to represent the following
undirected graph::
G
(0)
/ \
1 2
/ \
(2) (1)
This graph has three nodes, where node 0 and 1 are connected by an edge of
weight 2, and nodes 0 and 2 are connected by an edge of weight 1.
We can construct the dense, masked, and sparse representations as follows,
keeping in mind that an undirected graph is represented by a symmetric matrix::
>>> import numpy as np
>>> G_dense = np.array([[0, 2, 1],
... [2, 0, 0],
... [1, 0, 0]])
>>> G_masked = np.ma.masked_values(G_dense, 0)
>>> from scipy.sparse import csr_matrix
>>> G_sparse = csr_matrix(G_dense)
This becomes more difficult when zero edges are significant. For example,
consider the situation when we slightly modify the above graph::
G2
(0)
/ \
0 2
/ \
(2) (1)
This is identical to the previous graph, except nodes 0 and 2 are connected
by an edge of zero weight. In this case, the dense representation above
leads to ambiguities: how can non-edges be represented if zero is a meaningful
value? In this case, either a masked or sparse representation must be used
to eliminate the ambiguity::
>>> G2_data = np.array([[np.inf, 2, 0 ],
... [2, np.inf, np.inf],
... [0, np.inf, np.inf]])
>>> G2_masked = np.ma.masked_invalid(G2_data)
>>> from scipy.sparse.csgraph import csgraph_from_dense
>>> # G2_sparse = csr_matrix(G2_data) would give the wrong result
>>> G2_sparse = csgraph_from_dense(G2_data, null_value=np.inf)
>>> G2_sparse.data
array([ 2., 0., 2., 0.])
Here we have used a utility routine from the csgraph submodule in order to
convert the dense representation to a sparse representation which can be
understood by the algorithms in the submodule. By viewing the data array, we
can see that the zero values are explicitly encoded in the graph.
Directed vs. undirected
^^^^^^^^^^^^^^^^^^^^^^^
Matrices may represent either directed or undirected graphs. This is
specified throughout the csgraph module by a boolean keyword. Graphs are
assumed to be directed by default. In a directed graph, traversal from node
i to node j can be accomplished over the edge G[i, j], but not the edge
G[j, i]. Consider the following dense graph::
>>> G_dense = np.array([[0, 1, 0],
... [2, 0, 3],
... [0, 4, 0]])
When ``directed=True`` we get the graph::
---1--> ---3-->
(0) (1) (2)
<--2--- <--4---
In a non-directed graph, traversal from node i to node j can be
accomplished over either G[i, j] or G[j, i]. If both edges are non-null
and have unequal weights, then the smaller of the two is used.
So for the same graph, when ``directed=False`` we get the graph::
(0)--1--(1)--2--(2)
Note that a symmetric matrix will represent an undirected graph, regardless
of whether the 'directed' keyword is set to True or False. In this case,
using ``directed=True`` generally leads to more efficient computation.
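As a small check, the shortest distance from node 2 to node 0 differs
under the two interpretations, since the directed path must follow the
explicit edges (reusing ``G_dense`` from above)::
>>> from scipy.sparse.csgraph import shortest_path
>>> shortest_path(G_dense, directed=True)[2, 0]
6.0
>>> shortest_path(G_dense, directed=False)[2, 0]
4.0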
The routines in this module accept as input either scipy.sparse representations
(csr, csc, or lil format), masked representations, or dense representations
with non-edges indicated by zeros, infinities, and NaN entries.
"""
__docformat__ = "restructuredtext en"
__all__ = ['connected_components',
'laplacian',
'shortest_path',
'floyd_warshall',
'dijkstra',
'bellman_ford',
'johnson',
'breadth_first_order',
'depth_first_order',
'breadth_first_tree',
'depth_first_tree',
'minimum_spanning_tree',
'reverse_cuthill_mckee',
'maximum_flow',
'maximum_bipartite_matching',
'structural_rank',
'construct_dist_matrix',
'reconstruct_path',
'csgraph_masked_from_dense',
'csgraph_from_dense',
'csgraph_from_masked',
'csgraph_to_dense',
'csgraph_to_masked',
'NegativeCycleError']
from ._laplacian import laplacian
from ._shortest_path import shortest_path, floyd_warshall, dijkstra,\
bellman_ford, johnson, NegativeCycleError
from ._traversal import breadth_first_order, depth_first_order, \
breadth_first_tree, depth_first_tree, connected_components
from ._min_spanning_tree import minimum_spanning_tree
from ._flow import maximum_flow
from ._matching import maximum_bipartite_matching
from ._reordering import reverse_cuthill_mckee, structural_rank
from ._tools import construct_dist_matrix, reconstruct_path,\
csgraph_from_dense, csgraph_to_dense, csgraph_masked_from_dense,\
csgraph_from_masked, csgraph_to_masked
from scipy._lib._testutils import PytestTester
test = PytestTester(__name__)
del PytestTester

View file

@@ -0,0 +1,126 @@
"""
Laplacian of a compressed-sparse graph
"""
# Authors: Aric Hagberg <hagberg@lanl.gov>
# Gael Varoquaux <gael.varoquaux@normalesup.org>
# Jake Vanderplas <vanderplas@astro.washington.edu>
# License: BSD
import numpy as np
from scipy.sparse import isspmatrix
###############################################################################
# Graph laplacian
def laplacian(csgraph, normed=False, return_diag=False, use_out_degree=False):
"""
Return the Laplacian matrix of a directed graph.
Parameters
----------
csgraph : array_like or sparse matrix, 2 dimensions
compressed-sparse graph, with shape (N, N).
normed : bool, optional
If True, then compute symmetric normalized Laplacian.
return_diag : bool, optional
If True, then also return an array related to vertex degrees.
use_out_degree : bool, optional
If True, then use out-degree instead of in-degree.
This distinction matters only if the graph is asymmetric.
Default: False.
Returns
-------
lap : ndarray or sparse matrix
The N x N laplacian matrix of csgraph. It will be a NumPy array (dense)
if the input was dense, or a sparse matrix otherwise.
diag : ndarray, optional
The length-N diagonal of the Laplacian matrix.
For the normalized Laplacian, this is the array of square roots
of vertex degrees or 1 if the degree is zero.
Notes
-----
The Laplacian matrix of a graph is sometimes referred to as the
"Kirchoff matrix" or the "admittance matrix", and is useful in many
parts of spectral graph theory. In particular, the eigen-decomposition
of the laplacian matrix can give insight into many properties of the graph.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import csgraph
>>> G = np.arange(5) * np.arange(5)[:, np.newaxis]
>>> G
array([[ 0, 0, 0, 0, 0],
[ 0, 1, 2, 3, 4],
[ 0, 2, 4, 6, 8],
[ 0, 3, 6, 9, 12],
[ 0, 4, 8, 12, 16]])
>>> csgraph.laplacian(G, normed=False)
array([[ 0, 0, 0, 0, 0],
[ 0, 9, -2, -3, -4],
[ 0, -2, 16, -6, -8],
[ 0, -3, -6, 21, -12],
[ 0, -4, -8, -12, 24]])
"""
if csgraph.ndim != 2 or csgraph.shape[0] != csgraph.shape[1]:
raise ValueError('csgraph must be a square matrix or array')
if normed and (np.issubdtype(csgraph.dtype, np.signedinteger)
or np.issubdtype(csgraph.dtype, np.uint)):
csgraph = csgraph.astype(float)
create_lap = _laplacian_sparse if isspmatrix(csgraph) else _laplacian_dense
degree_axis = 1 if use_out_degree else 0
lap, d = create_lap(csgraph, normed=normed, axis=degree_axis)
if return_diag:
return lap, d
return lap
def _setdiag_dense(A, d):
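# A stride of len(d) + 1 through the flattened array addresses the diagonal.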
A.flat[::len(d)+1] = d
def _laplacian_sparse(graph, normed=False, axis=0):
if graph.format in ('lil', 'dok'):
m = graph.tocoo()
needs_copy = False
else:
m = graph
needs_copy = True
w = m.sum(axis=axis).getA1() - m.diagonal()
if normed:
m = m.tocoo(copy=needs_copy)
isolated_node_mask = (w == 0)
w = np.where(isolated_node_mask, 1, np.sqrt(w))
m.data /= w[m.row]
m.data /= w[m.col]
m.data *= -1
m.setdiag(1 - isolated_node_mask)
else:
if m.format == 'dia':
m = m.copy()
else:
m = m.tocoo(copy=needs_copy)
m.data *= -1
m.setdiag(w)
return m, w
def _laplacian_dense(graph, normed=False, axis=0):
m = np.array(graph)
np.fill_diagonal(m, 0)
w = m.sum(axis=axis)
if normed:
isolated_node_mask = (w == 0)
w = np.where(isolated_node_mask, 1, np.sqrt(w))
m /= w
m /= w[:, np.newaxis]
m *= -1
_setdiag_dense(m, 1 - isolated_node_mask)
else:
m *= -1
_setdiag_dense(m, w)
return m, w

View file

@@ -0,0 +1,56 @@
import numpy as np
from scipy.sparse import csr_matrix, isspmatrix, isspmatrix_csc
from ._tools import csgraph_to_dense, csgraph_from_dense,\
csgraph_masked_from_dense, csgraph_from_masked
DTYPE = np.float64
def validate_graph(csgraph, directed, dtype=DTYPE,
csr_output=True, dense_output=True,
copy_if_dense=False, copy_if_sparse=False,
null_value_in=0, null_value_out=np.inf,
infinity_null=True, nan_null=True):
"""Routine for validation and conversion of csgraph inputs"""
if not (csr_output or dense_output):
raise ValueError("Internal: dense or csr output must be true")
# if undirected and csc storage, then transposing in-place
# is quicker than later converting to csr.
if (not directed) and isspmatrix_csc(csgraph):
csgraph = csgraph.T
if isspmatrix(csgraph):
if csr_output:
csgraph = csr_matrix(csgraph, dtype=DTYPE, copy=copy_if_sparse)
else:
csgraph = csgraph_to_dense(csgraph, null_value=null_value_out)
elif np.ma.isMaskedArray(csgraph):
if dense_output:
mask = csgraph.mask
csgraph = np.array(csgraph.data, dtype=DTYPE, copy=copy_if_dense)
csgraph[mask] = null_value_out
else:
csgraph = csgraph_from_masked(csgraph)
else:
if dense_output:
csgraph = csgraph_masked_from_dense(csgraph,
copy=copy_if_dense,
null_value=null_value_in,
nan_null=nan_null,
infinity_null=infinity_null)
mask = csgraph.mask
csgraph = np.asarray(csgraph.data, dtype=DTYPE)
csgraph[mask] = null_value_out
else:
csgraph = csgraph_from_dense(csgraph, null_value=null_value_in,
infinity_null=infinity_null,
nan_null=nan_null)
if csgraph.ndim != 2:
raise ValueError("compressed-sparse graph must be 2-D")
if csgraph.shape[0] != csgraph.shape[1]:
raise ValueError("compressed-sparse graph must be shape (N, N)")
return csgraph

View file

@@ -0,0 +1,38 @@
def configuration(parent_package='', top_path=None):
import numpy
from numpy.distutils.misc_util import Configuration
config = Configuration('csgraph', parent_package, top_path)
config.add_data_dir('tests')
config.add_extension('_shortest_path',
sources=['_shortest_path.c'],
include_dirs=[numpy.get_include()])
config.add_extension('_traversal',
sources=['_traversal.c'],
include_dirs=[numpy.get_include()])
config.add_extension('_min_spanning_tree',
sources=['_min_spanning_tree.c'],
include_dirs=[numpy.get_include()])
config.add_extension('_matching',
sources=['_matching.c'],
include_dirs=[numpy.get_include()])
config.add_extension('_flow',
sources=['_flow.c'],
include_dirs=[numpy.get_include()])
config.add_extension('_reordering',
sources=['_reordering.c'],
include_dirs=[numpy.get_include()])
config.add_extension('_tools',
sources=['_tools.c'],
include_dirs=[numpy.get_include()])
return config

View file

@@ -0,0 +1,99 @@
import numpy as np
from numpy.testing import assert_equal, assert_array_almost_equal
from scipy.sparse import csgraph
def test_weak_connections():
Xde = np.array([[0, 1, 0],
[0, 0, 0],
[0, 0, 0]])
Xsp = csgraph.csgraph_from_dense(Xde, null_value=0)
for X in Xsp, Xde:
n_components, labels =\
csgraph.connected_components(X, directed=True,
connection='weak')
assert_equal(n_components, 2)
assert_array_almost_equal(labels, [0, 0, 1])
def test_strong_connections():
X1de = np.array([[0, 1, 0],
[0, 0, 0],
[0, 0, 0]])
X2de = X1de + X1de.T
X1sp = csgraph.csgraph_from_dense(X1de, null_value=0)
X2sp = csgraph.csgraph_from_dense(X2de, null_value=0)
for X in X1sp, X1de:
n_components, labels =\
csgraph.connected_components(X, directed=True,
connection='strong')
assert_equal(n_components, 3)
labels.sort()
assert_array_almost_equal(labels, [0, 1, 2])
for X in X2sp, X2de:
n_components, labels =\
csgraph.connected_components(X, directed=True,
connection='strong')
assert_equal(n_components, 2)
labels.sort()
assert_array_almost_equal(labels, [0, 0, 1])
def test_strong_connections2():
X = np.array([[0, 0, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0],
[0, 0, 1, 0, 1, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0]])
n_components, labels =\
csgraph.connected_components(X, directed=True,
connection='strong')
assert_equal(n_components, 5)
labels.sort()
assert_array_almost_equal(labels, [0, 1, 2, 2, 3, 4])
def test_weak_connections2():
X = np.array([[0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0],
[0, 0, 1, 0, 1, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0]])
n_components, labels =\
csgraph.connected_components(X, directed=True,
connection='weak')
assert_equal(n_components, 2)
labels.sort()
assert_array_almost_equal(labels, [0, 0, 1, 1, 1, 1])
def test_ticket1876():
# Regression test: this failed in the original implementation
# There should be two strongly-connected components; previously gave one
g = np.array([[0, 1, 1, 0],
[1, 0, 0, 1],
[0, 0, 0, 1],
[0, 0, 1, 0]])
n_components, labels = csgraph.connected_components(g, connection='strong')
assert_equal(n_components, 2)
assert_equal(labels[0], labels[1])
assert_equal(labels[2], labels[3])
def test_fully_connected_graph():
# Fully connected dense matrices raised an exception.
# https://github.com/scipy/scipy/issues/3818
g = np.ones((4, 4))
n_components, labels = csgraph.connected_components(g)
assert_equal(n_components, 1)

View file

@@ -0,0 +1,61 @@
import numpy as np
from numpy.testing import assert_array_almost_equal
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import csgraph_from_dense, csgraph_to_dense
def test_csgraph_from_dense():
np.random.seed(1234)
G = np.random.random((10, 10))
some_nulls = (G < 0.4)
all_nulls = (G < 0.8)
for null_value in [0, np.nan, np.inf]:
G[all_nulls] = null_value
with np.errstate(invalid="ignore"):
G_csr = csgraph_from_dense(G, null_value=0)
G[all_nulls] = 0
assert_array_almost_equal(G, G_csr.toarray())
for null_value in [np.nan, np.inf]:
G[all_nulls] = 0
G[some_nulls] = null_value
with np.errstate(invalid="ignore"):
G_csr = csgraph_from_dense(G, null_value=0)
G[all_nulls] = 0
assert_array_almost_equal(G, G_csr.toarray())
def test_csgraph_to_dense():
np.random.seed(1234)
G = np.random.random((10, 10))
nulls = (G < 0.8)
G[nulls] = np.inf
G_csr = csgraph_from_dense(G)
for null_value in [0, 10, -np.inf, np.inf]:
G[nulls] = null_value
assert_array_almost_equal(G, csgraph_to_dense(G_csr, null_value))
def test_multiple_edges():
# create a random square matrix with an even number of elements
np.random.seed(1234)
X = np.random.random((10, 10))
Xcsr = csr_matrix(X)
# now double-up every other column
Xcsr.indices[::2] = Xcsr.indices[1::2]
# normal sparse toarray() will sum the duplicated edges
Xdense = Xcsr.toarray()
assert_array_almost_equal(Xdense[:, 1::2],
X[:, ::2] + X[:, 1::2])
# csgraph_to_dense chooses the minimum of each duplicated edge
Xdense = csgraph_to_dense(Xcsr)
assert_array_almost_equal(Xdense[:, 1::2],
np.minimum(X[:, ::2], X[:, 1::2]))

View file

@@ -0,0 +1,124 @@
import numpy as np
from numpy.testing import assert_array_equal
import pytest
from scipy.sparse import csr_matrix, csc_matrix
from scipy.sparse.csgraph import maximum_flow
def test_raises_on_dense_input():
with pytest.raises(TypeError):
graph = np.array([[0, 1], [0, 0]])
maximum_flow(graph, 0, 1)
def test_raises_on_csc_input():
with pytest.raises(TypeError):
graph = csc_matrix([[0, 1], [0, 0]])
maximum_flow(graph, 0, 1)
def test_raises_on_floating_point_input():
with pytest.raises(ValueError):
graph = csr_matrix([[0, 1.5], [0, 0]], dtype=np.float64)
maximum_flow(graph, 0, 1)
def test_raises_when_source_is_sink():
with pytest.raises(ValueError):
graph = csr_matrix([[0, 1], [0, 0]])
maximum_flow(graph, 0, 0)
@pytest.mark.parametrize('source', [-1, 2, 3])
def test_raises_when_source_is_out_of_bounds(source):
with pytest.raises(ValueError):
graph = csr_matrix([[0, 1], [0, 0]])
maximum_flow(graph, source, 1)
@pytest.mark.parametrize('sink', [-1, 2, 3])
def test_raises_when_sink_is_out_of_bounds(sink):
with pytest.raises(ValueError):
graph = csr_matrix([[0, 1], [0, 0]])
maximum_flow(graph, 0, sink)
def test_simple_graph():
# This graph looks as follows:
# (0) --5--> (1)
graph = csr_matrix([[0, 5], [0, 0]])
res = maximum_flow(graph, 0, 1)
assert res.flow_value == 5
expected_residual = np.array([[0, 5], [-5, 0]])
assert_array_equal(res.residual.toarray(), expected_residual)
def test_bottle_neck_graph():
# This graph cannot use the full capacity between 0 and 1:
# (0) --5--> (1) --3--> (2)
graph = csr_matrix([[0, 5, 0], [0, 0, 3], [0, 0, 0]])
res = maximum_flow(graph, 0, 2)
assert res.flow_value == 3
expected_residual = np.array([[0, 3, 0], [-3, 0, 3], [0, -3, 0]])
assert_array_equal(res.residual.toarray(), expected_residual)
def test_backwards_flow():
# This example causes backwards flow between vertices 3 and 4,
# and so this test ensures that we handle that accordingly. See
# https://stackoverflow.com/q/38843963/5085211
# for more information.
graph = csr_matrix([[0, 10, 0, 0, 10, 0, 0, 0],
[0, 0, 10, 0, 0, 0, 0, 0],
[0, 0, 0, 10, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 10],
[0, 0, 0, 10, 0, 10, 0, 0],
[0, 0, 0, 0, 0, 0, 10, 0],
[0, 0, 0, 0, 0, 0, 0, 10],
[0, 0, 0, 0, 0, 0, 0, 0]])
res = maximum_flow(graph, 0, 7)
assert res.flow_value == 20
expected_residual = np.array([[0, 10, 0, 0, 10, 0, 0, 0],
[-10, 0, 10, 0, 0, 0, 0, 0],
[0, -10, 0, 10, 0, 0, 0, 0],
[0, 0, -10, 0, 0, 0, 0, 10],
[-10, 0, 0, 0, 0, 10, 0, 0],
[0, 0, 0, 0, -10, 0, 10, 0],
[0, 0, 0, 0, 0, -10, 0, 10],
[0, 0, 0, -10, 0, 0, -10, 0]])
assert_array_equal(res.residual.toarray(), expected_residual)
def test_example_from_clrs_chapter_26_1():
# See page 659 in CLRS second edition, but note that the maximum flow
# we find is slightly different than the one in CLRS; we push a flow of
# 12 to v_1 instead of v_2.
graph = csr_matrix([[0, 16, 13, 0, 0, 0],
[0, 0, 10, 12, 0, 0],
[0, 4, 0, 0, 14, 0],
[0, 0, 9, 0, 0, 20],
[0, 0, 0, 7, 0, 4],
[0, 0, 0, 0, 0, 0]])
res = maximum_flow(graph, 0, 5)
assert res.flow_value == 23
expected_residual = np.array([[0, 12, 11, 0, 0, 0],
[-12, 0, 0, 12, 0, 0],
[-11, 0, 0, 0, 11, 0],
[0, -12, 0, 0, -7, 19],
[0, 0, -11, 7, 0, 4],
[0, 0, 0, -19, -4, 0]])
assert_array_equal(res.residual.toarray(), expected_residual)
def test_disconnected_graph():
# This tests the following disconnected graph:
# (0) --5--> (1) (2) --3--> (3)
graph = csr_matrix([[0, 5, 0, 0],
[0, 0, 0, 0],
[0, 0, 9, 3],
[0, 0, 0, 0]])
res = maximum_flow(graph, 0, 3)
assert res.flow_value == 0
expected_residual = np.zeros((4, 4), dtype=np.int32)
assert_array_equal(res.residual.toarray(), expected_residual)

View file

@@ -0,0 +1,134 @@
# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
# Jake Vanderplas <vanderplas@astro.washington.edu>
# License: BSD
import numpy as np
from numpy.testing import assert_allclose, assert_array_almost_equal
from pytest import raises as assert_raises
from scipy import sparse
from scipy.sparse import csgraph
def _explicit_laplacian(x, normed=False):
if sparse.issparse(x):
x = x.todense()
x = np.asarray(x)
y = -1.0 * x
for j in range(y.shape[0]):
y[j,j] = x[j,j+1:].sum() + x[j,:j].sum()
if normed:
d = np.diag(y).copy()
d[d == 0] = 1.0
y /= d[:,None]**.5
y /= d[None,:]**.5
return y
def _check_symmetric_graph_laplacian(mat, normed):
if not hasattr(mat, 'shape'):
mat = eval(mat, dict(np=np, sparse=sparse))
if sparse.issparse(mat):
sp_mat = mat
mat = sp_mat.todense()
else:
sp_mat = sparse.csr_matrix(mat)
laplacian = csgraph.laplacian(mat, normed=normed)
n_nodes = mat.shape[0]
if not normed:
assert_array_almost_equal(laplacian.sum(axis=0), np.zeros(n_nodes))
assert_array_almost_equal(laplacian.T, laplacian)
assert_array_almost_equal(laplacian,
csgraph.laplacian(sp_mat, normed=normed).todense())
assert_array_almost_equal(laplacian,
_explicit_laplacian(mat, normed=normed))
def test_laplacian_value_error():
for t in int, float, complex:
for m in ([1, 1],
[[[1]]],
[[1, 2, 3], [4, 5, 6]],
[[1, 2], [3, 4], [5, 5]]):
A = np.array(m, dtype=t)
assert_raises(ValueError, csgraph.laplacian, A)
def test_symmetric_graph_laplacian():
symmetric_mats = ('np.arange(10) * np.arange(10)[:, np.newaxis]',
'np.ones((7, 7))',
'np.eye(19)',
'sparse.diags([1, 1], [-1, 1], shape=(4,4))',
'sparse.diags([1, 1], [-1, 1], shape=(4,4)).todense()',
'np.asarray(sparse.diags([1, 1], [-1, 1], shape=(4,4)).todense())',
'np.vander(np.arange(4)) + np.vander(np.arange(4)).T')
for mat_str in symmetric_mats:
for normed in True, False:
_check_symmetric_graph_laplacian(mat_str, normed)
def _assert_allclose_sparse(a, b, **kwargs):
# helper function that can deal with sparse matrices
if sparse.issparse(a):
a = a.toarray()
if sparse.issparse(b):
b = b.toarray()
assert_allclose(a, b, **kwargs)
def _check_laplacian(A, desired_L, desired_d, normed, use_out_degree):
for arr_type in np.array, sparse.csr_matrix, sparse.coo_matrix:
for t in int, float, complex:
adj = arr_type(A, dtype=t)
L = csgraph.laplacian(adj, normed=normed, return_diag=False,
use_out_degree=use_out_degree)
_assert_allclose_sparse(L, desired_L, atol=1e-12)
L, d = csgraph.laplacian(adj, normed=normed, return_diag=True,
use_out_degree=use_out_degree)
_assert_allclose_sparse(L, desired_L, atol=1e-12)
_assert_allclose_sparse(d, desired_d, atol=1e-12)
def test_asymmetric_laplacian():
# adjacency matrix
A = [[0, 1, 0],
[4, 2, 0],
[0, 0, 0]]
# Laplacian matrix using out-degree
L = [[1, -1, 0],
[-4, 4, 0],
[0, 0, 0]]
d = [1, 4, 0]
_check_laplacian(A, L, d, normed=False, use_out_degree=True)
# normalized Laplacian matrix using out-degree
L = [[1, -0.5, 0],
[-2, 1, 0],
[0, 0, 0]]
d = [1, 2, 1]
_check_laplacian(A, L, d, normed=True, use_out_degree=True)
# Laplacian matrix using in-degree
L = [[4, -1, 0],
[-4, 1, 0],
[0, 0, 0]]
d = [4, 1, 0]
_check_laplacian(A, L, d, normed=False, use_out_degree=False)
# normalized Laplacian matrix using in-degree
L = [[1, -0.5, 0],
[-2, 1, 0],
[0, 0, 0]]
d = [2, 1, 1]
_check_laplacian(A, L, d, normed=True, use_out_degree=False)
def test_sparse_formats():
for fmt in ('csr', 'csc', 'coo', 'lil', 'dok', 'dia', 'bsr'):
mat = sparse.diags([1, 1], [-1, 1], shape=(4,4), format=fmt)
for normed in True, False:
_check_symmetric_graph_laplacian(mat, normed)

View file

@@ -0,0 +1,145 @@
import numpy as np
from numpy.testing import assert_array_equal, assert_equal
import pytest
from scipy.sparse import csr_matrix, coo_matrix, diags
from scipy.sparse.csgraph import maximum_bipartite_matching
def test_raises_on_dense_input():
with pytest.raises(TypeError):
graph = np.array([[0, 1], [0, 0]])
maximum_bipartite_matching(graph)
def test_empty_graph():
graph = csr_matrix((0, 0))
x = maximum_bipartite_matching(graph, perm_type='row')
y = maximum_bipartite_matching(graph, perm_type='column')
expected_matching = np.array([])
assert_array_equal(expected_matching, x)
assert_array_equal(expected_matching, y)
def test_empty_left_partition():
graph = csr_matrix((2, 0))
x = maximum_bipartite_matching(graph, perm_type='row')
y = maximum_bipartite_matching(graph, perm_type='column')
assert_array_equal(np.array([]), x)
assert_array_equal(np.array([-1, -1]), y)
def test_empty_right_partition():
graph = csr_matrix((0, 3))
x = maximum_bipartite_matching(graph, perm_type='row')
y = maximum_bipartite_matching(graph, perm_type='column')
assert_array_equal(np.array([-1, -1, -1]), x)
assert_array_equal(np.array([]), y)
def test_graph_with_no_edges():
graph = csr_matrix((2, 2))
x = maximum_bipartite_matching(graph, perm_type='row')
y = maximum_bipartite_matching(graph, perm_type='column')
assert_array_equal(np.array([-1, -1]), x)
assert_array_equal(np.array([-1, -1]), y)
def test_graph_that_causes_augmentation():
# In this graph, column 1 is initially assigned to row 1, but it should be
# reassigned to make room for row 2.
graph = csr_matrix([[1, 1], [1, 0]])
x = maximum_bipartite_matching(graph, perm_type='column')
y = maximum_bipartite_matching(graph, perm_type='row')
expected_matching = np.array([1, 0])
assert_array_equal(expected_matching, x)
assert_array_equal(expected_matching, y)
def test_graph_with_more_rows_than_columns():
graph = csr_matrix([[1, 1], [1, 0], [0, 1]])
x = maximum_bipartite_matching(graph, perm_type='column')
y = maximum_bipartite_matching(graph, perm_type='row')
assert_array_equal(np.array([0, -1, 1]), x)
assert_array_equal(np.array([0, 2]), y)
def test_graph_with_more_columns_than_rows():
graph = csr_matrix([[1, 1, 0], [0, 0, 1]])
x = maximum_bipartite_matching(graph, perm_type='column')
y = maximum_bipartite_matching(graph, perm_type='row')
assert_array_equal(np.array([0, 2]), x)
assert_array_equal(np.array([0, -1, 1]), y)
def test_explicit_zeros_count_as_edges():
data = [0, 0]
indices = [1, 0]
indptr = [0, 1, 2]
graph = csr_matrix((data, indices, indptr), shape=(2, 2))
x = maximum_bipartite_matching(graph, perm_type='row')
y = maximum_bipartite_matching(graph, perm_type='column')
expected_matching = np.array([1, 0])
assert_array_equal(expected_matching, x)
assert_array_equal(expected_matching, y)
def test_feasibility_of_result():
# This is a regression test for GitHub issue #11458
data = np.ones(50, dtype=int)
indices = [11, 12, 19, 22, 23, 5, 22, 3, 8, 10, 5, 6, 11, 12, 13, 5, 13,
14, 20, 22, 3, 15, 3, 13, 14, 11, 12, 19, 22, 23, 5, 22, 3, 8,
10, 5, 6, 11, 12, 13, 5, 13, 14, 20, 22, 3, 15, 3, 13, 14]
indptr = [0, 5, 7, 10, 10, 15, 20, 22, 22, 23, 25, 30, 32, 35, 35, 40, 45,
47, 47, 48, 50]
graph = csr_matrix((data, indices, indptr), shape=(20, 25))
x = maximum_bipartite_matching(graph, perm_type='row')
y = maximum_bipartite_matching(graph, perm_type='column')
assert (x != -1).sum() == 13
assert (y != -1).sum() == 13
# Ensure that each element of the matching is in fact an edge in the graph.
for u, v in zip(range(graph.shape[0]), y):
if v != -1:
assert graph[u, v]
for u, v in zip(x, range(graph.shape[1])):
if u != -1:
assert graph[u, v]
def test_large_random_graph_with_one_edge_incident_to_each_vertex():
np.random.seed(42)
A = diags(np.ones(25), offsets=0, format='csr')
rand_perm = np.random.permutation(25)
rand_perm2 = np.random.permutation(25)
Rrow = np.arange(25)
Rcol = rand_perm
Rdata = np.ones(25, dtype=int)
Rmat = coo_matrix((Rdata, (Rrow, Rcol))).tocsr()
Crow = rand_perm2
Ccol = np.arange(25)
Cdata = np.ones(25, dtype=int)
Cmat = coo_matrix((Cdata, (Crow, Ccol))).tocsr()
# Randomly permute identity matrix
B = Rmat * A * Cmat
# Row permute
perm = maximum_bipartite_matching(B, perm_type='row')
Rrow = np.arange(25)
Rcol = perm
Rdata = np.ones(25, dtype=int)
Rmat = coo_matrix((Rdata, (Rrow, Rcol))).tocsr()
C1 = Rmat * B
# Column permute
perm2 = maximum_bipartite_matching(B, perm_type='column')
Crow = perm2
Ccol = np.arange(25)
Cdata = np.ones(25, dtype=int)
Cmat = coo_matrix((Cdata, (Crow, Ccol))).tocsr()
C2 = B * Cmat
# Should get identity matrix back
assert_equal(any(C1.diagonal() == 0), False)
assert_equal(any(C2.diagonal() == 0), False)

View file

@@ -0,0 +1,70 @@
import numpy as np
from numpy.testing import assert_equal
from scipy.sparse.csgraph import reverse_cuthill_mckee, structural_rank
from scipy.sparse import csc_matrix, csr_matrix, coo_matrix
def test_graph_reverse_cuthill_mckee():
A = np.array([[1, 0, 0, 0, 1, 0, 0, 0],
[0, 1, 1, 0, 0, 1, 0, 1],
[0, 1, 1, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 1, 0],
[1, 0, 1, 0, 1, 0, 0, 0],
[0, 1, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 1, 0, 0, 1, 0],
[0, 1, 0, 0, 0, 1, 0, 1]], dtype=int)
graph = csr_matrix(A)
perm = reverse_cuthill_mckee(graph)
correct_perm = np.array([6, 3, 7, 5, 1, 2, 4, 0])
assert_equal(perm, correct_perm)
# Test int64 indices input
graph.indices = graph.indices.astype('int64')
graph.indptr = graph.indptr.astype('int64')
perm = reverse_cuthill_mckee(graph, True)
assert_equal(perm, correct_perm)
def test_graph_reverse_cuthill_mckee_ordering():
data = np.ones(63,dtype=int)
rows = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2,
2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9,
9, 10, 10, 10, 10, 10, 11, 11, 11, 11,
12, 12, 12, 13, 13, 13, 13, 14, 14, 14,
14, 15, 15, 15, 15, 15])
cols = np.array([0, 2, 5, 8, 10, 1, 3, 9, 11, 0, 2,
7, 10, 1, 3, 11, 4, 6, 12, 14, 0, 7, 13,
15, 4, 6, 14, 2, 5, 7, 15, 0, 8, 10, 13,
1, 9, 11, 0, 2, 8, 10, 15, 1, 3, 9, 11,
4, 12, 14, 5, 8, 13, 15, 4, 6, 12, 14,
5, 7, 10, 13, 15])
graph = coo_matrix((data, (rows,cols))).tocsr()
perm = reverse_cuthill_mckee(graph)
correct_perm = np.array([12, 14, 4, 6, 10, 8, 2, 15,
0, 13, 7, 5, 9, 11, 1, 3])
assert_equal(perm, correct_perm)
def test_graph_structural_rank():
# Test square matrix #1
A = csc_matrix([[1, 1, 0],
[1, 0, 1],
[0, 1, 0]])
assert_equal(structural_rank(A), 3)
# Test square matrix #2
rows = np.array([0,0,0,0,0,1,1,2,2,3,3,3,3,3,3,4,4,5,5,6,6,7,7])
cols = np.array([0,1,2,3,4,2,5,2,6,0,1,3,5,6,7,4,5,5,6,2,6,2,4])
data = np.ones_like(rows)
B = coo_matrix((data,(rows,cols)), shape=(8,8))
assert_equal(structural_rank(B), 6)
    # Test non-square matrix
C = csc_matrix([[1, 0, 2, 0],
[2, 0, 4, 0]])
assert_equal(structural_rank(C), 2)
    # Test tall matrix
assert_equal(structural_rank(C.T), 2)


@@ -0,0 +1,334 @@
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_array_equal
from pytest import raises as assert_raises
from scipy.sparse.csgraph import (shortest_path, dijkstra, johnson,
bellman_ford, construct_dist_matrix,
NegativeCycleError)
import scipy.sparse
import pytest
directed_G = np.array([[0, 3, 3, 0, 0],
[0, 0, 0, 2, 4],
[0, 0, 0, 0, 0],
[1, 0, 0, 0, 0],
[2, 0, 0, 2, 0]], dtype=float)
undirected_G = np.array([[0, 3, 3, 1, 2],
[3, 0, 0, 2, 4],
[3, 0, 0, 0, 0],
[1, 2, 0, 0, 2],
[2, 4, 0, 2, 0]], dtype=float)
unweighted_G = (directed_G > 0).astype(float)
directed_SP = [[0, 3, 3, 5, 7],
[3, 0, 6, 2, 4],
[np.inf, np.inf, 0, np.inf, np.inf],
[1, 4, 4, 0, 8],
[2, 5, 5, 2, 0]]
directed_sparse_zero_G = scipy.sparse.csr_matrix(([0, 1, 2, 3, 1],
([0, 1, 2, 3, 4],
[1, 2, 0, 4, 3])),
shape = (5, 5))
directed_sparse_zero_SP = [[0, 0, 1, np.inf, np.inf],
[3, 0, 1, np.inf, np.inf],
[2, 2, 0, np.inf, np.inf],
[np.inf, np.inf, np.inf, 0, 3],
[np.inf, np.inf, np.inf, 1, 0]]
undirected_sparse_zero_G = scipy.sparse.csr_matrix(([0, 0, 1, 1, 2, 2, 1, 1],
([0, 1, 1, 2, 2, 0, 3, 4],
[1, 0, 2, 1, 0, 2, 4, 3])),
shape = (5, 5))
undirected_sparse_zero_SP = [[0, 0, 1, np.inf, np.inf],
[0, 0, 1, np.inf, np.inf],
[1, 1, 0, np.inf, np.inf],
[np.inf, np.inf, np.inf, 0, 1],
[np.inf, np.inf, np.inf, 1, 0]]
directed_pred = np.array([[-9999, 0, 0, 1, 1],
[3, -9999, 0, 1, 1],
[-9999, -9999, -9999, -9999, -9999],
[3, 0, 0, -9999, 1],
[4, 0, 0, 4, -9999]], dtype=float)
undirected_SP = np.array([[0, 3, 3, 1, 2],
[3, 0, 6, 2, 4],
[3, 6, 0, 4, 5],
[1, 2, 4, 0, 2],
[2, 4, 5, 2, 0]], dtype=float)
undirected_SP_limit_2 = np.array([[0, np.inf, np.inf, 1, 2],
[np.inf, 0, np.inf, 2, np.inf],
[np.inf, np.inf, 0, np.inf, np.inf],
[1, 2, np.inf, 0, 2],
[2, np.inf, np.inf, 2, 0]], dtype=float)
undirected_SP_limit_0 = np.ones((5, 5), dtype=float) - np.eye(5)
undirected_SP_limit_0[undirected_SP_limit_0 > 0] = np.inf
undirected_pred = np.array([[-9999, 0, 0, 0, 0],
[1, -9999, 0, 1, 1],
[2, 0, -9999, 0, 0],
[3, 3, 0, -9999, 3],
[4, 4, 0, 4, -9999]], dtype=float)
methods = ['auto', 'FW', 'D', 'BF', 'J']
def test_dijkstra_limit():
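    # Paths longer than `limit` are cut off and reported as inf; a limit
    # of 0 therefore leaves only the zero-length self-distances finite.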
limits = [0, 2, np.inf]
results = [undirected_SP_limit_0,
undirected_SP_limit_2,
undirected_SP]
def check(limit, result):
SP = dijkstra(undirected_G, directed=False, limit=limit)
assert_array_almost_equal(SP, result)
for limit, result in zip(limits, results):
check(limit, result)
def test_directed():
def check(method):
SP = shortest_path(directed_G, method=method, directed=True,
overwrite=False)
assert_array_almost_equal(SP, directed_SP)
for method in methods:
check(method)
def test_undirected():
def check(method, directed_in):
if directed_in:
SP1 = shortest_path(directed_G, method=method, directed=False,
overwrite=False)
assert_array_almost_equal(SP1, undirected_SP)
else:
SP2 = shortest_path(undirected_G, method=method, directed=True,
overwrite=False)
assert_array_almost_equal(SP2, undirected_SP)
for method in methods:
for directed_in in (True, False):
check(method, directed_in)
def test_directed_sparse_zero():
# test directed sparse graph with zero-weight edge and two connected components
def check(method):
SP = shortest_path(directed_sparse_zero_G, method=method, directed=True,
overwrite=False)
assert_array_almost_equal(SP, directed_sparse_zero_SP)
for method in methods:
check(method)
def test_undirected_sparse_zero():
def check(method, directed_in):
if directed_in:
SP1 = shortest_path(directed_sparse_zero_G, method=method, directed=False,
overwrite=False)
assert_array_almost_equal(SP1, undirected_sparse_zero_SP)
else:
SP2 = shortest_path(undirected_sparse_zero_G, method=method, directed=True,
overwrite=False)
assert_array_almost_equal(SP2, undirected_sparse_zero_SP)
for method in methods:
for directed_in in (True, False):
check(method, directed_in)
@pytest.mark.parametrize('directed, SP_ans',
((True, directed_SP),
(False, undirected_SP)))
@pytest.mark.parametrize('indices', ([0, 2, 4], [0, 4], [3, 4], [0, 0]))
def test_dijkstra_indices_min_only(directed, SP_ans, indices):
SP_ans = np.array(SP_ans)
indices = np.array(indices, dtype=np.int64)
min_ind_ans = indices[np.argmin(SP_ans[indices, :], axis=0)]
min_d_ans = np.zeros(SP_ans.shape[0], SP_ans.dtype)
for k in range(SP_ans.shape[0]):
min_d_ans[k] = SP_ans[min_ind_ans[k], k]
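    # Vertices unreachable from every source get the -9999 sentinel that
    # dijkstra also uses for missing predecessors and sources.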
min_ind_ans[np.isinf(min_d_ans)] = -9999
SP, pred, sources = dijkstra(directed_G,
directed=directed,
indices=indices,
min_only=True,
return_predecessors=True)
assert_array_almost_equal(SP, min_d_ans)
assert_array_equal(min_ind_ans, sources)
SP = dijkstra(directed_G,
directed=directed,
indices=indices,
min_only=True,
return_predecessors=False)
assert_array_almost_equal(SP, min_d_ans)
@pytest.mark.parametrize('n', (10, 100, 1000))
def test_shortest_path_min_only_random(n):
np.random.seed(1234)
data = scipy.sparse.rand(n, n, density=0.5, format='lil',
random_state=42, dtype=np.float64)
data.setdiag(np.zeros(n, dtype=np.bool_))
# choose some random vertices
v = np.arange(n)
np.random.shuffle(v)
indices = v[:int(n*.1)]
ds, pred, sources = dijkstra(data,
directed=False,
indices=indices,
min_only=True,
return_predecessors=True)
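    # Walk each predecessor chain back to its root and check that every
    # vertex on the chain reports the same source.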
for k in range(n):
p = pred[k]
s = sources[k]
        while p != -9999:
            assert sources[p] == s
p = pred[p]
def test_shortest_path_indices():
indices = np.arange(4)
def check(func, indshape):
outshape = indshape + (5,)
SP = func(directed_G, directed=False,
indices=indices.reshape(indshape))
assert_array_almost_equal(SP, undirected_SP[indices].reshape(outshape))
for indshape in [(4,), (4, 1), (2, 2)]:
for func in (dijkstra, bellman_ford, johnson, shortest_path):
check(func, indshape)
assert_raises(ValueError, shortest_path, directed_G, method='FW',
indices=indices)
def test_predecessors():
SP_res = {True: directed_SP,
False: undirected_SP}
pred_res = {True: directed_pred,
False: undirected_pred}
def check(method, directed):
SP, pred = shortest_path(directed_G, method, directed=directed,
overwrite=False,
return_predecessors=True)
assert_array_almost_equal(SP, SP_res[directed])
assert_array_almost_equal(pred, pred_res[directed])
for method in methods:
for directed in (True, False):
check(method, directed)
def test_construct_shortest_path():
def check(method, directed):
SP1, pred = shortest_path(directed_G,
directed=directed,
overwrite=False,
return_predecessors=True)
SP2 = construct_dist_matrix(directed_G, pred, directed=directed)
assert_array_almost_equal(SP1, SP2)
for method in methods:
for directed in (True, False):
check(method, directed)
def test_unweighted_path():
def check(method, directed):
SP1 = shortest_path(directed_G,
directed=directed,
overwrite=False,
unweighted=True)
SP2 = shortest_path(unweighted_G,
directed=directed,
overwrite=False,
unweighted=False)
assert_array_almost_equal(SP1, SP2)
for method in methods:
for directed in (True, False):
check(method, directed)
def test_negative_cycles():
# create a small graph with a negative cycle
graph = np.ones([5, 5])
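    # graph.flat[::6] strides the main diagonal of the 5 x 5 array
    # (step n + 1 = 6), zeroing the self-loops.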
graph.flat[::6] = 0
graph[1, 2] = -2
def check(method, directed):
assert_raises(NegativeCycleError, shortest_path, graph, method,
directed)
for method in ['FW', 'J', 'BF']:
for directed in (True, False):
check(method, directed)
def test_masked_input():
    # Mask the zero entries so that the masked array itself, rather than
    # the raw array, is what shortest_path receives.
    G = np.ma.masked_equal(directed_G, 0)

    def check(method):
        SP = shortest_path(G, method=method, directed=True,
                           overwrite=False)
assert_array_almost_equal(SP, directed_SP)
for method in methods:
check(method)
def test_overwrite():
G = np.array([[0, 3, 3, 1, 2],
[3, 0, 0, 2, 4],
[3, 0, 0, 0, 0],
[1, 2, 0, 0, 2],
[2, 4, 0, 2, 0]], dtype=float)
foo = G.copy()
shortest_path(foo, overwrite=False)
assert_array_equal(foo, G)
@pytest.mark.parametrize('method', methods)
def test_buffer(method):
# Smoke test that sparse matrices with read-only buffers (e.g., those from
# joblib workers) do not cause::
#
# ValueError: buffer source array is read-only
#
G = scipy.sparse.csr_matrix([[1.]])
G.data.flags['WRITEABLE'] = False
shortest_path(G, method=method)
def test_NaN_warnings():
with pytest.warns(None) as record:
shortest_path(np.array([[0, 1], [np.nan, 0]]))
for r in record:
assert r.category is not RuntimeWarning
def test_sparse_matrices():
    # Test that lil, csr and csc sparse matrices do not cause errors.
G_dense = np.array([[0, 3, 0, 0, 0],
[0, 0, -1, 0, 0],
[0, 0, 0, 2, 0],
[0, 0, 0, 0, 4],
[0, 0, 0, 0, 0]], dtype=float)
SP = shortest_path(G_dense)
G_csr = scipy.sparse.csr_matrix(G_dense)
G_csc = scipy.sparse.csc_matrix(G_dense)
G_lil = scipy.sparse.lil_matrix(G_dense)
assert_array_almost_equal(SP, shortest_path(G_csr))
assert_array_almost_equal(SP, shortest_path(G_csc))
assert_array_almost_equal(SP, shortest_path(G_lil))


@@ -0,0 +1,65 @@
"""Test the minimum spanning tree function"""
import numpy as np
from numpy.testing import assert_
import numpy.testing as npt
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import minimum_spanning_tree
def test_minimum_spanning_tree():
# Create a graph with two connected components.
graph = [[0,1,0,0,0],
[1,0,0,0,0],
[0,0,0,8,5],
[0,0,8,0,1],
[0,0,5,1,0]]
graph = np.asarray(graph)
# Create the expected spanning tree.
expected = [[0,1,0,0,0],
[0,0,0,0,0],
[0,0,0,0,5],
[0,0,0,0,1],
[0,0,0,0,0]]
expected = np.asarray(expected)
# Ensure minimum spanning tree code gives this expected output.
csgraph = csr_matrix(graph)
mintree = minimum_spanning_tree(csgraph)
npt.assert_array_equal(mintree.todense(), expected,
'Incorrect spanning tree found.')
# Ensure that the original graph was not modified.
npt.assert_array_equal(csgraph.todense(), graph,
'Original graph was modified.')
# Now let the algorithm modify the csgraph in place.
mintree = minimum_spanning_tree(csgraph, overwrite=True)
npt.assert_array_equal(mintree.todense(), expected,
'Graph was not properly modified to contain MST.')
np.random.seed(1234)
for N in (5, 10, 15, 20):
# Create a random graph.
graph = 3 + np.random.random((N, N))
csgraph = csr_matrix(graph)
# The spanning tree has at most N - 1 edges.
mintree = minimum_spanning_tree(csgraph)
assert_(mintree.nnz < N)
# Set the sub diagonal to 1 to create a known spanning tree.
idx = np.arange(N-1)
graph[idx,idx+1] = 1
csgraph = csr_matrix(graph)
mintree = minimum_spanning_tree(csgraph)
# We expect to see this pattern in the spanning tree and otherwise
# have this zero.
expected = np.zeros((N, N))
expected[idx, idx+1] = 1
npt.assert_array_equal(mintree.todense(), expected,
'Incorrect spanning tree found.')


@@ -0,0 +1,68 @@
import numpy as np
from numpy.testing import assert_array_almost_equal
from scipy.sparse.csgraph import (breadth_first_tree, depth_first_tree,
csgraph_to_dense, csgraph_from_dense)
def test_graph_breadth_first():
csgraph = np.array([[0, 1, 2, 0, 0],
[1, 0, 0, 0, 3],
[2, 0, 0, 7, 0],
[0, 0, 7, 0, 1],
[0, 3, 0, 1, 0]])
csgraph = csgraph_from_dense(csgraph, null_value=0)
bfirst = np.array([[0, 1, 2, 0, 0],
[0, 0, 0, 0, 3],
[0, 0, 0, 7, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]])
for directed in [True, False]:
bfirst_test = breadth_first_tree(csgraph, 0, directed)
assert_array_almost_equal(csgraph_to_dense(bfirst_test),
bfirst)
def test_graph_depth_first():
csgraph = np.array([[0, 1, 2, 0, 0],
[1, 0, 0, 0, 3],
[2, 0, 0, 7, 0],
[0, 0, 7, 0, 1],
[0, 3, 0, 1, 0]])
csgraph = csgraph_from_dense(csgraph, null_value=0)
dfirst = np.array([[0, 1, 0, 0, 0],
[0, 0, 0, 0, 3],
[0, 0, 0, 0, 0],
[0, 0, 7, 0, 0],
[0, 0, 0, 1, 0]])
for directed in [True, False]:
dfirst_test = depth_first_tree(csgraph, 0, directed)
assert_array_almost_equal(csgraph_to_dense(dfirst_test),
dfirst)
def test_graph_breadth_first_trivial_graph():
csgraph = np.array([[0]])
csgraph = csgraph_from_dense(csgraph, null_value=0)
bfirst = np.array([[0]])
for directed in [True, False]:
bfirst_test = breadth_first_tree(csgraph, 0, directed)
assert_array_almost_equal(csgraph_to_dense(bfirst_test),
bfirst)
def test_graph_depth_first_trivial_graph():
csgraph = np.array([[0]])
csgraph = csgraph_from_dense(csgraph, null_value=0)
bfirst = np.array([[0]])
for directed in [True, False]:
bfirst_test = depth_first_tree(csgraph, 0, directed)
assert_array_almost_equal(csgraph_to_dense(bfirst_test),
bfirst)


@@ -0,0 +1,358 @@
"""Compressed Sparse Row matrix format"""
__docformat__ = "restructuredtext en"
__all__ = ['csr_matrix', 'isspmatrix_csr']
import numpy as np
from .base import spmatrix
from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks,
get_csr_submatrix)
from .sputils import upcast, get_index_dtype
from .compressed import _cs_matrix
class csr_matrix(_cs_matrix):
"""
Compressed Sparse Row matrix
This can be instantiated in several ways:
csr_matrix(D)
with a dense matrix or rank-2 ndarray D
csr_matrix(S)
with another sparse matrix S (equivalent to S.tocsr())
csr_matrix((M, N), [dtype])
to construct an empty matrix with shape (M, N)
dtype is optional, defaulting to dtype='d'.
csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
where ``data``, ``row_ind`` and ``col_ind`` satisfy the
relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
csr_matrix((data, indices, indptr), [shape=(M, N)])
is the standard CSR representation where the column indices for
row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
If the shape parameter is not supplied, the matrix dimensions
are inferred from the index arrays.
Attributes
----------
dtype : dtype
Data type of the matrix
shape : 2-tuple
Shape of the matrix
ndim : int
Number of dimensions (this is always 2)
nnz
Number of stored values, including explicit zeros
data
CSR format data array of the matrix
indices
CSR format index array of the matrix
indptr
CSR format index pointer array of the matrix
has_sorted_indices
Whether indices are sorted
Notes
-----
Sparse matrices can be used in arithmetic operations: they support
addition, subtraction, multiplication, division, and matrix power.
Advantages of the CSR format
- efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
- efficient row slicing
- fast matrix vector products
Disadvantages of the CSR format
- slow column slicing operations (consider CSC)
- changes to the sparsity structure are expensive (consider LIL or DOK)
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import csr_matrix
>>> csr_matrix((3, 4), dtype=np.int8).toarray()
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=int8)
>>> row = np.array([0, 0, 1, 2, 2, 2])
>>> col = np.array([0, 2, 2, 0, 1, 2])
>>> data = np.array([1, 2, 3, 4, 5, 6])
>>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
array([[1, 0, 2],
[0, 0, 3],
[4, 5, 6]])
>>> indptr = np.array([0, 2, 3, 6])
>>> indices = np.array([0, 2, 2, 0, 1, 2])
>>> data = np.array([1, 2, 3, 4, 5, 6])
>>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
array([[1, 0, 2],
[0, 0, 3],
[4, 5, 6]])
Duplicate entries are summed together:
>>> row = np.array([0, 1, 2, 0])
>>> col = np.array([0, 1, 1, 0])
>>> data = np.array([1, 2, 4, 8])
>>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
array([[9, 0, 0],
[0, 2, 0],
[0, 4, 0]])
As an example of how to construct a CSR matrix incrementally,
the following snippet builds a term-document matrix from texts:
>>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
>>> indptr = [0]
>>> indices = []
>>> data = []
>>> vocabulary = {}
>>> for d in docs:
... for term in d:
... index = vocabulary.setdefault(term, len(vocabulary))
... indices.append(index)
... data.append(1)
... indptr.append(len(indices))
...
>>> csr_matrix((data, indices, indptr), dtype=int).toarray()
array([[2, 1, 0, 0],
[0, 1, 1, 1]])
"""
format = 'csr'
def transpose(self, axes=None, copy=False):
if axes is not None:
raise ValueError(("Sparse matrices do not support "
"an 'axes' parameter because swapping "
"dimensions is the only logical permutation."))
M, N = self.shape
from .csc import csc_matrix
return csc_matrix((self.data, self.indices,
self.indptr), shape=(N, M), copy=copy)
transpose.__doc__ = spmatrix.transpose.__doc__
def tolil(self, copy=False):
from .lil import lil_matrix
lil = lil_matrix(self.shape,dtype=self.dtype)
self.sum_duplicates()
ptr,ind,dat = self.indptr,self.indices,self.data
rows, data = lil.rows, lil.data
for n in range(self.shape[0]):
start = ptr[n]
end = ptr[n+1]
rows[n] = ind[start:end].tolist()
data[n] = dat[start:end].tolist()
return lil
tolil.__doc__ = spmatrix.tolil.__doc__
def tocsr(self, copy=False):
if copy:
return self.copy()
else:
return self
tocsr.__doc__ = spmatrix.tocsr.__doc__
def tocsc(self, copy=False):
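        # Allocate the CSC index and data arrays up front; the compiled
        # csr_tocsc routine then fills them in.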
idx_dtype = get_index_dtype((self.indptr, self.indices),
maxval=max(self.nnz, self.shape[0]))
indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype)
indices = np.empty(self.nnz, dtype=idx_dtype)
data = np.empty(self.nnz, dtype=upcast(self.dtype))
csr_tocsc(self.shape[0], self.shape[1],
self.indptr.astype(idx_dtype),
self.indices.astype(idx_dtype),
self.data,
indptr,
indices,
data)
from .csc import csc_matrix
A = csc_matrix((data, indices, indptr), shape=self.shape)
A.has_sorted_indices = True
return A
tocsc.__doc__ = spmatrix.tocsc.__doc__
def tobsr(self, blocksize=None, copy=True):
from .bsr import bsr_matrix
if blocksize is None:
from .spfuncs import estimate_blocksize
return self.tobsr(blocksize=estimate_blocksize(self))
elif blocksize == (1,1):
arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr)
return bsr_matrix(arg1, shape=self.shape, copy=copy)
else:
R,C = blocksize
M,N = self.shape
if R < 1 or C < 1 or M % R != 0 or N % C != 0:
raise ValueError('invalid blocksize %s' % blocksize)
blks = csr_count_blocks(M,N,R,C,self.indptr,self.indices)
idx_dtype = get_index_dtype((self.indptr, self.indices),
maxval=max(N//C, blks))
indptr = np.empty(M//R+1, dtype=idx_dtype)
indices = np.empty(blks, dtype=idx_dtype)
data = np.zeros((blks,R,C), dtype=self.dtype)
csr_tobsr(M, N, R, C,
self.indptr.astype(idx_dtype),
self.indices.astype(idx_dtype),
self.data,
indptr, indices, data.ravel())
return bsr_matrix((data,indices,indptr), shape=self.shape)
tobsr.__doc__ = spmatrix.tobsr.__doc__
# these functions are used by the parent class (_cs_matrix)
    # to remove redundancy between csc_matrix and csr_matrix
def _swap(self, x):
"""swap the members of x if this is a column-oriented matrix
"""
return x
def __iter__(self):
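        # Yield each row as its own (1, N) CSR matrix, copying only that
        # row's slice of the index and data arrays.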
indptr = np.zeros(2, dtype=self.indptr.dtype)
shape = (1, self.shape[1])
i0 = 0
for i1 in self.indptr[1:]:
indptr[1] = i1 - i0
indices = self.indices[i0:i1]
data = self.data[i0:i1]
yield csr_matrix((data, indices, indptr), shape=shape, copy=True)
i0 = i1
def getrow(self, i):
"""Returns a copy of row i of the matrix, as a (1 x n)
CSR matrix (row vector).
"""
M, N = self.shape
i = int(i)
if i < 0:
i += M
if i < 0 or i >= M:
raise IndexError('index (%d) out of range' % i)
indptr, indices, data = get_csr_submatrix(
M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N)
return csr_matrix((data, indices, indptr), shape=(1, N),
dtype=self.dtype, copy=False)
def getcol(self, i):
"""Returns a copy of column i of the matrix, as a (m x 1)
CSR matrix (column vector).
"""
M, N = self.shape
i = int(i)
if i < 0:
i += N
if i < 0 or i >= N:
raise IndexError('index (%d) out of range' % i)
indptr, indices, data = get_csr_submatrix(
M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1)
return csr_matrix((data, indices, indptr), shape=(M, 1),
dtype=self.dtype, copy=False)
def _get_intXarray(self, row, col):
return self.getrow(row)._minor_index_fancy(col)
def _get_intXslice(self, row, col):
if col.step in (1, None):
return self._get_submatrix(row, col, copy=True)
# TODO: uncomment this once it's faster:
# return self.getrow(row)._minor_slice(col)
M, N = self.shape
start, stop, stride = col.indices(N)
ii, jj = self.indptr[row:row+2]
row_indices = self.indices[ii:jj]
row_data = self.data[ii:jj]
if stride > 0:
ind = (row_indices >= start) & (row_indices < stop)
else:
ind = (row_indices <= start) & (row_indices > stop)
if abs(stride) > 1:
ind &= (row_indices - start) % stride == 0
row_indices = (row_indices[ind] - start) // stride
row_data = row_data[ind]
row_indptr = np.array([0, len(row_indices)])
if stride < 0:
row_data = row_data[::-1]
row_indices = abs(row_indices[::-1])
shape = (1, int(np.ceil(float(stop - start) / stride)))
return csr_matrix((row_data, row_indices, row_indptr), shape=shape,
dtype=self.dtype, copy=False)
def _get_sliceXint(self, row, col):
if row.step in (1, None):
return self._get_submatrix(row, col, copy=True)
return self._major_slice(row)._get_submatrix(minor=col)
def _get_sliceXarray(self, row, col):
return self._major_slice(row)._minor_index_fancy(col)
def _get_arrayXint(self, row, col):
return self._major_index_fancy(row)._get_submatrix(minor=col)
def _get_arrayXslice(self, row, col):
if col.step not in (1, None):
col = np.arange(*col.indices(self.shape[1]))
return self._get_arrayXarray(row, col)
return self._major_index_fancy(row)._get_submatrix(minor=col)
def isspmatrix_csr(x):
"""Is x of csr_matrix type?
Parameters
----------
x
object to check for being a csr matrix
Returns
-------
bool
True if x is a csr matrix, False otherwise
Examples
--------
>>> from scipy.sparse import csr_matrix, isspmatrix_csr
>>> isspmatrix_csr(csr_matrix([[5]]))
True
>>> from scipy.sparse import csc_matrix, csr_matrix, isspmatrix_csc
>>> isspmatrix_csr(csc_matrix([[5]]))
False
"""
return isinstance(x, csr_matrix)


@@ -0,0 +1,397 @@
"""Base class for sparse matrice with a .data attribute
subclasses must provide a _with_data() method that
creates a new matrix with the same sparsity pattern
as self but with a different data array
"""
import numpy as np
from .base import spmatrix, _ufuncs_with_fixed_point_at_zero
from .sputils import isscalarlike, validateaxis, matrix
__all__ = []
# TODO implement all relevant operations
# use .data.__methods__() instead of /=, *=, etc.
class _data_matrix(spmatrix):
def __init__(self):
spmatrix.__init__(self)
def _get_dtype(self):
return self.data.dtype
def _set_dtype(self, newtype):
self.data.dtype = newtype
dtype = property(fget=_get_dtype, fset=_set_dtype)
def _deduped_data(self):
if hasattr(self, 'sum_duplicates'):
self.sum_duplicates()
return self.data
def __abs__(self):
return self._with_data(abs(self._deduped_data()))
def __round__(self, ndigits=0):
return self._with_data(np.around(self._deduped_data(), decimals=ndigits))
def _real(self):
return self._with_data(self.data.real)
def _imag(self):
return self._with_data(self.data.imag)
def __neg__(self):
if self.dtype.kind == 'b':
raise NotImplementedError('negating a sparse boolean '
'matrix is not supported')
return self._with_data(-self.data)
def __imul__(self, other): # self *= other
if isscalarlike(other):
self.data *= other
return self
else:
return NotImplemented
def __itruediv__(self, other): # self /= other
if isscalarlike(other):
recip = 1.0 / other
self.data *= recip
return self
else:
return NotImplemented
def astype(self, dtype, casting='unsafe', copy=True):
dtype = np.dtype(dtype)
if self.dtype != dtype:
return self._with_data(
self._deduped_data().astype(dtype, casting=casting, copy=copy),
copy=copy)
elif copy:
return self.copy()
else:
return self
astype.__doc__ = spmatrix.astype.__doc__
def conj(self, copy=True):
if np.issubdtype(self.dtype, np.complexfloating):
return self._with_data(self.data.conj(), copy=copy)
elif copy:
return self.copy()
else:
return self
conj.__doc__ = spmatrix.conj.__doc__
def copy(self):
return self._with_data(self.data.copy(), copy=True)
copy.__doc__ = spmatrix.copy.__doc__
def count_nonzero(self):
return np.count_nonzero(self._deduped_data())
count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
def power(self, n, dtype=None):
"""
This function performs element-wise power.
Parameters
----------
        n : scalar
            The exponent to apply element-wise.
        dtype : dtype, optional
            If dtype is not specified, the current dtype will be preserved.
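
        Examples
        --------
        >>> from scipy.sparse import csr_matrix
        >>> csr_matrix([[1, 2], [0, 3]]).power(2).toarray()
        array([[1, 4],
               [0, 9]])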
"""
if not isscalarlike(n):
raise NotImplementedError("input is not scalar")
data = self._deduped_data()
if dtype is not None:
data = data.astype(dtype)
return self._with_data(data ** n)
###########################
# Multiplication handlers #
###########################
def _mul_scalar(self, other):
return self._with_data(self.data * other)
# Add the numpy unary ufuncs for which func(0) = 0 to _data_matrix.
for npfunc in _ufuncs_with_fixed_point_at_zero:
name = npfunc.__name__
def _create_method(op):
def method(self):
result = op(self._deduped_data())
return self._with_data(result, copy=True)
method.__doc__ = ("Element-wise %s.\n\n"
"See `numpy.%s` for more information." % (name, name))
method.__name__ = name
return method
setattr(_data_matrix, name, _create_method(npfunc))
def _find_missing_index(ind, n):
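    # `ind` is assumed sorted and duplicate-free; return the smallest value
    # in range(n) missing from it, or -1 if nothing is missing.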
for k, a in enumerate(ind):
if k != a:
return k
k += 1
if k < n:
return k
else:
return -1
class _minmax_mixin(object):
"""Mixin for min and max methods.
These are not implemented for dia_matrix, hence the separate class.
"""
def _min_or_max_axis(self, axis, min_or_max):
N = self.shape[axis]
if N == 0:
raise ValueError("zero-size array to reduction operation")
M = self.shape[1 - axis]
mat = self.tocsc() if axis == 0 else self.tocsr()
mat.sum_duplicates()
major_index, value = mat._minor_reduce(min_or_max)
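        # Lines with fewer than N stored entries implicitly contain zeros,
        # so a zero is folded into their reduced value.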
not_full = np.diff(mat.indptr)[major_index] < N
value[not_full] = min_or_max(value[not_full], 0)
mask = value != 0
major_index = np.compress(mask, major_index)
value = np.compress(mask, value)
from . import coo_matrix
if axis == 0:
return coo_matrix((value, (np.zeros(len(value)), major_index)),
dtype=self.dtype, shape=(1, M))
else:
return coo_matrix((value, (major_index, np.zeros(len(value)))),
dtype=self.dtype, shape=(M, 1))
def _min_or_max(self, axis, out, min_or_max):
if out is not None:
raise ValueError(("Sparse matrices do not support "
"an 'out' parameter."))
validateaxis(axis)
if axis is None:
if 0 in self.shape:
raise ValueError("zero-size array to reduction operation")
zero = self.dtype.type(0)
if self.nnz == 0:
return zero
m = min_or_max.reduce(self._deduped_data().ravel())
if self.nnz != np.prod(self.shape):
m = min_or_max(zero, m)
return m
if axis < 0:
axis += 2
if (axis == 0) or (axis == 1):
return self._min_or_max_axis(axis, min_or_max)
else:
raise ValueError("axis out of range")
def _arg_min_or_max_axis(self, axis, op, compare):
if self.shape[axis] == 0:
raise ValueError("Can't apply the operation along a zero-sized "
"dimension.")
if axis < 0:
axis += 2
zero = self.dtype.type(0)
mat = self.tocsc() if axis == 0 else self.tocsr()
mat.sum_duplicates()
ret_size, line_size = mat._swap(mat.shape)
ret = np.zeros(ret_size, dtype=int)
nz_lines, = np.nonzero(np.diff(mat.indptr))
for i in nz_lines:
p, q = mat.indptr[i:i + 2]
data = mat.data[p:q]
indices = mat.indices[p:q]
am = op(data)
m = data[am]
if compare(m, zero) or q - p == line_size:
ret[i] = indices[am]
else:
zero_ind = _find_missing_index(indices, line_size)
if m == zero:
ret[i] = min(am, zero_ind)
else:
ret[i] = zero_ind
if axis == 1:
ret = ret.reshape(-1, 1)
return matrix(ret)
def _arg_min_or_max(self, axis, out, op, compare):
if out is not None:
raise ValueError("Sparse matrices do not support "
"an 'out' parameter.")
validateaxis(axis)
if axis is None:
if 0 in self.shape:
raise ValueError("Can't apply the operation to "
"an empty matrix.")
if self.nnz == 0:
return 0
else:
zero = self.dtype.type(0)
mat = self.tocoo()
mat.sum_duplicates()
am = op(mat.data)
m = mat.data[am]
if compare(m, zero):
return mat.row[am] * mat.shape[1] + mat.col[am]
else:
size = np.prod(mat.shape)
if size == mat.nnz:
return am
else:
ind = mat.row * mat.shape[1] + mat.col
zero_ind = _find_missing_index(ind, size)
if m == zero:
return min(zero_ind, am)
else:
return zero_ind
return self._arg_min_or_max_axis(axis, op, compare)
def max(self, axis=None, out=None):
"""
Return the maximum of the matrix or maximum along an axis.
This takes all elements into account, not just the non-zero ones.
Parameters
----------
        axis : {-2, -1, 0, 1, None}, optional
            Axis along which the maximum is computed. The default is to
            compute the maximum over all the matrix elements, returning
            a scalar (i.e., `axis` = `None`).
out : None, optional
This argument is in the signature *solely* for NumPy
compatibility reasons. Do not pass in anything except
for the default value, as this argument is not used.
Returns
-------
amax : coo_matrix or scalar
Maximum of `a`. If `axis` is None, the result is a scalar value.
If `axis` is given, the result is a sparse.coo_matrix of dimension
``a.ndim - 1``.
See Also
--------
min : The minimum value of a sparse matrix along a given axis.
numpy.matrix.max : NumPy's implementation of 'max' for matrices
"""
return self._min_or_max(axis, out, np.maximum)
def min(self, axis=None, out=None):
"""
        Return the minimum of the matrix or minimum along an axis.
This takes all elements into account, not just the non-zero ones.
Parameters
----------
        axis : {-2, -1, 0, 1, None}, optional
            Axis along which the minimum is computed. The default is to
            compute the minimum over all the matrix elements, returning
            a scalar (i.e., `axis` = `None`).
out : None, optional
This argument is in the signature *solely* for NumPy
compatibility reasons. Do not pass in anything except for
the default value, as this argument is not used.
Returns
-------
amin : coo_matrix or scalar
Minimum of `a`. If `axis` is None, the result is a scalar value.
If `axis` is given, the result is a sparse.coo_matrix of dimension
``a.ndim - 1``.
See Also
--------
max : The maximum value of a sparse matrix along a given axis.
numpy.matrix.min : NumPy's implementation of 'min' for matrices
"""
return self._min_or_max(axis, out, np.minimum)
def argmax(self, axis=None, out=None):
"""Return indices of maximum elements along an axis.
Implicit zero elements are also taken into account. If there are
several maximum values, the index of the first occurrence is returned.
Parameters
----------
axis : {-2, -1, 0, 1, None}, optional
            Axis along which the argmax is computed. If None (default), the
            index of the maximum element in the flattened data is returned.
out : None, optional
This argument is in the signature *solely* for NumPy
compatibility reasons. Do not pass in anything except for
the default value, as this argument is not used.
Returns
-------
ind : numpy.matrix or int
Indices of maximum elements. If matrix, its size along `axis` is 1.
"""
return self._arg_min_or_max(axis, out, np.argmax, np.greater)
def argmin(self, axis=None, out=None):
"""Return indices of minimum elements along an axis.
Implicit zero elements are also taken into account. If there are
several minimum values, the index of the first occurrence is returned.
Parameters
----------
axis : {-2, -1, 0, 1, None}, optional
            Axis along which the argmin is computed. If None (default), the
            index of the minimum element in the flattened data is returned.
out : None, optional
This argument is in the signature *solely* for NumPy
compatibility reasons. Do not pass in anything except for
the default value, as this argument is not used.
Returns
-------
ind : numpy.matrix or int
Indices of minimum elements. If matrix, its size along `axis` is 1.
"""
return self._arg_min_or_max(axis, out, np.argmin, np.less)


@@ -0,0 +1,431 @@
"""Sparse DIAgonal format"""
__docformat__ = "restructuredtext en"
__all__ = ['dia_matrix', 'isspmatrix_dia']
import numpy as np
from .base import isspmatrix, _formats, spmatrix
from .data import _data_matrix
from .sputils import (isshape, upcast_char, getdtype, get_index_dtype,
get_sum_dtype, validateaxis, check_shape, matrix)
from ._sparsetools import dia_matvec
class dia_matrix(_data_matrix):
"""Sparse matrix with DIAgonal storage
This can be instantiated in several ways:
dia_matrix(D)
with a dense matrix
dia_matrix(S)
with another sparse matrix S (equivalent to S.todia())
dia_matrix((M, N), [dtype])
to construct an empty matrix with shape (M, N),
dtype is optional, defaulting to dtype='d'.
dia_matrix((data, offsets), shape=(M, N))
where the ``data[k,:]`` stores the diagonal entries for
diagonal ``offsets[k]`` (See example below)
Attributes
----------
dtype : dtype
Data type of the matrix
shape : 2-tuple
Shape of the matrix
ndim : int
Number of dimensions (this is always 2)
nnz
Number of stored values, including explicit zeros
data
DIA format data array of the matrix
offsets
DIA format offset array of the matrix
Notes
-----
Sparse matrices can be used in arithmetic operations: they support
addition, subtraction, multiplication, division, and matrix power.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import dia_matrix
>>> dia_matrix((3, 4), dtype=np.int8).toarray()
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=int8)
>>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
>>> offsets = np.array([0, -1, 2])
>>> dia_matrix((data, offsets), shape=(4, 4)).toarray()
array([[1, 0, 3, 0],
[1, 2, 0, 4],
[0, 2, 3, 0],
[0, 0, 3, 4]])
>>> from scipy.sparse import dia_matrix
>>> n = 10
>>> ex = np.ones(n)
>>> data = np.array([ex, 2 * ex, ex])
>>> offsets = np.array([-1, 0, 1])
>>> dia_matrix((data, offsets), shape=(n, n)).toarray()
array([[2., 1., 0., ..., 0., 0., 0.],
[1., 2., 1., ..., 0., 0., 0.],
[0., 1., 2., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 2., 1., 0.],
[0., 0., 0., ..., 1., 2., 1.],
[0., 0., 0., ..., 0., 1., 2.]])
"""
format = 'dia'
def __init__(self, arg1, shape=None, dtype=None, copy=False):
_data_matrix.__init__(self)
if isspmatrix_dia(arg1):
if copy:
arg1 = arg1.copy()
self.data = arg1.data
self.offsets = arg1.offsets
self._shape = check_shape(arg1.shape)
elif isspmatrix(arg1):
if isspmatrix_dia(arg1) and copy:
A = arg1.copy()
else:
A = arg1.todia()
self.data = A.data
self.offsets = A.offsets
self._shape = check_shape(A.shape)
elif isinstance(arg1, tuple):
if isshape(arg1):
# It's a tuple of matrix dimensions (M, N)
# create empty matrix
self._shape = check_shape(arg1)
self.data = np.zeros((0,0), getdtype(dtype, default=float))
idx_dtype = get_index_dtype(maxval=max(self.shape))
self.offsets = np.zeros((0), dtype=idx_dtype)
else:
try:
# Try interpreting it as (data, offsets)
data, offsets = arg1
except Exception:
raise ValueError('unrecognized form for dia_matrix constructor')
else:
if shape is None:
raise ValueError('expected a shape argument')
self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy))
self.offsets = np.atleast_1d(np.array(arg1[1],
dtype=get_index_dtype(maxval=max(shape)),
copy=copy))
self._shape = check_shape(shape)
else:
#must be dense, convert to COO first, then to DIA
try:
arg1 = np.asarray(arg1)
except Exception:
raise ValueError("unrecognized form for"
" %s_matrix constructor" % self.format)
from .coo import coo_matrix
A = coo_matrix(arg1, dtype=dtype, shape=shape).todia()
self.data = A.data
self.offsets = A.offsets
self._shape = check_shape(A.shape)
if dtype is not None:
self.data = self.data.astype(dtype)
#check format
if self.offsets.ndim != 1:
raise ValueError('offsets array must have rank 1')
if self.data.ndim != 2:
raise ValueError('data array must have rank 2')
if self.data.shape[0] != len(self.offsets):
raise ValueError('number of diagonals (%d) '
'does not match the number of offsets (%d)'
% (self.data.shape[0], len(self.offsets)))
if len(np.unique(self.offsets)) != len(self.offsets):
raise ValueError('offset array contains duplicate values')
def __repr__(self):
format = _formats[self.getformat()][1]
return "<%dx%d sparse matrix of type '%s'\n" \
"\twith %d stored elements (%d diagonals) in %s format>" % \
(self.shape + (self.dtype.type, self.nnz, self.data.shape[0],
format))
def _data_mask(self):
"""Returns a mask of the same shape as self.data, where
mask[i,j] is True when data[i,j] corresponds to a stored element."""
num_rows, num_cols = self.shape
offset_inds = np.arange(self.data.shape[1])
row = offset_inds - self.offsets[:,None]
mask = (row >= 0)
mask &= (row < num_rows)
mask &= (offset_inds < num_cols)
return mask
def count_nonzero(self):
mask = self._data_mask()
return np.count_nonzero(self.data[mask])
def getnnz(self, axis=None):
if axis is not None:
raise NotImplementedError("getnnz over an axis is not implemented "
"for DIA format")
M,N = self.shape
nnz = 0
for k in self.offsets:
if k > 0:
nnz += min(M,N-k)
else:
nnz += min(M+k,N)
return int(nnz)
getnnz.__doc__ = spmatrix.getnnz.__doc__
count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
def sum(self, axis=None, dtype=None, out=None):
validateaxis(axis)
if axis is not None and axis < 0:
axis += 2
res_dtype = get_sum_dtype(self.dtype)
num_rows, num_cols = self.shape
ret = None
if axis == 0:
mask = self._data_mask()
x = (self.data * mask).sum(axis=0)
if x.shape[0] == num_cols:
res = x
else:
res = np.zeros(num_cols, dtype=x.dtype)
res[:x.shape[0]] = x
ret = matrix(res, dtype=res_dtype)
else:
row_sums = np.zeros(num_rows, dtype=res_dtype)
one = np.ones(num_cols, dtype=res_dtype)
dia_matvec(num_rows, num_cols, len(self.offsets),
self.data.shape[1], self.offsets, self.data, one, row_sums)
row_sums = matrix(row_sums)
if axis is None:
return row_sums.sum(dtype=dtype, out=out)
if axis is not None:
row_sums = row_sums.T
ret = matrix(row_sums.sum(axis=axis))
if out is not None and out.shape != ret.shape:
raise ValueError("dimensions do not match")
return ret.sum(axis=(), dtype=dtype, out=out)
sum.__doc__ = spmatrix.sum.__doc__
def _mul_vector(self, other):
x = other
y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
x.dtype.char))
L = self.data.shape[1]
M,N = self.shape
dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data, x.ravel(), y.ravel())
return y
def _mul_multimatrix(self, other):
return np.hstack([self._mul_vector(col).reshape(-1,1) for col in other.T])
def _setdiag(self, values, k=0):
M, N = self.shape
if values.ndim == 0:
# broadcast
values_n = np.inf
else:
values_n = len(values)
if k < 0:
n = min(M + k, N, values_n)
min_index = 0
max_index = n
else:
n = min(M, N - k, values_n)
min_index = k
max_index = k + n
if values.ndim != 0:
# allow also longer sequences
values = values[:n]
if k in self.offsets:
self.data[self.offsets == k, min_index:max_index] = values
else:
self.offsets = np.append(self.offsets, self.offsets.dtype.type(k))
m = max(max_index, self.data.shape[1])
data = np.zeros((self.data.shape[0]+1, m), dtype=self.data.dtype)
data[:-1,:self.data.shape[1]] = self.data
data[-1, min_index:max_index] = values
self.data = data
def todia(self, copy=False):
if copy:
return self.copy()
else:
return self
todia.__doc__ = spmatrix.todia.__doc__
def transpose(self, axes=None, copy=False):
if axes is not None:
raise ValueError(("Sparse matrices do not support "
"an 'axes' parameter because swapping "
"dimensions is the only logical permutation."))
num_rows, num_cols = self.shape
max_dim = max(self.shape)
# flip diagonal offsets
offsets = -self.offsets
# re-align the data matrix
r = np.arange(len(offsets), dtype=np.intc)[:, None]
c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None]
pad_amount = max(0, max_dim-self.data.shape[1])
data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount),
dtype=self.data.dtype)))
data = data[r, c]
return dia_matrix((data, offsets), shape=(
num_cols, num_rows), copy=copy)
transpose.__doc__ = spmatrix.transpose.__doc__
def diagonal(self, k=0):
rows, cols = self.shape
if k <= -rows or k >= cols:
return np.empty(0, dtype=self.data.dtype)
idx, = np.nonzero(self.offsets == k)
first_col, last_col = max(0, k), min(rows + k, cols)
if idx.size == 0:
return np.zeros(last_col - first_col, dtype=self.data.dtype)
return self.data[idx[0], first_col:last_col]
diagonal.__doc__ = spmatrix.diagonal.__doc__
def tocsc(self, copy=False):
from .csc import csc_matrix
if self.nnz == 0:
return csc_matrix(self.shape, dtype=self.dtype)
num_rows, num_cols = self.shape
num_offsets, offset_len = self.data.shape
offset_inds = np.arange(offset_len)
row = offset_inds - self.offsets[:,None]
mask = (row >= 0)
mask &= (row < num_rows)
mask &= (offset_inds < num_cols)
mask &= (self.data != 0)
idx_dtype = get_index_dtype(maxval=max(self.shape))
indptr = np.zeros(num_cols + 1, dtype=idx_dtype)
indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0))
indptr[offset_len+1:] = indptr[offset_len]
indices = row.T[mask.T].astype(idx_dtype, copy=False)
data = self.data.T[mask.T]
return csc_matrix((data, indices, indptr), shape=self.shape,
dtype=self.dtype)
tocsc.__doc__ = spmatrix.tocsc.__doc__
def tocoo(self, copy=False):
num_rows, num_cols = self.shape
num_offsets, offset_len = self.data.shape
offset_inds = np.arange(offset_len)
row = offset_inds - self.offsets[:,None]
mask = (row >= 0)
mask &= (row < num_rows)
mask &= (offset_inds < num_cols)
mask &= (self.data != 0)
row = row[mask]
col = np.tile(offset_inds, num_offsets)[mask.ravel()]
data = self.data[mask]
from .coo import coo_matrix
A = coo_matrix((data,(row,col)), shape=self.shape, dtype=self.dtype)
A.has_canonical_format = True
return A
tocoo.__doc__ = spmatrix.tocoo.__doc__
# needed by _data_matrix
def _with_data(self, data, copy=True):
"""Returns a matrix with the same sparsity structure as self,
but with different data. By default the structure arrays are copied.
"""
if copy:
return dia_matrix((data, self.offsets.copy()), shape=self.shape)
else:
return dia_matrix((data,self.offsets), shape=self.shape)
def resize(self, *shape):
shape = check_shape(shape)
M, N = shape
# we do not need to handle the case of expanding N
self.data = self.data[:, :N]
if (M > self.shape[0] and
np.any(self.offsets + self.shape[0] < self.data.shape[1])):
# explicitly clear values that were previously hidden
mask = (self.offsets[:, None] + self.shape[0] <=
np.arange(self.data.shape[1]))
self.data[mask] = 0
self._shape = shape
resize.__doc__ = spmatrix.resize.__doc__
def isspmatrix_dia(x):
"""Is x of dia_matrix type?
Parameters
----------
x
object to check for being a dia matrix
Returns
-------
bool
True if x is a dia matrix, False otherwise
Examples
--------
>>> from scipy.sparse import dia_matrix, isspmatrix_dia
>>> isspmatrix_dia(dia_matrix([[5]]))
True
>>> from scipy.sparse import dia_matrix, csr_matrix, isspmatrix_dia
>>> isspmatrix_dia(csr_matrix([[5]]))
False
"""
return isinstance(x, dia_matrix)


@@ -0,0 +1,453 @@
"""Dictionary Of Keys based matrix"""
__docformat__ = "restructuredtext en"
__all__ = ['dok_matrix', 'isspmatrix_dok']
import itertools
import numpy as np
from .base import spmatrix, isspmatrix
from ._index import IndexMixin
from .sputils import (isdense, getdtype, isshape, isintlike, isscalarlike,
upcast, upcast_scalar, get_index_dtype, check_shape)
try:
from operator import isSequenceType as _is_sequence
except ImportError:
def _is_sequence(x):
return (hasattr(x, '__len__') or hasattr(x, '__next__')
or hasattr(x, 'next'))
class dok_matrix(spmatrix, IndexMixin, dict):
"""
Dictionary Of Keys based sparse matrix.
This is an efficient structure for constructing sparse
matrices incrementally.
This can be instantiated in several ways:
dok_matrix(D)
with a dense matrix, D
dok_matrix(S)
with a sparse matrix, S
dok_matrix((M,N), [dtype])
create the matrix with initial shape (M,N)
dtype is optional, defaulting to dtype='d'
Attributes
----------
dtype : dtype
Data type of the matrix
shape : 2-tuple
Shape of the matrix
ndim : int
Number of dimensions (this is always 2)
nnz
Number of nonzero elements
Notes
-----
Sparse matrices can be used in arithmetic operations: they support
addition, subtraction, multiplication, division, and matrix power.
Allows for efficient O(1) access of individual elements.
Duplicates are not allowed.
Can be efficiently converted to a coo_matrix once constructed.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import dok_matrix
>>> S = dok_matrix((5, 5), dtype=np.float32)
>>> for i in range(5):
... for j in range(5):
... S[i, j] = i + j # Update element
"""
format = 'dok'
def __init__(self, arg1, shape=None, dtype=None, copy=False):
dict.__init__(self)
spmatrix.__init__(self)
self.dtype = getdtype(dtype, default=float)
if isinstance(arg1, tuple) and isshape(arg1): # (M,N)
M, N = arg1
self._shape = check_shape((M, N))
elif isspmatrix(arg1): # Sparse ctor
if isspmatrix_dok(arg1) and copy:
arg1 = arg1.copy()
else:
arg1 = arg1.todok()
if dtype is not None:
arg1 = arg1.astype(dtype, copy=False)
dict.update(self, arg1)
self._shape = check_shape(arg1.shape)
self.dtype = arg1.dtype
else: # Dense ctor
try:
arg1 = np.asarray(arg1)
except Exception:
raise TypeError('Invalid input format.')
if len(arg1.shape) != 2:
raise TypeError('Expected rank <=2 dense array or matrix.')
from .coo import coo_matrix
d = coo_matrix(arg1, dtype=dtype).todok()
dict.update(self, d)
self._shape = check_shape(arg1.shape)
self.dtype = d.dtype
def update(self, val):
# Prevent direct usage of update
raise NotImplementedError("Direct modification to dok_matrix element "
"is not allowed.")
def _update(self, data):
"""An update method for dict data defined for direct access to
        `dok_matrix` data. Main purpose is to be used for efficient conversion
from other spmatrix classes. Has no checking if `data` is valid."""
return dict.update(self, data)
def set_shape(self, shape):
new_matrix = self.reshape(shape, copy=False).asformat(self.format)
self.__dict__ = new_matrix.__dict__
dict.clear(self)
dict.update(self, new_matrix)
shape = property(fget=spmatrix.get_shape, fset=set_shape)
def getnnz(self, axis=None):
if axis is not None:
raise NotImplementedError("getnnz over an axis is not implemented "
"for DOK format.")
return dict.__len__(self)
def count_nonzero(self):
return sum(x != 0 for x in self.values())
getnnz.__doc__ = spmatrix.getnnz.__doc__
count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
def __len__(self):
return dict.__len__(self)
def get(self, key, default=0.):
"""This overrides the dict.get method, providing type checking
but otherwise equivalent functionality.
"""
try:
i, j = key
assert isintlike(i) and isintlike(j)
except (AssertionError, TypeError, ValueError):
raise IndexError('Index must be a pair of integers.')
if (i < 0 or i >= self.shape[0] or j < 0 or j >= self.shape[1]):
raise IndexError('Index out of bounds.')
return dict.get(self, key, default)
def _get_intXint(self, row, col):
return dict.get(self, (row, col), self.dtype.type(0))
def _get_intXslice(self, row, col):
return self._get_sliceXslice(slice(row, row+1), col)
def _get_sliceXint(self, row, col):
return self._get_sliceXslice(row, slice(col, col+1))
def _get_sliceXslice(self, row, col):
row_start, row_stop, row_step = row.indices(self.shape[0])
col_start, col_stop, col_step = col.indices(self.shape[1])
row_range = range(row_start, row_stop, row_step)
col_range = range(col_start, col_stop, col_step)
shape = (len(row_range), len(col_range))
# Switch paths only when advantageous
# (count the iterations in the loops, adjust for complexity)
if len(self) >= 2 * shape[0] * shape[1]:
# O(nr*nc) path: loop over <row x col>
return self._get_columnXarray(row_range, col_range)
# O(nnz) path: loop over entries of self
newdok = dok_matrix(shape, dtype=self.dtype)
for key in self.keys():
i, ri = divmod(int(key[0]) - row_start, row_step)
if ri != 0 or i < 0 or i >= shape[0]:
continue
j, rj = divmod(int(key[1]) - col_start, col_step)
if rj != 0 or j < 0 or j >= shape[1]:
continue
x = dict.__getitem__(self, key)
dict.__setitem__(newdok, (i, j), x)
return newdok
def _get_intXarray(self, row, col):
return self._get_columnXarray([row], col)
def _get_arrayXint(self, row, col):
return self._get_columnXarray(row, [col])
def _get_sliceXarray(self, row, col):
row = list(range(*row.indices(self.shape[0])))
return self._get_columnXarray(row, col)
def _get_arrayXslice(self, row, col):
col = list(range(*col.indices(self.shape[1])))
return self._get_columnXarray(row, col)
def _get_columnXarray(self, row, col):
# outer indexing
newdok = dok_matrix((len(row), len(col)), dtype=self.dtype)
for i, r in enumerate(row):
for j, c in enumerate(col):
v = dict.get(self, (r, c), 0)
if v:
dict.__setitem__(newdok, (i, j), v)
return newdok
def _get_arrayXarray(self, row, col):
# inner indexing
i, j = map(np.atleast_2d, np.broadcast_arrays(row, col))
newdok = dok_matrix(i.shape, dtype=self.dtype)
for key in itertools.product(range(i.shape[0]), range(i.shape[1])):
v = dict.get(self, (i[key], j[key]), 0)
if v:
dict.__setitem__(newdok, key, v)
return newdok
def _set_intXint(self, row, col, x):
key = (row, col)
if x:
dict.__setitem__(self, key, x)
elif dict.__contains__(self, key):
del self[key]
def _set_arrayXarray(self, row, col, x):
row = list(map(int, row.ravel()))
col = list(map(int, col.ravel()))
x = x.ravel()
dict.update(self, zip(zip(row, col), x))
for i in np.nonzero(x == 0)[0]:
key = (row[i], col[i])
if dict.__getitem__(self, key) == 0:
# may have been superseded by later update
del self[key]
def __add__(self, other):
if isscalarlike(other):
res_dtype = upcast_scalar(self.dtype, other)
new = dok_matrix(self.shape, dtype=res_dtype)
# Add this scalar to every element.
M, N = self.shape
for key in itertools.product(range(M), range(N)):
                aij = dict.get(self, key, 0) + other
if aij:
new[key] = aij
# new.dtype.char = self.dtype.char
elif isspmatrix_dok(other):
if other.shape != self.shape:
raise ValueError("Matrix dimensions are not equal.")
# We could alternatively set the dimensions to the largest of
# the two matrices to be summed. Would this be a good idea?
res_dtype = upcast(self.dtype, other.dtype)
new = dok_matrix(self.shape, dtype=res_dtype)
dict.update(new, self)
with np.errstate(over='ignore'):
dict.update(new,
((k, new[k] + other[k]) for k in other.keys()))
elif isspmatrix(other):
csc = self.tocsc()
new = csc + other
elif isdense(other):
new = self.todense() + other
else:
return NotImplemented
return new
def __radd__(self, other):
if isscalarlike(other):
new = dok_matrix(self.shape, dtype=self.dtype)
M, N = self.shape
for key in itertools.product(range(M), range(N)):
                aij = dict.get(self, key, 0) + other
if aij:
new[key] = aij
elif isspmatrix_dok(other):
if other.shape != self.shape:
raise ValueError("Matrix dimensions are not equal.")
new = dok_matrix(self.shape, dtype=self.dtype)
dict.update(new, self)
dict.update(new,
((k, self[k] + other[k]) for k in other.keys()))
elif isspmatrix(other):
csc = self.tocsc()
new = csc + other
elif isdense(other):
new = other + self.todense()
else:
return NotImplemented
return new
def __neg__(self):
if self.dtype.kind == 'b':
raise NotImplementedError('Negating a sparse boolean matrix is not'
' supported.')
new = dok_matrix(self.shape, dtype=self.dtype)
dict.update(new, ((k, -self[k]) for k in self.keys()))
return new
def _mul_scalar(self, other):
res_dtype = upcast_scalar(self.dtype, other)
# Multiply this scalar by every element.
new = dok_matrix(self.shape, dtype=res_dtype)
dict.update(new, ((k, v * other) for k, v in self.items()))
return new
def _mul_vector(self, other):
# matrix * vector
result = np.zeros(self.shape[0], dtype=upcast(self.dtype, other.dtype))
for (i, j), v in self.items():
result[i] += v * other[j]
return result
def _mul_multivector(self, other):
# matrix * multivector
result_shape = (self.shape[0], other.shape[1])
result_dtype = upcast(self.dtype, other.dtype)
result = np.zeros(result_shape, dtype=result_dtype)
for (i, j), v in self.items():
result[i,:] += v * other[j,:]
return result
def __imul__(self, other):
if isscalarlike(other):
dict.update(self, ((k, v * other) for k, v in self.items()))
return self
return NotImplemented
def __truediv__(self, other):
if isscalarlike(other):
res_dtype = upcast_scalar(self.dtype, other)
new = dok_matrix(self.shape, dtype=res_dtype)
dict.update(new, ((k, v / other) for k, v in self.items()))
return new
return self.tocsr() / other
def __itruediv__(self, other):
if isscalarlike(other):
dict.update(self, ((k, v / other) for k, v in self.items()))
return self
return NotImplemented
def __reduce__(self):
        # This approach is necessary because __setstate__ is called after
        # __setitem__ upon unpickling; since __init__ is not called, there
        # is no shape attribute, so unpickling would otherwise fail.
return dict.__reduce__(self)
# What should len(sparse) return? For consistency with dense matrices,
# perhaps it should be the number of rows? For now it returns the number
# of non-zeros.
def transpose(self, axes=None, copy=False):
if axes is not None:
raise ValueError("Sparse matrices do not support "
"an 'axes' parameter because swapping "
"dimensions is the only logical permutation.")
M, N = self.shape
new = dok_matrix((N, M), dtype=self.dtype, copy=copy)
dict.update(new, (((right, left), val)
for (left, right), val in self.items()))
return new
transpose.__doc__ = spmatrix.transpose.__doc__
def conjtransp(self):
"""Return the conjugate transpose."""
M, N = self.shape
new = dok_matrix((N, M), dtype=self.dtype)
dict.update(new, (((right, left), np.conj(val))
for (left, right), val in self.items()))
return new
def copy(self):
new = dok_matrix(self.shape, dtype=self.dtype)
dict.update(new, self)
return new
copy.__doc__ = spmatrix.copy.__doc__
def tocoo(self, copy=False):
from .coo import coo_matrix
if self.nnz == 0:
return coo_matrix(self.shape, dtype=self.dtype)
idx_dtype = get_index_dtype(maxval=max(self.shape))
data = np.fromiter(self.values(), dtype=self.dtype, count=self.nnz)
row = np.fromiter((i for i, _ in self.keys()), dtype=idx_dtype, count=self.nnz)
col = np.fromiter((j for _, j in self.keys()), dtype=idx_dtype, count=self.nnz)
A = coo_matrix((data, (row, col)), shape=self.shape, dtype=self.dtype)
A.has_canonical_format = True
return A
tocoo.__doc__ = spmatrix.tocoo.__doc__
def todok(self, copy=False):
if copy:
return self.copy()
return self
todok.__doc__ = spmatrix.todok.__doc__
def tocsc(self, copy=False):
return self.tocoo(copy=False).tocsc(copy=copy)
tocsc.__doc__ = spmatrix.tocsc.__doc__
def resize(self, *shape):
shape = check_shape(shape)
newM, newN = shape
M, N = self.shape
if newM < M or newN < N:
# Remove all elements outside new dimensions
for (i, j) in list(self.keys()):
if i >= newM or j >= newN:
del self[i, j]
self._shape = shape
resize.__doc__ = spmatrix.resize.__doc__
def isspmatrix_dok(x):
"""Is x of dok_matrix type?
Parameters
----------
x
object to check for being a dok matrix
Returns
-------
bool
True if x is a dok matrix, False otherwise
Examples
--------
>>> from scipy.sparse import dok_matrix, isspmatrix_dok
>>> isspmatrix_dok(dok_matrix([[5]]))
True
>>> from scipy.sparse import dok_matrix, csr_matrix, isspmatrix_dok
>>> isspmatrix_dok(csr_matrix([[5]]))
False
"""
return isinstance(x, dok_matrix)


@@ -0,0 +1,169 @@
"""Functions to extract parts of sparse matrices
"""
__docformat__ = "restructuredtext en"
__all__ = ['find', 'tril', 'triu']
from .coo import coo_matrix
def find(A):
"""Return the indices and values of the nonzero elements of a matrix
Parameters
----------
A : dense or sparse matrix
Matrix whose nonzero elements are desired.
Returns
-------
(I,J,V) : tuple of arrays
I,J, and V contain the row indices, column indices, and values
of the nonzero matrix entries.
Examples
--------
>>> from scipy.sparse import csr_matrix, find
>>> A = csr_matrix([[7.0, 8.0, 0],[0, 0, 9.0]])
>>> find(A)
(array([0, 0, 1], dtype=int32), array([0, 1, 2], dtype=int32), array([ 7., 8., 9.]))
"""
A = coo_matrix(A, copy=True)
A.sum_duplicates()
# remove explicit zeros
nz_mask = A.data != 0
return A.row[nz_mask], A.col[nz_mask], A.data[nz_mask]
def tril(A, k=0, format=None):
"""Return the lower triangular portion of a matrix in sparse format
Returns the elements on or below the k-th diagonal of the matrix A.
- k = 0 corresponds to the main diagonal
- k > 0 is above the main diagonal
- k < 0 is below the main diagonal
Parameters
----------
A : dense or sparse matrix
        Matrix whose lower triangular portion is desired.
    k : integer, optional
The top-most diagonal of the lower triangle.
format : string
Sparse format of the result, e.g. format="csr", etc.
Returns
-------
L : sparse matrix
Lower triangular portion of A in sparse format.
See Also
--------
triu : upper triangle in sparse format
Examples
--------
>>> from scipy.sparse import csr_matrix, tril
>>> A = csr_matrix([[1, 2, 0, 0, 3], [4, 5, 0, 6, 7], [0, 0, 8, 9, 0]],
... dtype='int32')
>>> A.toarray()
array([[1, 2, 0, 0, 3],
[4, 5, 0, 6, 7],
[0, 0, 8, 9, 0]])
>>> tril(A).toarray()
array([[1, 0, 0, 0, 0],
[4, 5, 0, 0, 0],
[0, 0, 8, 0, 0]])
>>> tril(A).nnz
4
>>> tril(A, k=1).toarray()
array([[1, 2, 0, 0, 0],
[4, 5, 0, 0, 0],
[0, 0, 8, 9, 0]])
>>> tril(A, k=-1).toarray()
array([[0, 0, 0, 0, 0],
[4, 0, 0, 0, 0],
[0, 0, 0, 0, 0]])
>>> tril(A, format='csc')
<3x5 sparse matrix of type '<class 'numpy.int32'>'
with 4 stored elements in Compressed Sparse Column format>
"""
# convert to COOrdinate format where things are easy
A = coo_matrix(A, copy=False)
mask = A.row + k >= A.col
return _masked_coo(A, mask).asformat(format)
def triu(A, k=0, format=None):
"""Return the upper triangular portion of a matrix in sparse format
Returns the elements on or above the k-th diagonal of the matrix A.
- k = 0 corresponds to the main diagonal
- k > 0 is above the main diagonal
- k < 0 is below the main diagonal
Parameters
----------
A : dense or sparse matrix
Matrix whose upper triangular portion is desired.
k : int, optional
The bottom-most diagonal of the upper triangle.
format : string
Sparse format of the result, e.g. format="csr", etc.
Returns
-------
U : sparse matrix
Upper triangular portion of A in sparse format.
See Also
--------
tril : lower triangle in sparse format
Examples
--------
>>> from scipy.sparse import csr_matrix, triu
>>> A = csr_matrix([[1, 2, 0, 0, 3], [4, 5, 0, 6, 7], [0, 0, 8, 9, 0]],
... dtype='int32')
>>> A.toarray()
array([[1, 2, 0, 0, 3],
[4, 5, 0, 6, 7],
[0, 0, 8, 9, 0]])
>>> triu(A).toarray()
array([[1, 2, 0, 0, 3],
[0, 5, 0, 6, 7],
[0, 0, 8, 9, 0]])
>>> triu(A).nnz
8
>>> triu(A, k=1).toarray()
array([[0, 2, 0, 0, 3],
[0, 0, 0, 6, 7],
[0, 0, 0, 9, 0]])
>>> triu(A, k=-1).toarray()
array([[1, 2, 0, 0, 3],
[4, 5, 0, 6, 7],
[0, 0, 8, 9, 0]])
>>> triu(A, format='csc')
<3x5 sparse matrix of type '<class 'numpy.int32'>'
with 8 stored elements in Compressed Sparse Column format>
"""
# convert to COOrdinate format where things are easy
A = coo_matrix(A, copy=False)
mask = A.row + k <= A.col
return _masked_coo(A, mask).asformat(format)
def _masked_coo(A, mask):
row = A.row[mask]
col = A.col[mask]
data = A.data[mask]
return coo_matrix((data, (row, col)), shape=A.shape, dtype=A.dtype)
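# Illustrative sketch of the masking idea used by tril/triu above: in COO
# form an entry (i, j) lies on or below diagonal k exactly when i + k >= j,
# so a single boolean mask over the row/col arrays selects a triangle in
# one vectorized pass.  Standalone demo (runs only when executed directly):
if __name__ == "__main__":
    import numpy as np
    _D = np.arange(1, 10).reshape(3, 3)
    _A = coo_matrix(_D)
    _L = _masked_coo(_A, _A.row >= _A.col)   # k = 0: diagonal and below
    assert (_L.toarray() == np.tril(_D)).all()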

View file

@@ -0,0 +1,433 @@
"""
python generate_sparsetools.py
Generate manual wrappers for C++ sparsetools code.
Type codes used:
'i': integer scalar
'I': integer array
'T': data array
'B': boolean array
'V': std::vector<integer>*
'W': std::vector<data>*
'*': indicates that the next argument is an output argument
'v': void
'l': 64-bit integer scalar
See sparsetools.cxx for more details.
"""
import optparse
import os
from distutils.dep_util import newer
#
# List of all routines and their argument types.
#
# The first code indicates the return value, the rest the arguments.
#
# bsr.h
BSR_ROUTINES = """
bsr_diagonal v iiiiiIIT*T
bsr_tocsr v iiiiIIT*I*I*T
bsr_scale_rows v iiiiII*TT
bsr_scale_columns v iiiiII*TT
bsr_sort_indices v iiii*I*I*T
bsr_transpose v iiiiIIT*I*I*T
bsr_matmat v iiiiiiIITIIT*I*I*T
bsr_matvec v iiiiIITT*T
bsr_matvecs v iiiiiIITT*T
bsr_elmul_bsr v iiiiIITIIT*I*I*T
bsr_eldiv_bsr v iiiiIITIIT*I*I*T
bsr_plus_bsr v iiiiIITIIT*I*I*T
bsr_minus_bsr v iiiiIITIIT*I*I*T
bsr_maximum_bsr v iiiiIITIIT*I*I*T
bsr_minimum_bsr v iiiiIITIIT*I*I*T
bsr_ne_bsr v iiiiIITIIT*I*I*B
bsr_lt_bsr v iiiiIITIIT*I*I*B
bsr_gt_bsr v iiiiIITIIT*I*I*B
bsr_le_bsr v iiiiIITIIT*I*I*B
bsr_ge_bsr v iiiiIITIIT*I*I*B
"""
# csc.h
CSC_ROUTINES = """
csc_diagonal v iiiIIT*T
csc_tocsr v iiIIT*I*I*T
csc_matmat_maxnnz l iiIIII
csc_matmat v iiIITIIT*I*I*T
csc_matvec v iiIITT*T
csc_matvecs v iiiIITT*T
csc_elmul_csc v iiIITIIT*I*I*T
csc_eldiv_csc v iiIITIIT*I*I*T
csc_plus_csc v iiIITIIT*I*I*T
csc_minus_csc v iiIITIIT*I*I*T
csc_maximum_csc v iiIITIIT*I*I*T
csc_minimum_csc v iiIITIIT*I*I*T
csc_ne_csc v iiIITIIT*I*I*B
csc_lt_csc v iiIITIIT*I*I*B
csc_gt_csc v iiIITIIT*I*I*B
csc_le_csc v iiIITIIT*I*I*B
csc_ge_csc v iiIITIIT*I*I*B
"""
# csr.h
CSR_ROUTINES = """
csr_matmat_maxnnz l iiIIII
csr_matmat v iiIITIIT*I*I*T
csr_diagonal v iiiIIT*T
csr_tocsc v iiIIT*I*I*T
csr_tobsr v iiiiIIT*I*I*T
csr_todense v iiIIT*T
csr_matvec v iiIITT*T
csr_matvecs v iiiIITT*T
csr_elmul_csr v iiIITIIT*I*I*T
csr_eldiv_csr v iiIITIIT*I*I*T
csr_plus_csr v iiIITIIT*I*I*T
csr_minus_csr v iiIITIIT*I*I*T
csr_maximum_csr v iiIITIIT*I*I*T
csr_minimum_csr v iiIITIIT*I*I*T
csr_ne_csr v iiIITIIT*I*I*B
csr_lt_csr v iiIITIIT*I*I*B
csr_gt_csr v iiIITIIT*I*I*B
csr_le_csr v iiIITIIT*I*I*B
csr_ge_csr v iiIITIIT*I*I*B
csr_scale_rows v iiII*TT
csr_scale_columns v iiII*TT
csr_sort_indices v iI*I*T
csr_eliminate_zeros v ii*I*I*T
csr_sum_duplicates v ii*I*I*T
get_csr_submatrix v iiIITiiii*V*V*W
csr_row_index v iIIIT*I*T
csr_row_slice v iiiIIT*I*T
csr_column_index1 v iIiiII*I*I
csr_column_index2 v IIiIT*I*T
csr_sample_values v iiIITiII*T
csr_count_blocks i iiiiII
csr_sample_offsets i iiIIiII*I
expandptr v iI*I
test_throw_error i
csr_has_sorted_indices i iII
csr_has_canonical_format i iII
"""
# coo.h, dia.h, csgraph.h
OTHER_ROUTINES = """
coo_tocsr v iiiIIT*I*I*T
coo_todense v iilIIT*Ti
coo_matvec v lIITT*T
dia_matvec v iiiiITT*T
cs_graph_components i iII*I
"""
# List of compilation units
COMPILATION_UNITS = [
('bsr', BSR_ROUTINES),
('csr', CSR_ROUTINES),
('csc', CSC_ROUTINES),
('other', OTHER_ROUTINES),
]
#
# List of the supported index typenums and the corresponding C++ types
#
I_TYPES = [
('NPY_INT32', 'npy_int32'),
('NPY_INT64', 'npy_int64'),
]
#
# List of the supported data typenums and the corresponding C++ types
#
T_TYPES = [
('NPY_BOOL', 'npy_bool_wrapper'),
('NPY_BYTE', 'npy_byte'),
('NPY_UBYTE', 'npy_ubyte'),
('NPY_SHORT', 'npy_short'),
('NPY_USHORT', 'npy_ushort'),
('NPY_INT', 'npy_int'),
('NPY_UINT', 'npy_uint'),
('NPY_LONG', 'npy_long'),
('NPY_ULONG', 'npy_ulong'),
('NPY_LONGLONG', 'npy_longlong'),
('NPY_ULONGLONG', 'npy_ulonglong'),
('NPY_FLOAT', 'npy_float'),
('NPY_DOUBLE', 'npy_double'),
('NPY_LONGDOUBLE', 'npy_longdouble'),
('NPY_CFLOAT', 'npy_cfloat_wrapper'),
('NPY_CDOUBLE', 'npy_cdouble_wrapper'),
('NPY_CLONGDOUBLE', 'npy_clongdouble_wrapper'),
]
#
# Code templates
#
THUNK_TEMPLATE = """
static PY_LONG_LONG %(name)s_thunk(int I_typenum, int T_typenum, void **a)
{
%(thunk_content)s
}
"""
METHOD_TEMPLATE = """
NPY_VISIBILITY_HIDDEN PyObject *
%(name)s_method(PyObject *self, PyObject *args)
{
return call_thunk('%(ret_spec)s', "%(arg_spec)s", %(name)s_thunk, args);
}
"""
GET_THUNK_CASE_TEMPLATE = """
static int get_thunk_case(int I_typenum, int T_typenum)
{
%(content)s;
return -1;
}
"""
#
# Code generation
#
def get_thunk_type_set():
"""
Get a list containing the Cartesian product of data types, plus a getter routine.
Returns
-------
i_types : list [(j, I_typenum, None, I_type, None), ...]
Pairing of index type numbers and the corresponding C++ types,
and a unique index `j`. This is for routines that are parameterized
only by I but not by T.
it_types : list [(j, I_typenum, T_typenum, I_type, T_type), ...]
Same as `i_types`, but for routines parameterized both by T and I.
getter_code : str
C++ code for a function that takes I_typenum, T_typenum and returns
the unique index corresponding to the lists, or -1 if no match was
found.
"""
it_types = []
i_types = []
j = 0
getter_code = " if (0) {}"
for I_typenum, I_type in I_TYPES:
piece = """
else if (I_typenum == %(I_typenum)s) {
if (T_typenum == -1) { return %(j)s; }"""
getter_code += piece % dict(I_typenum=I_typenum, j=j)
i_types.append((j, I_typenum, None, I_type, None))
j += 1
for T_typenum, T_type in T_TYPES:
piece = """
else if (T_typenum == %(T_typenum)s) { return %(j)s; }"""
getter_code += piece % dict(T_typenum=T_typenum, j=j)
it_types.append((j, I_typenum, T_typenum, I_type, T_type))
j += 1
getter_code += """
}"""
return i_types, it_types, GET_THUNK_CASE_TEMPLATE % dict(content=getter_code)
def parse_routine(name, args, types):
"""
Generate thunk and method code for a given routine.
Parameters
----------
name : str
Name of the C++ routine
args : str
Argument list specification (in format explained above)
types : list
List of types to instantiate, as returned by `get_thunk_type_set`
"""
ret_spec = args[0]
arg_spec = args[1:]
def get_arglist(I_type, T_type):
"""
Generate argument list for calling the C++ function
"""
args = []
next_is_writeable = False
j = 0
for t in arg_spec:
const = '' if next_is_writeable else 'const '
next_is_writeable = False
if t == '*':
next_is_writeable = True
continue
elif t == 'i':
args.append("*(%s*)a[%d]" % (const + I_type, j))
elif t == 'I':
args.append("(%s*)a[%d]" % (const + I_type, j))
elif t == 'T':
args.append("(%s*)a[%d]" % (const + T_type, j))
elif t == 'B':
args.append("(npy_bool_wrapper*)a[%d]" % (j,))
elif t == 'V':
if const:
raise ValueError("'V' argument must be an output arg")
args.append("(std::vector<%s>*)a[%d]" % (I_type, j,))
elif t == 'W':
if const:
raise ValueError("'W' argument must be an output arg")
args.append("(std::vector<%s>*)a[%d]" % (T_type, j,))
elif t == 'l':
args.append("*(%snpy_int64*)a[%d]" % (const, j))
else:
raise ValueError("Invalid spec character %r" % (t,))
j += 1
return ", ".join(args)
# Generate thunk code: a giant switch statement with different
# type combinations inside.
thunk_content = """int j = get_thunk_case(I_typenum, T_typenum);
switch (j) {"""
for j, I_typenum, T_typenum, I_type, T_type in types:
arglist = get_arglist(I_type, T_type)
if T_type is None:
dispatch = "%s" % (I_type,)
else:
dispatch = "%s,%s" % (I_type, T_type)
if 'B' in arg_spec:
dispatch += ",npy_bool_wrapper"
piece = """
case %(j)s:"""
if ret_spec == 'v':
piece += """
(void)%(name)s<%(dispatch)s>(%(arglist)s);
return 0;"""
else:
piece += """
return %(name)s<%(dispatch)s>(%(arglist)s);"""
thunk_content += piece % dict(j=j, I_type=I_type, T_type=T_type,
I_typenum=I_typenum, T_typenum=T_typenum,
arglist=arglist, name=name,
dispatch=dispatch)
thunk_content += """
default:
throw std::runtime_error("internal error: invalid argument typenums");
}"""
thunk_code = THUNK_TEMPLATE % dict(name=name,
thunk_content=thunk_content)
# Generate method code
method_code = METHOD_TEMPLATE % dict(name=name,
ret_spec=ret_spec,
arg_spec=arg_spec)
return thunk_code, method_code
def main():
p = optparse.OptionParser(usage=(__doc__ or '').strip())
p.add_option("--no-force", action="store_false",
dest="force", default=True)
options, args = p.parse_args()
names = []
i_types, it_types, getter_code = get_thunk_type_set()
# Generate *_impl.h for each compilation unit
for unit_name, routines in COMPILATION_UNITS:
thunks = []
methods = []
# Generate thunks and methods for all routines
for line in routines.splitlines():
line = line.strip()
if not line or line.startswith('#'):
continue
try:
name, args = line.split(None, 1)
except ValueError:
raise ValueError("Malformed line: %r" % (line,))
args = "".join(args.split())
if 't' in args or 'T' in args:
thunk, method = parse_routine(name, args, it_types)
else:
thunk, method = parse_routine(name, args, i_types)
if name in names:
raise ValueError("Duplicate routine %r" % (name,))
names.append(name)
thunks.append(thunk)
methods.append(method)
# Produce output
dst = os.path.join(os.path.dirname(__file__),
'sparsetools',
unit_name + '_impl.h')
if newer(__file__, dst) or options.force:
print("[generate_sparsetools] generating %r" % (dst,))
with open(dst, 'w') as f:
write_autogen_blurb(f)
f.write(getter_code)
for thunk in thunks:
f.write(thunk)
for method in methods:
f.write(method)
else:
print("[generate_sparsetools] %r already up-to-date" % (dst,))
# Generate code for method struct
method_defs = ""
for name in names:
method_defs += "NPY_VISIBILITY_HIDDEN PyObject *%s_method(PyObject *, PyObject *);\n" % (name,)
method_struct = """\nstatic struct PyMethodDef sparsetools_methods[] = {"""
for name in names:
method_struct += """
{"%(name)s", (PyCFunction)%(name)s_method, METH_VARARGS, NULL},""" % dict(name=name)
method_struct += """
{NULL, NULL, 0, NULL}
};"""
# Produce sparsetools_impl.h
dst = os.path.join(os.path.dirname(__file__),
'sparsetools',
'sparsetools_impl.h')
if newer(__file__, dst) or options.force:
print("[generate_sparsetools] generating %r" % (dst,))
with open(dst, 'w') as f:
write_autogen_blurb(f)
f.write(method_defs)
f.write(method_struct)
else:
print("[generate_sparsetools] %r already up-to-date" % (dst,))
def write_autogen_blurb(stream):
stream.write("""\
/* This file is autogenerated by generate_sparsetools.py
* Do not edit manually or check into VCS.
*/
""")
if __name__ == "__main__":
main()

View file

@@ -0,0 +1,550 @@
"""List of Lists sparse matrix class
"""
__docformat__ = "restructuredtext en"
__all__ = ['lil_matrix', 'isspmatrix_lil']
from bisect import bisect_left
import numpy as np
from .base import spmatrix, isspmatrix
from ._index import IndexMixin, INT_TYPES, _broadcast_arrays
from .sputils import (getdtype, isshape, isscalarlike, upcast_scalar,
get_index_dtype, check_shape, check_reshape_kwargs,
asmatrix)
from . import _csparsetools
class lil_matrix(spmatrix, IndexMixin):
"""Row-based list of lists sparse matrix
This is a structure for constructing sparse matrices incrementally.
Note that inserting a single item can take linear time in the worst case;
to construct a matrix efficiently, make sure the items are pre-sorted by
index, per row.
This can be instantiated in several ways:
lil_matrix(D)
with a dense matrix or rank-2 ndarray D
lil_matrix(S)
with another sparse matrix S (equivalent to S.tolil())
lil_matrix((M, N), [dtype])
to construct an empty matrix with shape (M, N)
dtype is optional, defaulting to dtype='d'.
Attributes
----------
dtype : dtype
Data type of the matrix
shape : 2-tuple
Shape of the matrix
ndim : int
Number of dimensions (this is always 2)
nnz
Number of stored values, including explicit zeros
data
LIL format data array of the matrix
rows
LIL format row index array of the matrix
Notes
-----
Sparse matrices can be used in arithmetic operations: they support
addition, subtraction, multiplication, division, and matrix power.
Advantages of the LIL format
- supports flexible slicing
- changes to the matrix sparsity structure are efficient
Disadvantages of the LIL format
- arithmetic operations LIL + LIL are slow (consider CSR or CSC)
- slow column slicing (consider CSC)
- slow matrix vector products (consider CSR or CSC)
Intended Usage
- LIL is a convenient format for constructing sparse matrices
- once a matrix has been constructed, convert to CSR or
CSC format for fast arithmetic and matrix vector operations
- consider using the COO format when constructing large matrices
Data Structure
- An array (``self.rows``) of rows, each of which is a sorted
list of column indices of non-zero elements.
- The corresponding nonzero values are stored in similar
fashion in ``self.data``.
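Examples
--------
A small construction sketch: build incrementally, then convert to CSR
for arithmetic.
>>> from scipy.sparse import lil_matrix
>>> M = lil_matrix((3, 3))
>>> M[0, 1] = 2.0
>>> M[2, 0] = 5.0
>>> M.tocsr().toarray()
array([[0., 2., 0.],
       [0., 0., 0.],
       [5., 0., 0.]])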
"""
format = 'lil'
def __init__(self, arg1, shape=None, dtype=None, copy=False):
spmatrix.__init__(self)
self.dtype = getdtype(dtype, arg1, default=float)
# First get the shape
if isspmatrix(arg1):
if isspmatrix_lil(arg1) and copy:
A = arg1.copy()
else:
A = arg1.tolil()
if dtype is not None:
A = A.astype(dtype, copy=False)
self._shape = check_shape(A.shape)
self.dtype = A.dtype
self.rows = A.rows
self.data = A.data
elif isinstance(arg1,tuple):
if isshape(arg1):
if shape is not None:
raise ValueError('invalid use of shape parameter')
M, N = arg1
self._shape = check_shape((M, N))
self.rows = np.empty((M,), dtype=object)
self.data = np.empty((M,), dtype=object)
for i in range(M):
self.rows[i] = []
self.data[i] = []
else:
raise TypeError('unrecognized lil_matrix constructor usage')
else:
# assume A is dense
try:
A = asmatrix(arg1)
except TypeError:
raise TypeError('unsupported matrix type')
else:
from .csr import csr_matrix
A = csr_matrix(A, dtype=dtype).tolil()
self._shape = check_shape(A.shape)
self.dtype = A.dtype
self.rows = A.rows
self.data = A.data
def __iadd__(self,other):
self[:,:] = self + other
return self
def __isub__(self,other):
self[:,:] = self - other
return self
def __imul__(self,other):
if isscalarlike(other):
self[:,:] = self * other
return self
else:
return NotImplemented
def __itruediv__(self,other):
if isscalarlike(other):
self[:,:] = self / other
return self
else:
return NotImplemented
# Whenever the dimensions change, empty lists should be created for each
# row
def getnnz(self, axis=None):
if axis is None:
return sum([len(rowvals) for rowvals in self.data])
if axis < 0:
axis += 2
if axis == 0:
out = np.zeros(self.shape[1], dtype=np.intp)
for row in self.rows:
out[row] += 1
return out
elif axis == 1:
return np.array([len(rowvals) for rowvals in self.data], dtype=np.intp)
else:
raise ValueError('axis out of bounds')
def count_nonzero(self):
return sum(np.count_nonzero(rowvals) for rowvals in self.data)
getnnz.__doc__ = spmatrix.getnnz.__doc__
count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
def __str__(self):
val = ''
for i, row in enumerate(self.rows):
for pos, j in enumerate(row):
val += " %s\t%s\n" % (str((i, j)), str(self.data[i][pos]))
return val[:-1]
def getrowview(self, i):
"""Returns a view of the 'i'th row (without copying).
"""
new = lil_matrix((1, self.shape[1]), dtype=self.dtype)
new.rows[0] = self.rows[i]
new.data[0] = self.data[i]
return new
def getrow(self, i):
"""Returns a copy of the 'i'th row.
"""
M, N = self.shape
if i < 0:
i += M
if i < 0 or i >= M:
raise IndexError('row index out of bounds')
new = lil_matrix((1, N), dtype=self.dtype)
new.rows[0] = self.rows[i][:]
new.data[0] = self.data[i][:]
return new
def __getitem__(self, key):
# Fast path for simple (int, int) indexing.
if (isinstance(key, tuple) and len(key) == 2 and
isinstance(key[0], INT_TYPES) and
isinstance(key[1], INT_TYPES)):
# lil_get1 handles validation for us.
return self._get_intXint(*key)
# Everything else takes the normal path.
return IndexMixin.__getitem__(self, key)
def _asindices(self, idx, N):
# LIL routines handle bounds-checking for us, so don't do it here.
try:
x = np.asarray(idx)
except (ValueError, TypeError, MemoryError):
raise IndexError('invalid index')
if x.ndim not in (1, 2):
raise IndexError('Index dimension must be <= 2')
return x
def _get_intXint(self, row, col):
v = _csparsetools.lil_get1(self.shape[0], self.shape[1], self.rows,
self.data, row, col)
return self.dtype.type(v)
def _get_sliceXint(self, row, col):
row = range(*row.indices(self.shape[0]))
return self._get_row_ranges(row, slice(col, col+1))
def _get_arrayXint(self, row, col):
return self._get_row_ranges(row, slice(col, col+1))
def _get_intXslice(self, row, col):
return self._get_row_ranges((row,), col)
def _get_sliceXslice(self, row, col):
row = range(*row.indices(self.shape[0]))
return self._get_row_ranges(row, col)
def _get_arrayXslice(self, row, col):
return self._get_row_ranges(row, col)
def _get_intXarray(self, row, col):
row = np.array(row, dtype=col.dtype, ndmin=1)
return self._get_columnXarray(row, col)
def _get_sliceXarray(self, row, col):
row = np.arange(*row.indices(self.shape[0]))
return self._get_columnXarray(row, col)
def _get_columnXarray(self, row, col):
# outer indexing
row, col = _broadcast_arrays(row[:,None], col)
return self._get_arrayXarray(row, col)
def _get_arrayXarray(self, row, col):
# inner indexing
i, j = map(np.atleast_2d, _prepare_index_for_memoryview(row, col))
new = lil_matrix(i.shape, dtype=self.dtype)
_csparsetools.lil_fancy_get(self.shape[0], self.shape[1],
self.rows, self.data,
new.rows, new.data,
i, j)
return new
def _get_row_ranges(self, rows, col_slice):
"""
Fast path for indexing in the case where column index is slice.
This improves on a brute-force scan by skipping zeros more
efficiently, accessing the elements column-wise in order.
Parameters
----------
rows : sequence or range
Rows indexed. If range, must be within valid bounds.
col_slice : slice
Columns indexed
"""
j_start, j_stop, j_stride = col_slice.indices(self.shape[1])
col_range = range(j_start, j_stop, j_stride)
nj = len(col_range)
new = lil_matrix((len(rows), nj), dtype=self.dtype)
_csparsetools.lil_get_row_ranges(self.shape[0], self.shape[1],
self.rows, self.data,
new.rows, new.data,
rows,
j_start, j_stop, j_stride, nj)
return new
def _set_intXint(self, row, col, x):
_csparsetools.lil_insert(self.shape[0], self.shape[1], self.rows,
self.data, row, col, x)
def _set_arrayXarray(self, row, col, x):
i, j, x = map(np.atleast_2d, _prepare_index_for_memoryview(row, col, x))
_csparsetools.lil_fancy_set(self.shape[0], self.shape[1],
self.rows, self.data,
i, j, x)
def _set_arrayXarray_sparse(self, row, col, x):
# Special case: full matrix assignment
if (x.shape == self.shape and
isinstance(row, slice) and row == slice(None) and
isinstance(col, slice) and col == slice(None)):
x = lil_matrix(x, dtype=self.dtype)
self.rows = x.rows
self.data = x.data
return
# Fall back to densifying x
x = np.asarray(x.toarray(), dtype=self.dtype)
x, _ = _broadcast_arrays(x, row)
self._set_arrayXarray(row, col, x)
def __setitem__(self, key, x):
# Fast path for simple (int, int) indexing.
if (isinstance(key, tuple) and len(key) == 2 and
isinstance(key[0], INT_TYPES) and
isinstance(key[1], INT_TYPES)):
x = self.dtype.type(x)
if x.size > 1:
raise ValueError("Trying to assign a sequence to an item")
return self._set_intXint(key[0], key[1], x)
# Everything else takes the normal path.
IndexMixin.__setitem__(self, key, x)
def _mul_scalar(self, other):
if other == 0:
# Multiply by zero: return the zero matrix
new = lil_matrix(self.shape, dtype=self.dtype)
else:
res_dtype = upcast_scalar(self.dtype, other)
new = self.copy()
new = new.astype(res_dtype)
# Multiply this scalar by every element.
for j, rowvals in enumerate(new.data):
new.data[j] = [val*other for val in rowvals]
return new
def __truediv__(self, other): # self / other
if isscalarlike(other):
new = self.copy()
# Divide every element by this scalar
for j, rowvals in enumerate(new.data):
new.data[j] = [val/other for val in rowvals]
return new
else:
return self.tocsr() / other
def copy(self):
M, N = self.shape
new = lil_matrix(self.shape, dtype=self.dtype)
# This is ~14x faster than calling deepcopy() on rows and data.
_csparsetools.lil_get_row_ranges(M, N, self.rows, self.data,
new.rows, new.data, range(M),
0, N, 1, N)
return new
copy.__doc__ = spmatrix.copy.__doc__
def reshape(self, *args, **kwargs):
shape = check_shape(args, self.shape)
order, copy = check_reshape_kwargs(kwargs)
# Return early if reshape is not required
if shape == self.shape:
if copy:
return self.copy()
else:
return self
new = lil_matrix(shape, dtype=self.dtype)
if order == 'C':
ncols = self.shape[1]
for i, row in enumerate(self.rows):
for col, j in enumerate(row):
new_r, new_c = np.unravel_index(i * ncols + j, shape)
new[new_r, new_c] = self[i, j]
elif order == 'F':
nrows = self.shape[0]
for i, row in enumerate(self.rows):
for col, j in enumerate(row):
new_r, new_c = np.unravel_index(i + j * nrows, shape, order)
new[new_r, new_c] = self[i, j]
else:
raise ValueError("'order' must be 'C' or 'F'")
return new
reshape.__doc__ = spmatrix.reshape.__doc__
def resize(self, *shape):
shape = check_shape(shape)
new_M, new_N = shape
M, N = self.shape
if new_M < M:
self.rows = self.rows[:new_M]
self.data = self.data[:new_M]
elif new_M > M:
self.rows = np.resize(self.rows, new_M)
self.data = np.resize(self.data, new_M)
for i in range(M, new_M):
self.rows[i] = []
self.data[i] = []
if new_N < N:
for row, data in zip(self.rows, self.data):
trunc = bisect_left(row, new_N)
del row[trunc:]
del data[trunc:]
self._shape = shape
resize.__doc__ = spmatrix.resize.__doc__
def toarray(self, order=None, out=None):
d = self._process_toarray_args(order, out)
for i, row in enumerate(self.rows):
for pos, j in enumerate(row):
d[i, j] = self.data[i][pos]
return d
toarray.__doc__ = spmatrix.toarray.__doc__
def transpose(self, axes=None, copy=False):
return self.tocsr(copy=copy).transpose(axes=axes, copy=False).tolil(copy=False)
transpose.__doc__ = spmatrix.transpose.__doc__
def tolil(self, copy=False):
if copy:
return self.copy()
else:
return self
tolil.__doc__ = spmatrix.tolil.__doc__
def tocsr(self, copy=False):
from .csr import csr_matrix
M, N = self.shape
if M == 0 or N == 0:
return csr_matrix((M, N), dtype=self.dtype)
# construct indptr array
if M*N <= np.iinfo(np.int32).max:
# fast path: it is known that 64-bit indexing will not be needed.
idx_dtype = np.int32
indptr = np.empty(M + 1, dtype=idx_dtype)
indptr[0] = 0
_csparsetools.lil_get_lengths(self.rows, indptr[1:])
np.cumsum(indptr, out=indptr)
nnz = indptr[-1]
else:
idx_dtype = get_index_dtype(maxval=N)
lengths = np.empty(M, dtype=idx_dtype)
_csparsetools.lil_get_lengths(self.rows, lengths)
nnz = lengths.sum()
idx_dtype = get_index_dtype(maxval=max(N, nnz))
indptr = np.empty(M + 1, dtype=idx_dtype)
indptr[0] = 0
np.cumsum(lengths, dtype=idx_dtype, out=indptr[1:])
indices = np.empty(nnz, dtype=idx_dtype)
data = np.empty(nnz, dtype=self.dtype)
_csparsetools.lil_flatten_to_array(self.rows, indices)
_csparsetools.lil_flatten_to_array(self.data, data)
# init csr matrix
return csr_matrix((data, indices, indptr), shape=self.shape)
tocsr.__doc__ = spmatrix.tocsr.__doc__
def _prepare_index_for_memoryview(i, j, x=None):
"""
Convert index and data arrays to form suitable for passing to the
Cython fancy getset routines.
The conversions are necessary to (i) ensure the integer
index arrays are in one of the accepted types, and (ii) ensure
the arrays are writable so that Cython memoryview support doesn't
choke on them.
Parameters
----------
i, j
Index arrays
x : optional
Data arrays
Returns
-------
i, j, x
Re-formatted arrays (x is omitted, if input was None)
"""
if i.dtype > j.dtype:
j = j.astype(i.dtype)
elif i.dtype < j.dtype:
i = i.astype(j.dtype)
if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
i = i.astype(np.intp)
if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
j = j.astype(np.intp)
if x is not None:
if not x.flags.writeable:
x = x.copy()
return i, j, x
else:
return i, j
def isspmatrix_lil(x):
"""Is x of lil_matrix type?
Parameters
----------
x
object to check for being a lil matrix
Returns
-------
bool
True if x is a lil matrix, False otherwise
Examples
--------
>>> from scipy.sparse import lil_matrix, isspmatrix_lil
>>> isspmatrix_lil(lil_matrix([[5]]))
True
>>> from scipy.sparse import lil_matrix, csr_matrix, isspmatrix_lil
>>> isspmatrix_lil(csr_matrix([[5]]))
False
"""
return isinstance(x, lil_matrix)

View file

@@ -0,0 +1,124 @@
"""
Sparse linear algebra (:mod:`scipy.sparse.linalg`)
==================================================
.. currentmodule:: scipy.sparse.linalg
Abstract linear operators
-------------------------
.. autosummary::
:toctree: generated/
LinearOperator -- abstract representation of a linear operator
aslinearoperator -- convert an object to an abstract linear operator
Matrix Operations
-----------------
.. autosummary::
:toctree: generated/
inv -- compute the sparse matrix inverse
expm -- compute the sparse matrix exponential
expm_multiply -- compute the product of a matrix exponential and a matrix
Matrix norms
------------
.. autosummary::
:toctree: generated/
norm -- Norm of a sparse matrix
onenormest -- Estimate the 1-norm of a sparse matrix
Solving linear problems
-----------------------
Direct methods for linear equation systems:
.. autosummary::
:toctree: generated/
spsolve -- Solve the sparse linear system Ax=b
spsolve_triangular -- Solve the sparse linear system Ax=b for a triangular matrix
factorized -- Pre-factorize matrix to a function solving a linear system
MatrixRankWarning -- Warning on exactly singular matrices
use_solver -- Select direct solver to use
Iterative methods for linear equation systems:
.. autosummary::
:toctree: generated/
bicg -- Use BIConjugate Gradient iteration to solve A x = b
bicgstab -- Use BIConjugate Gradient STABilized iteration to solve A x = b
cg -- Use Conjugate Gradient iteration to solve A x = b
cgs -- Use Conjugate Gradient Squared iteration to solve A x = b
gmres -- Use Generalized Minimal RESidual iteration to solve A x = b
lgmres -- Solve a matrix equation using the LGMRES algorithm
minres -- Use MINimum RESidual iteration to solve Ax = b
qmr -- Use Quasi-Minimal Residual iteration to solve A x = b
gcrotmk -- Solve a matrix equation using the GCROT(m,k) algorithm
Iterative methods for least-squares problems:
.. autosummary::
:toctree: generated/
lsqr -- Find the least-squares solution to a sparse linear equation system
lsmr -- Find the least-squares solution to a sparse linear equation system
Matrix factorizations
---------------------
Eigenvalue problems:
.. autosummary::
:toctree: generated/
eigs -- Find k eigenvalues and eigenvectors of the square matrix A
eigsh -- Find k eigenvalues and eigenvectors of a symmetric matrix
lobpcg -- Solve symmetric partial eigenproblems with optional preconditioning
Singular values problems:
.. autosummary::
:toctree: generated/
svds -- Compute k singular values/vectors for a sparse matrix
Complete or incomplete LU factorizations
.. autosummary::
:toctree: generated/
splu -- Compute a LU decomposition for a sparse matrix
spilu -- Compute an incomplete LU decomposition for a sparse matrix
SuperLU -- Object representing an LU factorization
Exceptions
----------
.. autosummary::
:toctree: generated/
ArpackNoConvergence
ArpackError
"""
from .isolve import *
from .dsolve import *
from .interface import *
from .eigen import *
from .matfuncs import *
from ._onenormest import *
from ._norm import *
from ._expm_multiply import *
__all__ = [s for s in dir() if not s.startswith('_')]
from scipy._lib._testutils import PytestTester
test = PytestTester(__name__)
del PytestTester
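# Minimal usage sketch for the direct solver listed above (illustrative):
#
#     >>> import numpy as np
#     >>> from scipy.sparse import csc_matrix
#     >>> from scipy.sparse.linalg import spsolve
#     >>> A = csc_matrix([[3., 0.], [1., 2.]])
#     >>> b = np.array([3., 5.])
#     >>> spsolve(A, b)          # solves A @ x = b
#     array([1., 2.])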

View file

@@ -0,0 +1,711 @@
"""Compute the action of the matrix exponential.
"""
import numpy as np
import scipy.linalg
import scipy.sparse.linalg
from scipy.sparse.linalg import aslinearoperator
from scipy.sparse.sputils import is_pydata_spmatrix
__all__ = ['expm_multiply']
def _exact_inf_norm(A):
# A compatibility function which should eventually disappear.
if scipy.sparse.isspmatrix(A):
return max(abs(A).sum(axis=1).flat)
elif is_pydata_spmatrix(A):
return max(abs(A).sum(axis=1))
else:
return np.linalg.norm(A, np.inf)
def _exact_1_norm(A):
# A compatibility function which should eventually disappear.
if scipy.sparse.isspmatrix(A):
return max(abs(A).sum(axis=0).flat)
elif is_pydata_spmatrix(A):
return max(abs(A).sum(axis=0))
else:
return np.linalg.norm(A, 1)
def _trace(A):
# A compatibility function which should eventually disappear.
if scipy.sparse.isspmatrix(A):
return A.diagonal().sum()
elif is_pydata_spmatrix(A):
return A.to_scipy_sparse().diagonal().sum()
else:
return np.trace(A)
def _ident_like(A):
# A compatibility function which should eventually disappear.
if scipy.sparse.isspmatrix(A):
return scipy.sparse.construct.eye(A.shape[0], A.shape[1],
dtype=A.dtype, format=A.format)
elif is_pydata_spmatrix(A):
import sparse
return sparse.eye(A.shape[0], A.shape[1], dtype=A.dtype)
else:
return np.eye(A.shape[0], A.shape[1], dtype=A.dtype)
def expm_multiply(A, B, start=None, stop=None, num=None, endpoint=None):
"""
Compute the action of the matrix exponential of A on B.
Parameters
----------
A : transposable linear operator
The operator whose exponential is of interest.
B : ndarray
The matrix or vector to be multiplied by the matrix exponential of A.
start : scalar, optional
The starting time point of the sequence.
stop : scalar, optional
The end time point of the sequence, unless `endpoint` is set to False.
In that case, the sequence consists of all but the last of ``num + 1``
evenly spaced time points, so that `stop` is excluded.
Note that the step size changes when `endpoint` is False.
num : int, optional
Number of time points to use.
endpoint : bool, optional
If True, `stop` is the last time point. Otherwise, it is not included.
Returns
-------
expm_A_B : ndarray
The result of the action :math:`e^{t_k A} B`.
Notes
-----
The optional arguments defining the sequence of evenly spaced time points
are compatible with the arguments of `numpy.linspace`.
The output ndarray shape is somewhat complicated, so it is explained here.
The ndim of the output could be either 1, 2, or 3.
It would be 1 if you are computing the expm action on a single vector
at a single time point.
It would be 2 if you are computing the expm action on a vector
at multiple time points, or if you are computing the expm action
on a matrix at a single time point.
It would be 3 if you want the action on a matrix with multiple
columns at multiple time points.
If multiple time points are requested, expm_A_B[0] will always
be the action of the expm at the first time point,
regardless of whether the action is on a vector or a matrix.
References
----------
.. [1] Awad H. Al-Mohy and Nicholas J. Higham (2011)
"Computing the Action of the Matrix Exponential,
with an Application to Exponential Integrators."
SIAM Journal on Scientific Computing,
33 (2). pp. 488-511. ISSN 1064-8275
http://eprints.ma.man.ac.uk/1591/
.. [2] Nicholas J. Higham and Awad H. Al-Mohy (2010)
"Computing Matrix Functions."
Acta Numerica,
19. 159-208. ISSN 0962-4929
http://eprints.ma.man.ac.uk/1451/
Examples
--------
>>> from scipy.sparse import csc_matrix
>>> from scipy.sparse.linalg import expm, expm_multiply
>>> A = csc_matrix([[1, 0], [0, 1]])
>>> A.todense()
matrix([[1, 0],
[0, 1]], dtype=int64)
>>> B = np.array([np.exp(-1.), np.exp(-2.)])
>>> B
array([ 0.36787944, 0.13533528])
>>> expm_multiply(A, B, start=1, stop=2, num=3, endpoint=True)
array([[ 1. , 0.36787944],
[ 1.64872127, 0.60653066],
[ 2.71828183, 1. ]])
>>> expm(A).dot(B) # Verify 1st timestep
array([ 1. , 0.36787944])
>>> expm(1.5*A).dot(B) # Verify 2nd timestep
array([ 1.64872127, 0.60653066])
>>> expm(2*A).dot(B) # Verify 3rd timestep
array([ 2.71828183, 1. ])
"""
if all(arg is None for arg in (start, stop, num, endpoint)):
X = _expm_multiply_simple(A, B)
else:
X, status = _expm_multiply_interval(A, B, start, stop, num, endpoint)
return X
def _expm_multiply_simple(A, B, t=1.0, balance=False):
"""
Compute the action of the matrix exponential at a single time point.
Parameters
----------
A : transposable linear operator
The operator whose exponential is of interest.
B : ndarray
The matrix to be multiplied by the matrix exponential of A.
t : float
A time point.
balance : bool
Indicates whether or not to apply balancing.
Returns
-------
F : ndarray
:math:`e^{t A} B`
Notes
-----
This is algorithm (3.2) in Al-Mohy and Higham (2011).
"""
if balance:
raise NotImplementedError
if len(A.shape) != 2 or A.shape[0] != A.shape[1]:
raise ValueError('expected A to be like a square matrix')
if A.shape[1] != B.shape[0]:
raise ValueError('the matrices A and B have incompatible shapes')
ident = _ident_like(A)
n = A.shape[0]
if len(B.shape) == 1:
n0 = 1
elif len(B.shape) == 2:
n0 = B.shape[1]
else:
raise ValueError('expected B to be like a matrix or a vector')
u_d = 2**-53
tol = u_d
mu = _trace(A) / float(n)
A = A - mu * ident
A_1_norm = _exact_1_norm(A)
if t*A_1_norm == 0:
m_star, s = 0, 1
else:
ell = 2
norm_info = LazyOperatorNormInfo(t*A, A_1_norm=t*A_1_norm, ell=ell)
m_star, s = _fragment_3_1(norm_info, n0, tol, ell=ell)
return _expm_multiply_simple_core(A, B, t, mu, m_star, s, tol, balance)
def _expm_multiply_simple_core(A, B, t, mu, m_star, s, tol=None, balance=False):
"""
A helper function.
"""
if balance:
raise NotImplementedError
if tol is None:
u_d = 2 ** -53
tol = u_d
F = B
eta = np.exp(t*mu / float(s))
for i in range(s):
c1 = _exact_inf_norm(B)
for j in range(m_star):
coeff = t / float(s*(j+1))
B = coeff * A.dot(B)
c2 = _exact_inf_norm(B)
F = F + B
if c1 + c2 <= tol * _exact_inf_norm(F):
break
c1 = c2
F = eta * F
B = F
return F
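# Sketch of the recurrence implemented above (algorithm 3.2): with the
# shift A <- A - mu*I applied by the caller, each of the s stages
# accumulates the truncated Taylor series
#     F = sum_{j=0}^{m_star} (t/s)^j A^j B / j!
# via B <- (t/(s*(j+1))) * A.dot(B), and the factor eta = exp(t*mu/s)
# restores the shift after every stage.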
# This table helps to compute bounds.
# They seem to have been difficult to calculate, involving symbolic
# manipulation of equations, followed by numerical root finding.
_theta = {
# The first 30 values are from table A.3 of Computing Matrix Functions.
1: 2.29e-16,
2: 2.58e-8,
3: 1.39e-5,
4: 3.40e-4,
5: 2.40e-3,
6: 9.07e-3,
7: 2.38e-2,
8: 5.00e-2,
9: 8.96e-2,
10: 1.44e-1,
# 11
11: 2.14e-1,
12: 3.00e-1,
13: 4.00e-1,
14: 5.14e-1,
15: 6.41e-1,
16: 7.81e-1,
17: 9.31e-1,
18: 1.09,
19: 1.26,
20: 1.44,
# 21
21: 1.62,
22: 1.82,
23: 2.01,
24: 2.22,
25: 2.43,
26: 2.64,
27: 2.86,
28: 3.08,
29: 3.31,
30: 3.54,
# The rest are from table 3.1 of
# Computing the Action of the Matrix Exponential.
35: 4.7,
40: 6.0,
45: 7.2,
50: 8.5,
55: 9.9,
}
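# Usage sketch: for a Taylor order m, _theta[m] is (roughly) the largest
# shifted 1-norm for which a single degree-m stage meets the accuracy
# target; otherwise s = ceil(||t*A||_1 / _theta[m]) stages are used, as
# in _fragment_3_1 below.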
def _onenormest_matrix_power(A, p,
t=2, itmax=5, compute_v=False, compute_w=False):
"""
Efficiently estimate the 1-norm of A^p.
Parameters
----------
A : ndarray
Matrix whose 1-norm of a power is to be computed.
p : int
Non-negative integer power.
t : int, optional
A positive parameter controlling the tradeoff between
accuracy versus time and memory usage.
Larger values take longer and use more memory
but give more accurate output.
itmax : int, optional
Use at most this many iterations.
compute_v : bool, optional
Request a norm-maximizing linear operator input vector if True.
compute_w : bool, optional
Request a norm-maximizing linear operator output vector if True.
Returns
-------
est : float
An underestimate of the 1-norm of the sparse matrix.
v : ndarray, optional
The vector such that ||Av||_1 == est*||v||_1.
It can be thought of as an input to the linear operator
that gives an output with particularly large norm.
w : ndarray, optional
The vector Av which has relatively large 1-norm.
It can be thought of as an output of the linear operator
that is relatively large in norm compared to the input.
"""
#XXX Eventually turn this into an API function in the _onenormest module,
#XXX and remove its underscore,
#XXX but wait until expm_multiply goes into scipy.
return scipy.sparse.linalg.onenormest(aslinearoperator(A) ** p)
class LazyOperatorNormInfo:
"""
Information about an operator is lazily computed.
The information includes the exact 1-norm of the operator,
in addition to estimates of 1-norms of powers of the operator.
This uses the notation of Computing the Action (2011).
This class is specialized enough to probably not be of general interest
outside of this module.
"""
def __init__(self, A, A_1_norm=None, ell=2, scale=1):
"""
Provide the operator and some norm-related information.
Parameters
----------
A : linear operator
The operator of interest.
A_1_norm : float, optional
The exact 1-norm of A.
ell : int, optional
A technical parameter controlling norm estimation quality.
scale : int, optional
If specified, return the norms of scale*A instead of A.
"""
self._A = A
self._A_1_norm = A_1_norm
self._ell = ell
self._d = {}
self._scale = scale
def set_scale(self,scale):
"""
Set the scale parameter.
"""
self._scale = scale
def onenorm(self):
"""
Compute the exact 1-norm.
"""
if self._A_1_norm is None:
self._A_1_norm = _exact_1_norm(self._A)
return self._scale*self._A_1_norm
def d(self, p):
"""
Lazily estimate d_p(A) ~= || A^p ||^(1/p) where ||.|| is the 1-norm.
"""
if p not in self._d:
est = _onenormest_matrix_power(self._A, p, self._ell)
self._d[p] = est ** (1.0 / p)
return self._scale*self._d[p]
def alpha(self, p):
"""
Lazily compute max(d(p), d(p+1)).
"""
return max(self.d(p), self.d(p+1))
def _compute_cost_div_m(m, p, norm_info):
"""
A helper function for computing bounds.
This is equation (3.10).
It measures cost in terms of the number of required matrix products.
Parameters
----------
m : int
A valid key of _theta.
p : int
A matrix power.
norm_info : LazyOperatorNormInfo
Information about 1-norms of related operators.
Returns
-------
cost_div_m : int
Required number of matrix products divided by m.
"""
return int(np.ceil(norm_info.alpha(p) / _theta[m]))
def _compute_p_max(m_max):
"""
Compute the largest positive integer p such that p*(p-1) <= m_max + 1.
Do this in a slightly dumb way, but safe and not too slow.
Parameters
----------
m_max : int
A count related to bounds.
"""
sqrt_m_max = np.sqrt(m_max)
p_low = int(np.floor(sqrt_m_max))
p_high = int(np.ceil(sqrt_m_max + 1))
return max(p for p in range(p_low, p_high+1) if p*(p-1) <= m_max + 1)
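# Worked check (illustrative): for the default m_max = 55, the largest p
# with p*(p-1) <= 56 is p = 8, since 8*7 = 56 <= 56 while 9*8 = 72 > 56.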
def _fragment_3_1(norm_info, n0, tol, m_max=55, ell=2):
"""
A helper function for the _expm_multiply_* functions.
Parameters
----------
norm_info : LazyOperatorNormInfo
Information about norms of certain linear operators of interest.
n0 : int
Number of columns in the _expm_multiply_* B matrix.
tol : float
Expected to be
:math:`2^{-24}` for single precision or
:math:`2^{-53}` for double precision.
m_max : int
A value related to a bound.
ell : int
The number of columns used in the 1-norm approximation.
This is usually taken to be small, maybe between 1 and 5.
Returns
-------
best_m : int
Related to bounds for error control.
best_s : int
Amount of scaling.
Notes
-----
This is code fragment (3.1) in Al-Mohy and Higham (2011).
The discussion of default values for m_max and ell
is given between the definitions of equation (3.11)
and the definition of equation (3.12).
"""
if ell < 1:
raise ValueError('expected ell to be a positive integer')
best_m = None
best_s = None
if _condition_3_13(norm_info.onenorm(), n0, m_max, ell):
for m, theta in _theta.items():
s = int(np.ceil(norm_info.onenorm() / theta))
if best_m is None or m * s < best_m * best_s:
best_m = m
best_s = s
else:
# Equation (3.11).
for p in range(2, _compute_p_max(m_max) + 1):
for m in range(p*(p-1)-1, m_max+1):
if m in _theta:
s = _compute_cost_div_m(m, p, norm_info)
if best_m is None or m * s < best_m * best_s:
best_m = m
best_s = s
best_s = max(best_s, 1)
return best_m, best_s
def _condition_3_13(A_1_norm, n0, m_max, ell):
"""
A helper function for the _expm_multiply_* functions.
Parameters
----------
A_1_norm : float
The precomputed 1-norm of A.
n0 : int
Number of columns in the _expm_multiply_* B matrix.
m_max : int
A value related to a bound.
ell : int
The number of columns used in the 1-norm approximation.
This is usually taken to be small, maybe between 1 and 5.
Returns
-------
value : bool
Indicates whether or not the condition has been met.
Notes
-----
This is condition (3.13) in Al-Mohy and Higham (2011).
"""
# This is the rhs of equation (3.12).
p_max = _compute_p_max(m_max)
a = 2 * ell * p_max * (p_max + 3)
# Evaluate the condition (3.13).
b = _theta[m_max] / float(n0 * m_max)
return A_1_norm <= a * b
def _expm_multiply_interval(A, B, start=None, stop=None,
num=None, endpoint=None, balance=False, status_only=False):
"""
Compute the action of the matrix exponential at multiple time points.
Parameters
----------
A : transposable linear operator
The operator whose exponential is of interest.
B : ndarray
The matrix to be multiplied by the matrix exponential of A.
start : scalar, optional
The starting time point of the sequence.
stop : scalar, optional
The end time point of the sequence, unless `endpoint` is set to False.
In that case, the sequence consists of all but the last of ``num + 1``
evenly spaced time points, so that `stop` is excluded.
Note that the step size changes when `endpoint` is False.
num : int, optional
Number of time points to use.
endpoint : bool, optional
If True, `stop` is the last time point. Otherwise, it is not included.
balance : bool
Indicates whether or not to apply balancing.
status_only : bool
A flag that is set to True for some debugging and testing operations.
Returns
-------
F : ndarray
:math:`e^{t_k A} B`
status : int
An integer status for testing and debugging.
Notes
-----
This is algorithm (5.2) in Al-Mohy and Higham (2011).
There seems to be a typo, where line 15 of the algorithm should be
moved to line 6.5 (between lines 6 and 7).
"""
if balance:
raise NotImplementedError
if len(A.shape) != 2 or A.shape[0] != A.shape[1]:
raise ValueError('expected A to be like a square matrix')
if A.shape[1] != B.shape[0]:
raise ValueError('the matrices A and B have incompatible shapes')
ident = _ident_like(A)
n = A.shape[0]
if len(B.shape) == 1:
n0 = 1
elif len(B.shape) == 2:
n0 = B.shape[1]
else:
raise ValueError('expected B to be like a matrix or a vector')
u_d = 2**-53
tol = u_d
mu = _trace(A) / float(n)
# Get the linspace samples, attempting to preserve the linspace defaults.
linspace_kwargs = {'retstep': True}
if num is not None:
linspace_kwargs['num'] = num
if endpoint is not None:
linspace_kwargs['endpoint'] = endpoint
samples, step = np.linspace(start, stop, **linspace_kwargs)
# Convert the linspace output to the notation used by the publication.
nsamples = len(samples)
if nsamples < 2:
raise ValueError('at least two time points are required')
q = nsamples - 1
h = step
t_0 = samples[0]
t_q = samples[q]
# Define the output ndarray.
# Use an ndim=3 shape, such that the last two indices
# are the ones that may be involved in level 3 BLAS operations.
X_shape = (nsamples,) + B.shape
X = np.empty(X_shape, dtype=np.result_type(A.dtype, B.dtype, float))
t = t_q - t_0
A = A - mu * ident
A_1_norm = _exact_1_norm(A)
ell = 2
norm_info = LazyOperatorNormInfo(t*A, A_1_norm=t*A_1_norm, ell=ell)
if t*A_1_norm == 0:
m_star, s = 0, 1
else:
m_star, s = _fragment_3_1(norm_info, n0, tol, ell=ell)
# Compute the expm action up to the initial time point.
X[0] = _expm_multiply_simple_core(A, B, t_0, mu, m_star, s)
# Compute the expm action at the rest of the time points.
if q <= s:
if status_only:
return 0
else:
return _expm_multiply_interval_core_0(A, X,
h, mu, q, norm_info, tol, ell,n0)
elif not (q % s):
if status_only:
return 1
else:
return _expm_multiply_interval_core_1(A, X,
h, mu, m_star, s, q, tol)
elif (q % s):
if status_only:
return 2
else:
return _expm_multiply_interval_core_2(A, X,
h, mu, m_star, s, q, tol)
else:
raise Exception('internal error')
def _expm_multiply_interval_core_0(A, X, h, mu, q, norm_info, tol, ell, n0):
"""
A helper function, for the case q <= s.
"""
# Compute the new values of m_star and s which should be applied
# over intervals of size t/q
if norm_info.onenorm() == 0:
m_star, s = 0, 1
else:
norm_info.set_scale(1./q)
m_star, s = _fragment_3_1(norm_info, n0, tol, ell=ell)
norm_info.set_scale(1)
for k in range(q):
X[k+1] = _expm_multiply_simple_core(A, X[k], h, mu, m_star, s)
return X, 0
def _expm_multiply_interval_core_1(A, X, h, mu, m_star, s, q, tol):
"""
A helper function, for the case q > s and q % s == 0.
"""
d = q // s
input_shape = X.shape[1:]
K_shape = (m_star + 1, ) + input_shape
K = np.empty(K_shape, dtype=X.dtype)
for i in range(s):
Z = X[i*d]
K[0] = Z
high_p = 0
for k in range(1, d+1):
F = K[0]
c1 = _exact_inf_norm(F)
for p in range(1, m_star+1):
if p > high_p:
    K[p] = h * A.dot(K[p-1]) / float(p)
    high_p = p  # cache K[p]; it does not depend on k (cf. core_2 below)
coeff = float(pow(k, p))
F = F + coeff * K[p]
inf_norm_K_p_1 = _exact_inf_norm(K[p])
c2 = coeff * inf_norm_K_p_1
if c1 + c2 <= tol * _exact_inf_norm(F):
break
c1 = c2
X[k + i*d] = np.exp(k*h*mu) * F
return X, 1
def _expm_multiply_interval_core_2(A, X, h, mu, m_star, s, q, tol):
"""
A helper function, for the case q > s and q % s > 0.
"""
d = q // s
j = q // d
r = q - d * j
input_shape = X.shape[1:]
K_shape = (m_star + 1, ) + input_shape
K = np.empty(K_shape, dtype=X.dtype)
for i in range(j + 1):
Z = X[i*d]
K[0] = Z
high_p = 0
if i < j:
effective_d = d
else:
effective_d = r
for k in range(1, effective_d+1):
F = K[0]
c1 = _exact_inf_norm(F)
for p in range(1, m_star+1):
if p == high_p + 1:
K[p] = h * A.dot(K[p-1]) / float(p)
high_p = p
coeff = float(pow(k, p))
F = F + coeff * K[p]
inf_norm_K_p_1 = _exact_inf_norm(K[p])
c2 = coeff * inf_norm_K_p_1
if c1 + c2 <= tol * _exact_inf_norm(F):
break
c1 = c2
X[k + i*d] = np.exp(k*h*mu) * F
return X, 2

View file

@@ -0,0 +1,182 @@
"""Sparse matrix norms.
"""
import numpy as np
from scipy.sparse import issparse
from numpy import Inf, sqrt, abs
__all__ = ['norm']
def _sparse_frobenius_norm(x):
if np.issubdtype(x.dtype, np.complexfloating):
sqnorm = abs(x).power(2).sum()
else:
sqnorm = x.power(2).sum()
return sqrt(sqnorm)
def norm(x, ord=None, axis=None):
"""
Norm of a sparse matrix
This function is able to return one of seven different matrix norms,
depending on the value of the ``ord`` parameter.
Parameters
----------
x : a sparse matrix
Input sparse matrix.
ord : {non-zero int, inf, -inf, 'fro'}, optional
Order of the norm (see table under ``Notes``). inf means numpy's
`inf` object.
axis : {int, 2-tuple of ints, None}, optional
If `axis` is an integer, it specifies the axis of `x` along which to
compute the vector norms. If `axis` is a 2-tuple, it specifies the
axes that hold 2-D matrices, and the matrix norms of these matrices
are computed. If `axis` is None then either a vector norm (when `x`
is 1-D) or a matrix norm (when `x` is 2-D) is returned.
Returns
-------
n : float or ndarray
Notes
-----
Some of the ord values are not implemented because some associated
functions, like _multi_svd_norm, are not yet available for sparse
matrices.
This docstring is adapted from numpy.linalg.norm.
https://github.com/numpy/numpy/blob/master/numpy/linalg/linalg.py
The following norms can be calculated:
===== ============================
ord norm for sparse matrices
===== ============================
None Frobenius norm
'fro' Frobenius norm
inf max(sum(abs(x), axis=1))
-inf min(sum(abs(x), axis=1))
0 abs(x).sum(axis=axis)
1 max(sum(abs(x), axis=0))
-1 min(sum(abs(x), axis=0))
2 Not implemented
-2 Not implemented
other Not implemented
===== ============================
The Frobenius norm is given by [1]_:
:math:`||A||_F = [\\sum_{i,j} abs(a_{i,j})^2]^{1/2}`
References
----------
.. [1] G. H. Golub and C. F. Van Loan, *Matrix Computations*,
Baltimore, MD, Johns Hopkins University Press, 1985, pg. 15
Examples
--------
>>> from scipy.sparse import *
>>> import numpy as np
>>> from scipy.sparse.linalg import norm
>>> a = np.arange(9) - 4
>>> a
array([-4, -3, -2, -1, 0, 1, 2, 3, 4])
>>> b = a.reshape((3, 3))
>>> b
array([[-4, -3, -2],
[-1, 0, 1],
[ 2, 3, 4]])
>>> b = csr_matrix(b)
>>> norm(b)
7.745966692414834
>>> norm(b, 'fro')
7.745966692414834
>>> norm(b, np.inf)
9
>>> norm(b, -np.inf)
2
>>> norm(b, 1)
7
>>> norm(b, -1)
6
"""
if not issparse(x):
raise TypeError("input is not sparse. use numpy.linalg.norm")
# Check the default case first and handle it immediately.
if axis is None and ord in (None, 'fro', 'f'):
return _sparse_frobenius_norm(x)
# Some norms require functions that are not implemented for all types.
x = x.tocsr()
if axis is None:
axis = (0, 1)
elif not isinstance(axis, tuple):
msg = "'axis' must be None, an integer or a tuple of integers"
try:
int_axis = int(axis)
except TypeError:
raise TypeError(msg)
if axis != int_axis:
raise TypeError(msg)
axis = (int_axis,)
nd = 2
if len(axis) == 2:
row_axis, col_axis = axis
if not (-nd <= row_axis < nd and -nd <= col_axis < nd):
raise ValueError('Invalid axis %r for an array with shape %r' %
(axis, x.shape))
if row_axis % nd == col_axis % nd:
raise ValueError('Duplicate axes given.')
if ord == 2:
raise NotImplementedError
#return _multi_svd_norm(x, row_axis, col_axis, amax)
elif ord == -2:
raise NotImplementedError
#return _multi_svd_norm(x, row_axis, col_axis, amin)
elif ord == 1:
return abs(x).sum(axis=row_axis).max(axis=col_axis)[0,0]
elif ord == Inf:
return abs(x).sum(axis=col_axis).max(axis=row_axis)[0,0]
elif ord == -1:
return abs(x).sum(axis=row_axis).min(axis=col_axis)[0,0]
elif ord == -Inf:
return abs(x).sum(axis=col_axis).min(axis=row_axis)[0,0]
elif ord in (None, 'f', 'fro'):
# The axis order does not matter for this norm.
return _sparse_frobenius_norm(x)
else:
raise ValueError("Invalid norm order for matrices.")
elif len(axis) == 1:
a, = axis
if not (-nd <= a < nd):
raise ValueError('Invalid axis %r for an array with shape %r' %
(axis, x.shape))
if ord == Inf:
M = abs(x).max(axis=a)
elif ord == -Inf:
M = abs(x).min(axis=a)
elif ord == 0:
# Zero norm
M = (x != 0).sum(axis=a)
elif ord == 1:
# special case for speedup
M = abs(x).sum(axis=a)
elif ord in (2, None):
M = sqrt(abs(x).power(2).sum(axis=a))
else:
try:
ord + 1
except TypeError:
raise ValueError('Invalid norm order for vectors.')
M = np.power(abs(x).power(ord).sum(axis=a), 1 / ord)
return M.A.ravel()
else:
raise ValueError("Improper number of dimensions to norm.")

View file

@@ -0,0 +1,466 @@
"""Sparse block 1-norm estimator.
"""
import numpy as np
from scipy.sparse.linalg import aslinearoperator
__all__ = ['onenormest']
def onenormest(A, t=2, itmax=5, compute_v=False, compute_w=False):
"""
Compute a lower bound of the 1-norm of a sparse matrix.
Parameters
----------
A : ndarray or other linear operator
A linear operator that can be transposed and that can
produce matrix products.
t : int, optional
A positive parameter controlling the tradeoff between
accuracy versus time and memory usage.
Larger values take longer and use more memory
but give more accurate output.
itmax : int, optional
Use at most this many iterations.
compute_v : bool, optional
Request a norm-maximizing linear operator input vector if True.
compute_w : bool, optional
Request a norm-maximizing linear operator output vector if True.
Returns
-------
est : float
An underestimate of the 1-norm of the sparse matrix.
v : ndarray, optional
The vector such that ||Av||_1 == est*||v||_1.
It can be thought of as an input to the linear operator
that gives an output with particularly large norm.
w : ndarray, optional
The vector Av which has relatively large 1-norm.
It can be thought of as an output of the linear operator
that is relatively large in norm compared to the input.
Notes
-----
This is algorithm 2.4 of [1].
In [2] it is described as follows.
"This algorithm typically requires the evaluation of
about 4t matrix-vector products and almost invariably
produces a norm estimate (which is, in fact, a lower
bound on the norm) correct to within a factor 3."
.. versionadded:: 0.13.0
References
----------
.. [1] Nicholas J. Higham and Francoise Tisseur (2000),
"A Block Algorithm for Matrix 1-Norm Estimation,
with an Application to 1-Norm Pseudospectra."
SIAM J. Matrix Anal. Appl. Vol. 21, No. 4, pp. 1185-1201.
.. [2] Awad H. Al-Mohy and Nicholas J. Higham (2009),
"A new scaling and squaring algorithm for the matrix exponential."
SIAM J. Matrix Anal. Appl. Vol. 31, No. 3, pp. 970-989.
Examples
--------
>>> from scipy.sparse import csc_matrix
>>> from scipy.sparse.linalg import onenormest
>>> A = csc_matrix([[1., 0., 0.], [5., 8., 2.], [0., -1., 0.]], dtype=float)
>>> A.todense()
matrix([[ 1., 0., 0.],
[ 5., 8., 2.],
[ 0., -1., 0.]])
>>> onenormest(A)
9.0
>>> np.linalg.norm(A.todense(), ord=1)
9.0
"""
# Check the input.
A = aslinearoperator(A)
if A.shape[0] != A.shape[1]:
raise ValueError('expected the operator to act like a square matrix')
# If the operator size is small compared to t,
# then it is easier to compute the exact norm.
# Otherwise estimate the norm.
n = A.shape[1]
if t >= n:
A_explicit = np.asarray(aslinearoperator(A).matmat(np.identity(n)))
if A_explicit.shape != (n, n):
raise Exception('internal error: unexpected shape ' + str(A_explicit.shape))
col_abs_sums = abs(A_explicit).sum(axis=0)
if col_abs_sums.shape != (n, ):
raise Exception('internal error: unexpected shape ' + str(col_abs_sums.shape))
argmax_j = np.argmax(col_abs_sums)
v = elementary_vector(n, argmax_j)
w = A_explicit[:, argmax_j]
est = col_abs_sums[argmax_j]
else:
est, v, w, nmults, nresamples = _onenormest_core(A, A.H, t, itmax)
# Report the norm estimate along with some certificates of the estimate.
if compute_v or compute_w:
result = (est,)
if compute_v:
result += (v,)
if compute_w:
result += (w,)
return result
else:
return est
def _blocked_elementwise(func):
"""
Decorator for an elementwise function, to apply it blockwise along
first dimension, to avoid excessive memory usage in temporaries.
"""
block_size = 2**20
def wrapper(x):
if x.shape[0] < block_size:
return func(x)
else:
y0 = func(x[:block_size])
y = np.zeros((x.shape[0],) + y0.shape[1:], dtype=y0.dtype)
y[:block_size] = y0
del y0
for j in range(block_size, x.shape[0], block_size):
y[j:j+block_size] = func(x[j:j+block_size])
return y
return wrapper
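# Illustrative use of the decorator above: any one-argument elementwise
# function gains chunked evaluation along axis 0, e.g. a hypothetical
#
#     @_blocked_elementwise
#     def _demo_square(x):
#         return x * x
#
# behaves like x * x but never materializes more than ~2**20 input rows
# of temporaries at once.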
@_blocked_elementwise
def sign_round_up(X):
"""
This should do the right thing for both real and complex matrices.
From Higham and Tisseur:
"Everything in this section remains valid for complex matrices
provided that sign(A) is redefined as the matrix (aij / |aij|)
(and sign(0) = 1) transposes are replaced by conjugate transposes."
"""
Y = X.copy()
Y[Y == 0] = 1
Y /= np.abs(Y)
return Y
@_blocked_elementwise
def _max_abs_axis1(X):
return np.max(np.abs(X), axis=1)
def _sum_abs_axis0(X):
block_size = 2**20
r = None
for j in range(0, X.shape[0], block_size):
y = np.sum(np.abs(X[j:j+block_size]), axis=0)
if r is None:
r = y
else:
r += y
return r
def elementary_vector(n, i):
v = np.zeros(n, dtype=float)
v[i] = 1
return v
def vectors_are_parallel(v, w):
# Columns are considered parallel when they are equal or negative.
# Entries are required to be in {-1, 1},
# which guarantees that the magnitudes of the vectors are identical.
if v.ndim != 1 or v.shape != w.shape:
raise ValueError('expected conformant vectors with entries in {-1,1}')
n = v.shape[0]
return np.dot(v, w) == n
def every_col_of_X_is_parallel_to_a_col_of_Y(X, Y):
for v in X.T:
if not any(vectors_are_parallel(v, w) for w in Y.T):
return False
return True
def column_needs_resampling(i, X, Y=None):
# column i of X needs resampling if either
# it is parallel to a previous column of X or
# it is parallel to a column of Y
n, t = X.shape
v = X[:, i]
if any(vectors_are_parallel(v, X[:, j]) for j in range(i)):
return True
if Y is not None:
if any(vectors_are_parallel(v, w) for w in Y.T):
return True
return False
def resample_column(i, X):
X[:, i] = np.random.randint(0, 2, size=X.shape[0])*2 - 1
def less_than_or_close(a, b):
return np.allclose(a, b) or (a < b)
def _algorithm_2_2(A, AT, t):
"""
This is Algorithm 2.2.
Parameters
----------
A : ndarray or other linear operator
A linear operator that can produce matrix products.
AT : ndarray or other linear operator
The transpose of A.
t : int, optional
A positive parameter controlling the tradeoff between
accuracy versus time and memory usage.
Returns
-------
g : sequence
A non-negative decreasing vector
such that g[j] is a lower bound for the 1-norm
of the column of A of jth largest 1-norm.
The first entry of this vector is therefore a lower bound
on the 1-norm of the linear operator A.
This sequence has length t.
ind : sequence
The ith entry of ind is the index of the column A whose 1-norm
is given by g[i].
This sequence of indices has length t, and its entries are
chosen from range(n), possibly with repetition,
where n is the order of the operator A.
Notes
-----
This algorithm is mainly for testing.
It uses the 'ind' array in a way that is similar to
its usage in algorithm 2.4. Because algorithm 2.2 is easier to test,
it gives a chance of uncovering indexing bugs that would propagate
less noticeably to algorithm 2.4.
"""
A_linear_operator = aslinearoperator(A)
AT_linear_operator = aslinearoperator(AT)
n = A_linear_operator.shape[0]
# Initialize the X block with columns of unit 1-norm.
X = np.ones((n, t))
if t > 1:
X[:, 1:] = np.random.randint(0, 2, size=(n, t-1))*2 - 1
X /= float(n)
# Iteratively improve the lower bounds.
# Track extra things, to assert invariants for debugging.
g_prev = None
h_prev = None
k = 1
ind = range(t)
while True:
Y = np.asarray(A_linear_operator.matmat(X))
g = _sum_abs_axis0(Y)
best_j = np.argmax(g)
g.sort()
g = g[::-1]
S = sign_round_up(Y)
Z = np.asarray(AT_linear_operator.matmat(S))
h = _max_abs_axis1(Z)
# If this algorithm runs for fewer than two iterations,
# then its return values do not have the properties indicated
# in the description of the algorithm.
# In particular, the entries of g are not 1-norms of any
# column of A until the second iteration.
# Therefore we will require the algorithm to run for at least
# two iterations, even though this requirement is not stated
# in the description of the algorithm.
if k >= 2:
if less_than_or_close(max(h), np.dot(Z[:, best_j], X[:, best_j])):
break
ind = np.argsort(h)[::-1][:t]
h = h[ind]
for j in range(t):
X[:, j] = elementary_vector(n, ind[j])
# Check invariant (2.2).
if k >= 2:
if not less_than_or_close(g_prev[0], h_prev[0]):
raise Exception('invariant (2.2) is violated')
if not less_than_or_close(h_prev[0], g[0]):
raise Exception('invariant (2.2) is violated')
# Check invariant (2.3).
if k >= 3:
for j in range(t):
if not less_than_or_close(g[j], g_prev[j]):
raise Exception('invariant (2.3) is violated')
# Update for the next iteration.
g_prev = g
h_prev = h
k += 1
# Return the lower bounds and the corresponding column indices.
return g, ind
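# A hypothetical self-check of this routine (assuming a small dense A):
#
#   A = np.random.randn(50, 50)
#   g, ind = _algorithm_2_2(A, A.T, t=4)
#   assert g[0] <= np.linalg.norm(A, 1)   # g[0] is a lower bound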
def _onenormest_core(A, AT, t, itmax):
"""
Compute a lower bound of the 1-norm of a sparse matrix.
Parameters
----------
A : ndarray or other linear operator
A linear operator that can produce matrix products.
AT : ndarray or other linear operator
The transpose of A.
t : int, optional
A positive parameter controlling the tradeoff between
accuracy versus time and memory usage.
itmax : int, optional
Use at most this many iterations.
Returns
-------
est : float
An underestimate of the 1-norm of the sparse matrix.
v : ndarray, optional
The vector such that ||Av||_1 == est*||v||_1.
It can be thought of as an input to the linear operator
that gives an output with particularly large norm.
w : ndarray, optional
The vector Av which has relatively large 1-norm.
It can be thought of as an output of the linear operator
that is relatively large in norm compared to the input.
nmults : int, optional
The number of matrix products that were computed.
nresamples : int, optional
The number of times a parallel column was observed,
necessitating a re-randomization of the column.
Notes
-----
This is algorithm 2.4.
"""
# This function is a more or less direct translation
# of Algorithm 2.4 from the Higham and Tisseur (2000) paper.
A_linear_operator = aslinearoperator(A)
AT_linear_operator = aslinearoperator(AT)
if itmax < 2:
raise ValueError('at least two iterations are required')
if t < 1:
raise ValueError('at least one column is required')
n = A.shape[0]
if t >= n:
raise ValueError('t should be smaller than the order of A')
# Track the number of big*small matrix multiplications
# and the number of resamplings.
nmults = 0
nresamples = 0
# "We now explain our choice of starting matrix. We take the first
# column of X to be the vector of 1s [...] This has the advantage that
# for a matrix with nonnegative elements the algorithm converges
# with an exact estimate on the second iteration, and such matrices
# arise in applications [...]"
X = np.ones((n, t), dtype=float)
# "The remaining columns are chosen as rand{-1,1},
# with a check for and correction of parallel columns,
# exactly as for S in the body of the algorithm."
if t > 1:
for i in range(1, t):
# These are technically initial samples, not resamples,
# so the resampling count is not incremented.
resample_column(i, X)
for i in range(t):
while column_needs_resampling(i, X):
resample_column(i, X)
nresamples += 1
# "Choose starting matrix X with columns of unit 1-norm."
X /= float(n)
# "indices of used unit vectors e_j"
ind_hist = np.zeros(0, dtype=np.intp)
est_old = 0
S = np.zeros((n, t), dtype=float)
k = 1
ind = None
while True:
Y = np.asarray(A_linear_operator.matmat(X))
nmults += 1
mags = _sum_abs_axis0(Y)
est = np.max(mags)
best_j = np.argmax(mags)
if est > est_old or k == 2:
if k >= 2:
ind_best = ind[best_j]
w = Y[:, best_j]
# (1)
if k >= 2 and est <= est_old:
est = est_old
break
est_old = est
S_old = S
if k > itmax:
break
S = sign_round_up(Y)
del Y
# (2)
if every_col_of_X_is_parallel_to_a_col_of_Y(S, S_old):
break
if t > 1:
# "Ensure that no column of S is parallel to another column of S
# or to a column of S_old by replacing columns of S by rand{-1,1}."
for i in range(t):
while column_needs_resampling(i, S, S_old):
resample_column(i, S)
nresamples += 1
del S_old
# (3)
Z = np.asarray(AT_linear_operator.matmat(S))
nmults += 1
h = _max_abs_axis1(Z)
del Z
# (4)
if k >= 2 and max(h) == h[ind_best]:
break
# "Sort h so that h_first >= ... >= h_last
# and re-order ind correspondingly."
#
# Later on, we will need at most t+len(ind_hist) largest
# entries, so drop the rest
ind = np.argsort(h)[::-1][:t+len(ind_hist)].copy()
del h
if t > 1:
# (5)
# Break if the most promising t vectors have been visited already.
if np.in1d(ind[:t], ind_hist).all():
break
# Put the most promising unvisited vectors at the front of the list
# and put the visited vectors at the end of the list.
# Preserve the order of the indices induced by the ordering of h.
seen = np.in1d(ind, ind_hist)
ind = np.concatenate((ind[~seen], ind[seen]))
for j in range(t):
X[:, j] = elementary_vector(n, ind[j])
new_ind = ind[:t][~np.in1d(ind[:t], ind_hist)]
ind_hist = np.concatenate((ind_hist, new_ind))
k += 1
v = elementary_vector(n, ind_best)
return est, v, w, nmults, nresamples
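# A hypothetical check of the core routine against the exact 1-norm
# (note that t must be smaller than the order of A):
#
#   A = np.random.randn(100, 100)
#   est, v, w, nmults, nresamples = _onenormest_core(A, A.T, t=2, itmax=5)
#   assert est <= np.linalg.norm(A, 1)   # the estimate never overestimates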


@@ -0,0 +1,29 @@
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
(1) Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
(2) Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
Energy nor the names of its contributors may be used to endorse or promote
products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@@ -0,0 +1,66 @@
"""
Linear Solvers
==============
The default solver is SuperLU (included in the scipy distribution),
which can solve real or complex linear systems in both single and
double precisions. It is automatically replaced by UMFPACK, if
available. Note that UMFPACK works in double precision only, so
switch it off by::
>>> use_solver(useUmfpack=False)
to solve in single precision. See also the use_solver documentation.
Example session::
>>> from scipy.sparse import csc_matrix, spdiags
>>> from numpy import array
>>> from scipy.sparse.linalg import spsolve, use_solver
>>>
>>> print("Inverting a sparse linear system:")
>>> print("The sparse matrix (constructed from diagonals):")
>>> a = spdiags([[1, 2, 3, 4, 5], [6, 5, 8, 9, 10]], [0, 1], 5, 5)
>>> b = array([1, 2, 3, 4, 5])
>>> print("Solve: single precision complex:")
>>> use_solver(useUmfpack=False)
>>> a = a.astype('F')
>>> x = spsolve(a, b)
>>> print(x)
>>> print("Error: ", a*x-b)
>>>
>>> print("Solve: double precision complex:")
>>> use_solver(useUmfpack=True)
>>> a = a.astype('D')
>>> x = spsolve(a, b)
>>> print(x)
>>> print("Error: ", a*x-b)
>>>
>>> print("Solve: double precision:")
>>> a = a.astype('d')
>>> x = spsolve(a, b)
>>> print(x)
>>> print("Error: ", a*x-b)
>>>
>>> print("Solve: single precision:")
>>> use_solver(useUmfpack=False)
>>> a = a.astype('f')
>>> x = spsolve(a, b.astype('f'))
>>> print(x)
>>> print("Error: ", a*x-b)
"""
#import umfpack
#__doc__ = '\n\n'.join( (__doc__, umfpack.__doc__) )
#del umfpack
from .linsolve import *
from ._superlu import SuperLU
from . import _add_newdocs
__all__ = [s for s in dir() if not s.startswith('_')]
from scipy._lib._testutils import PytestTester
test = PytestTester(__name__)
del PytestTester


@@ -0,0 +1,152 @@
from numpy.lib import add_newdoc
add_newdoc('scipy.sparse.linalg.dsolve._superlu', 'SuperLU',
"""
LU factorization of a sparse matrix.
Factorization is represented as::
Pr * A * Pc = L * U
To construct these `SuperLU` objects, call the `splu` and `spilu`
functions.
Attributes
----------
shape
nnz
perm_c
perm_r
L
U
Methods
-------
solve
Notes
-----
.. versionadded:: 0.14.0
Examples
--------
The LU decomposition can be used to solve matrix equations. Consider:
>>> import numpy as np
>>> from scipy.sparse import csc_matrix, linalg as sla
>>> A = csc_matrix([[1,2,0,4],[1,0,0,1],[1,0,2,1],[2,2,1,0.]])
This can be solved for a given right-hand side:
>>> lu = sla.splu(A)
>>> b = np.array([1, 2, 3, 4])
>>> x = lu.solve(b)
>>> A.dot(x)
array([ 1., 2., 3., 4.])
The ``lu`` object also contains an explicit representation of the
decomposition. The permutations are represented as mappings of
indices:
>>> lu.perm_r
array([0, 2, 1, 3], dtype=int32)
>>> lu.perm_c
array([2, 0, 1, 3], dtype=int32)
The L and U factors are sparse matrices in CSC format:
>>> lu.L.A
array([[ 1. , 0. , 0. , 0. ],
[ 0. , 1. , 0. , 0. ],
[ 0. , 0. , 1. , 0. ],
[ 1. , 0.5, 0.5, 1. ]])
>>> lu.U.A
array([[ 2., 0., 1., 4.],
[ 0., 2., 1., 1.],
[ 0., 0., 1., 1.],
[ 0., 0., 0., -5.]])
The permutation matrices can be constructed:
>>> Pr = csc_matrix((np.ones(4), (lu.perm_r, np.arange(4))))
>>> Pc = csc_matrix((np.ones(4), (np.arange(4), lu.perm_c)))
We can reassemble the original matrix:
>>> (Pr.T * (lu.L * lu.U) * Pc.T).A
array([[ 1., 2., 0., 4.],
[ 1., 0., 0., 1.],
[ 1., 0., 2., 1.],
[ 2., 2., 1., 0.]])
""")
add_newdoc('scipy.sparse.linalg.dsolve._superlu', 'SuperLU', ('solve',
"""
solve(rhs[, trans])
Solves linear system of equations with one or several right-hand sides.
Parameters
----------
rhs : ndarray, shape (n,) or (n, k)
Right hand side(s) of equation
trans : {'N', 'T', 'H'}, optional
Type of system to solve::
'N': A * x == rhs (default)
'T': A^T * x == rhs
'H': A^H * x == rhs
i.e., normal, transposed, and Hermitian conjugate.
Returns
-------
x : ndarray, shape ``rhs.shape``
Solution vector(s)
"""))
add_newdoc('scipy.sparse.linalg.dsolve._superlu', 'SuperLU', ('L',
"""
Lower triangular factor with unit diagonal as a
`scipy.sparse.csc_matrix`.
.. versionadded:: 0.14.0
"""))
add_newdoc('scipy.sparse.linalg.dsolve._superlu', 'SuperLU', ('U',
"""
Upper triangular factor as a `scipy.sparse.csc_matrix`.
.. versionadded:: 0.14.0
"""))
add_newdoc('scipy.sparse.linalg.dsolve._superlu', 'SuperLU', ('shape',
"""
Shape of the original matrix as a tuple of ints.
"""))
add_newdoc('scipy.sparse.linalg.dsolve._superlu', 'SuperLU', ('nnz',
"""
Number of nonzero elements in the matrix.
"""))
add_newdoc('scipy.sparse.linalg.dsolve._superlu', 'SuperLU', ('perm_c',
"""
Permutation Pc represented as an array of indices.
The column permutation matrix can be reconstructed via:
>>> Pc = np.zeros((n, n))
>>> Pc[np.arange(n), perm_c] = 1
"""))
add_newdoc('scipy.sparse.linalg.dsolve._superlu', 'SuperLU', ('perm_r',
"""
Permutation Pr represented as an array of indices.
The row permutation matrix can be reconstructed via:
>>> Pr = np.zeros((n, n))
>>> Pr[perm_r, np.arange(n)] = 1
"""))


@@ -0,0 +1,626 @@
from warnings import warn
import numpy as np
from numpy import asarray
from scipy.sparse import (isspmatrix_csc, isspmatrix_csr, isspmatrix,
SparseEfficiencyWarning, csc_matrix, csr_matrix)
from scipy.sparse.sputils import is_pydata_spmatrix
from scipy.linalg import LinAlgError
import copy
from . import _superlu
noScikit = False
try:
import scikits.umfpack as umfpack
except ImportError:
noScikit = True
useUmfpack = not noScikit
__all__ = ['use_solver', 'spsolve', 'splu', 'spilu', 'factorized',
'MatrixRankWarning', 'spsolve_triangular']
class MatrixRankWarning(UserWarning):
pass
def use_solver(**kwargs):
"""
Select default sparse direct solver to be used.
Parameters
----------
useUmfpack : bool, optional
Use UMFPACK over SuperLU. Has effect only if scikits.umfpack is
installed. Default: True
assumeSortedIndices : bool, optional
Allow UMFPACK to skip the step of sorting indices for a CSR/CSC matrix.
Has effect only if useUmfpack is True and scikits.umfpack is installed.
Default: False
Notes
-----
The default sparse solver is umfpack when available
(scikits.umfpack is installed). This can be changed by passing
useUmfpack = False, which then causes the always present SuperLU
based solver to be used.
Umfpack requires a CSR/CSC matrix to have sorted column/row indices. If
sure that the matrix fulfills this, pass ``assumeSortedIndices=True``
to gain some speed.
"""
if 'useUmfpack' in kwargs:
globals()['useUmfpack'] = kwargs['useUmfpack']
if useUmfpack and 'assumeSortedIndices' in kwargs:
umfpack.configure(assumeSortedIndices=kwargs['assumeSortedIndices'])
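# A minimal usage sketch: force the always-available SuperLU solver even
# when scikits.umfpack is installed, then switch back.
#
#   use_solver(useUmfpack=False)   # subsequent solves use SuperLU
#   use_solver(useUmfpack=True, assumeSortedIndices=True)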
def _get_umf_family(A):
"""Get umfpack family string given the sparse matrix dtype."""
_families = {
(np.float64, np.int32): 'di',
(np.complex128, np.int32): 'zi',
(np.float64, np.int64): 'dl',
(np.complex128, np.int64): 'zl'
}
f_type = np.sctypeDict[A.dtype.name]
i_type = np.sctypeDict[A.indices.dtype.name]
try:
family = _families[(f_type, i_type)]
except KeyError:
msg = 'only float64 or complex128 matrices with int32 or int64' \
' indices are supported! (got: matrix: %s, indices: %s)' \
% (f_type, i_type)
raise ValueError(msg)
# See gh-8278. Considered converting only if
# A.shape[0]*A.shape[1] > np.iinfo(np.int32).max,
# but that didn't always fix the issue.
family = family[0] + "l"
A_new = copy.copy(A)
A_new.indptr = np.array(A.indptr, copy=False, dtype=np.int64)
A_new.indices = np.array(A.indices, copy=False, dtype=np.int64)
return family, A_new
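# For example, a float64 matrix with int32 indices maps to the 'di' family,
# which the widening above then turns into 'dl', together with int64 copies
# of the index arrays (see gh-8278).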
def spsolve(A, b, permc_spec=None, use_umfpack=True):
"""Solve the sparse linear system Ax=b, where b may be a vector or a matrix.
Parameters
----------
A : ndarray or sparse matrix
The square matrix A will be converted into CSC or CSR form
b : ndarray or sparse matrix
The matrix or vector representing the right hand side of the equation.
If a vector, b.shape must be (n,) or (n, 1).
permc_spec : str, optional
How to permute the columns of the matrix for sparsity preservation.
(default: 'COLAMD')
- ``NATURAL``: natural ordering.
- ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
- ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
- ``COLAMD``: approximate minimum degree column ordering
use_umfpack : bool, optional
if True (default) then use umfpack for the solution. This is
only referenced if b is a vector and ``scikit-umfpack`` is installed.
Returns
-------
x : ndarray or sparse matrix
the solution of the sparse linear equation.
If b is a vector, then x is a vector of size A.shape[1]
If b is a matrix, then x is a matrix of size (A.shape[1], b.shape[1])
Notes
-----
For solving the matrix expression AX = B, this solver assumes the resulting
matrix X is sparse, as is often the case for very sparse inputs. If the
resulting X is dense, the construction of this sparse result will be
relatively expensive. In that case, consider converting A to a dense
matrix and using scipy.linalg.solve or its variants.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import csc_matrix
>>> from scipy.sparse.linalg import spsolve
>>> A = csc_matrix([[3, 2, 0], [1, -1, 0], [0, 5, 1]], dtype=float)
>>> B = csc_matrix([[2, 0], [-1, 0], [2, 0]], dtype=float)
>>> x = spsolve(A, B)
>>> np.allclose(A.dot(x).todense(), B.todense())
True
"""
if is_pydata_spmatrix(A):
A = A.to_scipy_sparse().tocsc()
if not (isspmatrix_csc(A) or isspmatrix_csr(A)):
A = csc_matrix(A)
warn('spsolve requires A to be in CSC or CSR matrix format',
SparseEfficiencyWarning)
# b is a vector only if b has shape (n,) or (n, 1)
b_is_sparse = isspmatrix(b) or is_pydata_spmatrix(b)
if not b_is_sparse:
b = asarray(b)
b_is_vector = ((b.ndim == 1) or (b.ndim == 2 and b.shape[1] == 1))
# sum duplicates for non-canonical format
A.sum_duplicates()
A = A.asfptype() # upcast to a floating point format
result_dtype = np.promote_types(A.dtype, b.dtype)
if A.dtype != result_dtype:
A = A.astype(result_dtype)
if b.dtype != result_dtype:
b = b.astype(result_dtype)
# validate input shapes
M, N = A.shape
if (M != N):
raise ValueError("matrix must be square (has shape %s)" % ((M, N),))
if M != b.shape[0]:
raise ValueError("matrix - rhs dimension mismatch (%s - %s)"
% (A.shape, b.shape[0]))
use_umfpack = use_umfpack and useUmfpack
if b_is_vector and use_umfpack:
if b_is_sparse:
b_vec = b.toarray()
else:
b_vec = b
b_vec = asarray(b_vec, dtype=A.dtype).ravel()
if noScikit:
raise RuntimeError('Scikits.umfpack not installed.')
if A.dtype.char not in 'dD':
raise ValueError("convert matrix data to double, please, using"
" .astype(), or set linsolve.useUmfpack = False")
umf_family, A = _get_umf_family(A)
umf = umfpack.UmfpackContext(umf_family)
x = umf.linsolve(umfpack.UMFPACK_A, A, b_vec,
autoTranspose=True)
else:
if b_is_vector and b_is_sparse:
b = b.toarray()
b_is_sparse = False
if not b_is_sparse:
if isspmatrix_csc(A):
flag = 1 # CSC format
else:
flag = 0 # CSR format
options = dict(ColPerm=permc_spec)
x, info = _superlu.gssv(N, A.nnz, A.data, A.indices, A.indptr,
b, flag, options=options)
if info != 0:
warn("Matrix is exactly singular", MatrixRankWarning)
x.fill(np.nan)
if b_is_vector:
x = x.ravel()
else:
# b is sparse
Afactsolve = factorized(A)
if not (isspmatrix_csc(b) or is_pydata_spmatrix(b)):
warn('spsolve is more efficient when sparse b '
'is in the CSC matrix format', SparseEfficiencyWarning)
b = csc_matrix(b)
# Create a sparse output matrix by repeatedly applying
# the sparse factorization to solve columns of b.
data_segs = []
row_segs = []
col_segs = []
for j in range(b.shape[1]):
bj = np.asarray(b[:, j].todense()).ravel()
xj = Afactsolve(bj)
w = np.flatnonzero(xj)
segment_length = w.shape[0]
row_segs.append(w)
col_segs.append(np.full(segment_length, j, dtype=int))
data_segs.append(np.asarray(xj[w], dtype=A.dtype))
sparse_data = np.concatenate(data_segs)
sparse_row = np.concatenate(row_segs)
sparse_col = np.concatenate(col_segs)
x = A.__class__((sparse_data, (sparse_row, sparse_col)),
shape=b.shape, dtype=A.dtype)
if is_pydata_spmatrix(b):
x = b.__class__(x)
return x
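# A brief usage sketch of the two paths above (hypothetical data): a dense
# right-hand side yields a dense solution, a sparse one yields a sparse one.
#
#   A = csc_matrix([[3., 2., 0.], [1., -1., 0.], [0., 5., 1.]])
#   x = spsolve(A, np.array([2., -1., 2.]))    # dense b  -> dense x
#   X = spsolve(A, csc_matrix(np.eye(3)))      # sparse B -> sparse X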
def splu(A, permc_spec=None, diag_pivot_thresh=None,
relax=None, panel_size=None, options=dict()):
"""
Compute the LU decomposition of a sparse, square matrix.
Parameters
----------
A : sparse matrix
Sparse matrix to factorize. Should be in CSR or CSC format.
permc_spec : str, optional
How to permute the columns of the matrix for sparsity preservation.
(default: 'COLAMD')
- ``NATURAL``: natural ordering.
- ``MMD_ATA``: minimum degree ordering on the structure of A^T A.
- ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A.
- ``COLAMD``: approximate minimum degree column ordering
diag_pivot_thresh : float, optional
Threshold used for a diagonal entry to be an acceptable pivot.
See SuperLU user's guide for details [1]_
relax : int, optional
Expert option for customizing the degree of relaxing supernodes.
See SuperLU user's guide for details [1]_
panel_size : int, optional
Expert option for customizing the panel size.
See SuperLU user's guide for details [1]_
options : dict, optional
Dictionary containing additional expert options to SuperLU.
See SuperLU user guide [1]_ (section 2.4 on the 'Options' argument)
for more details. For example, you can specify
``options=dict(Equil=False, IterRefine='SINGLE')``
to turn equilibration off and perform a single iterative refinement.
Returns
-------
invA : scipy.sparse.linalg.SuperLU
Object, which has a ``solve`` method.
See also
--------
spilu : incomplete LU decomposition
Notes
-----
This function uses the SuperLU library.
References
----------
.. [1] SuperLU http://crd.lbl.gov/~xiaoye/SuperLU/
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import csc_matrix
>>> from scipy.sparse.linalg import splu
>>> A = csc_matrix([[1., 0., 0.], [5., 0., 2.], [0., -1., 0.]], dtype=float)
>>> B = splu(A)
>>> x = np.array([1., 2., 3.], dtype=float)
>>> B.solve(x)
array([ 1. , -3. , -1.5])
>>> A.dot(B.solve(x))
array([ 1., 2., 3.])
>>> B.solve(A.dot(x))
array([ 1., 2., 3.])
"""
if is_pydata_spmatrix(A):
csc_construct_func = lambda *a, cls=type(A): cls(csc_matrix(*a))
A = A.to_scipy_sparse().tocsc()
else:
csc_construct_func = csc_matrix
if not isspmatrix_csc(A):
A = csc_matrix(A)
warn('splu requires CSC matrix format', SparseEfficiencyWarning)
# sum duplicates for non-canonical format
A.sum_duplicates()
A = A.asfptype() # upcast to a floating point format
M, N = A.shape
if (M != N):
raise ValueError("can only factor square matrices") # is this true?
_options = dict(DiagPivotThresh=diag_pivot_thresh, ColPerm=permc_spec,
PanelSize=panel_size, Relax=relax)
if options is not None:
_options.update(options)
# Ensure that no column permutations are applied
if (_options["ColPerm"] == "NATURAL"):
_options["SymmetricMode"] = True
return _superlu.gstrf(N, A.nnz, A.data, A.indices, A.indptr,
csc_construct_func=csc_construct_func,
ilu=False, options=_options)
def spilu(A, drop_tol=None, fill_factor=None, drop_rule=None, permc_spec=None,
diag_pivot_thresh=None, relax=None, panel_size=None, options=None):
"""
Compute an incomplete LU decomposition for a sparse, square matrix.
The resulting object is an approximation to the inverse of `A`.
Parameters
----------
A : (N, N) array_like
Sparse matrix to factorize
drop_tol : float, optional
Drop tolerance (0 <= tol <= 1) for an incomplete LU decomposition.
(default: 1e-4)
fill_factor : float, optional
Specifies the fill ratio upper bound (>= 1.0) for ILU. (default: 10)
drop_rule : str, optional
Comma-separated string of drop rules to use.
Available rules: ``basic``, ``prows``, ``column``, ``area``,
``secondary``, ``dynamic``, ``interp``. (Default: ``basic,area``)
See SuperLU documentation for details.
Remaining other options
Same as for `splu`
Returns
-------
invA_approx : scipy.sparse.linalg.SuperLU
Object, which has a ``solve`` method.
See also
--------
splu : complete LU decomposition
Notes
-----
To improve the approximation to the inverse, you may need to both
increase `fill_factor` and decrease `drop_tol`.
This function uses the SuperLU library.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import csc_matrix
>>> from scipy.sparse.linalg import spilu
>>> A = csc_matrix([[1., 0., 0.], [5., 0., 2.], [0., -1., 0.]], dtype=float)
>>> B = spilu(A)
>>> x = np.array([1., 2., 3.], dtype=float)
>>> B.solve(x)
array([ 1. , -3. , -1.5])
>>> A.dot(B.solve(x))
array([ 1., 2., 3.])
>>> B.solve(A.dot(x))
array([ 1., 2., 3.])
"""
if is_pydata_spmatrix(A):
csc_construct_func = lambda *a, cls=type(A): cls(csc_matrix(*a))
A = A.to_scipy_sparse().tocsc()
else:
csc_construct_func = csc_matrix
if not isspmatrix_csc(A):
A = csc_matrix(A)
warn('spilu requires CSC matrix format', SparseEfficiencyWarning)
# sum duplicates for non-canonical format
A.sum_duplicates()
A = A.asfptype() # upcast to a floating point format
M, N = A.shape
if (M != N):
raise ValueError("can only factor square matrices") # is this true?
_options = dict(ILU_DropRule=drop_rule, ILU_DropTol=drop_tol,
ILU_FillFactor=fill_factor,
DiagPivotThresh=diag_pivot_thresh, ColPerm=permc_spec,
PanelSize=panel_size, Relax=relax)
if options is not None:
_options.update(options)
# Ensure that no column permutations are applied
if (_options["ColPerm"] == "NATURAL"):
_options["SymmetricMode"] = True
return _superlu.gstrf(N, A.nnz, A.data, A.indices, A.indptr,
csc_construct_func=csc_construct_func,
ilu=True, options=_options)
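# A common usage sketch (hypothetical A and b): wrap the incomplete
# factorization as a preconditioner for an iterative solver.
#
#   from scipy.sparse.linalg import LinearOperator, gmres
#   ilu = spilu(A)
#   M = LinearOperator(A.shape, ilu.solve)
#   x, info = gmres(A, b, M=M)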
def factorized(A):
"""
Return a function for solving a sparse linear system, with A pre-factorized.
Parameters
----------
A : (N, N) array_like
Input.
Returns
-------
solve : callable
To solve the linear system of equations given in `A`, the `solve`
callable should be passed an ndarray of shape (N,).
Examples
--------
>>> import numpy as np
>>> from scipy.sparse.linalg import factorized
>>> A = np.array([[ 3. , 2. , -1. ],
... [ 2. , -2. , 4. ],
... [-1. , 0.5, -1. ]])
>>> solve = factorized(A) # Makes LU decomposition.
>>> rhs1 = np.array([1, -2, 0])
>>> solve(rhs1) # Uses the LU factors.
array([ 1., -2., -2.])
"""
if is_pydata_spmatrix(A):
A = A.to_scipy_sparse().tocsc()
if useUmfpack:
if noScikit:
raise RuntimeError('Scikits.umfpack not installed.')
if not isspmatrix_csc(A):
A = csc_matrix(A)
warn('factorized requires CSC matrix format', SparseEfficiencyWarning)
A = A.asfptype() # upcast to a floating point format
if A.dtype.char not in 'dD':
raise ValueError("convert matrix data to double, please, using"
" .astype(), or set linsolve.useUmfpack = False")
umf_family, A = _get_umf_family(A)
umf = umfpack.UmfpackContext(umf_family)
# Make LU decomposition.
umf.numeric(A)
def solve(b):
return umf.solve(umfpack.UMFPACK_A, A, b, autoTranspose=True)
return solve
else:
return splu(A).solve
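# The benefit over repeated spsolve calls is factorization reuse, as in
# this hypothetical sketch where rhs_list holds many right-hand sides:
#
#   solve = factorized(A)                  # factor A once
#   xs = [solve(b) for b in rhs_list]      # cheap repeated solves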
def spsolve_triangular(A, b, lower=True, overwrite_A=False, overwrite_b=False,
unit_diagonal=False):
"""
Solve the equation `A x = b` for `x`, assuming A is a triangular matrix.
Parameters
----------
A : (M, M) sparse matrix
A sparse square triangular matrix. Should be in CSR format.
b : (M,) or (M, N) array_like
Right-hand side matrix in `A x = b`
lower : bool, optional
Whether `A` is a lower or upper triangular matrix.
Default is lower triangular matrix.
overwrite_A : bool, optional
Allow changing `A`. The indices of `A` are going to be sorted and zero
entries are going to be removed.
Enabling gives a performance gain. Default is False.
overwrite_b : bool, optional
Allow overwriting data in `b`.
Enabling gives a performance gain. Default is False.
If `overwrite_b` is True, it should be ensured that
`b` has an appropriate dtype to be able to store the result.
unit_diagonal : bool, optional
If True, diagonal elements of `a` are assumed to be 1 and will not be
referenced.
.. versionadded:: 1.4.0
Returns
-------
x : (M,) or (M, N) ndarray
Solution to the system `A x = b`. Shape of return matches shape of `b`.
Raises
------
LinAlgError
If `A` is singular or not triangular.
ValueError
If shape of `A` or shape of `b` do not match the requirements.
Notes
-----
.. versionadded:: 0.19.0
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import csr_matrix
>>> from scipy.sparse.linalg import spsolve_triangular
>>> A = csr_matrix([[3, 0, 0], [1, -1, 0], [2, 0, 1]], dtype=float)
>>> B = np.array([[2, 0], [-1, 0], [2, 0]], dtype=float)
>>> x = spsolve_triangular(A, B)
>>> np.allclose(A.dot(x), B)
True
"""
if is_pydata_spmatrix(A):
A = A.to_scipy_sparse().tocsr()
# Check the input for correct type and format.
if not isspmatrix_csr(A):
warn('CSR matrix format is required. Converting to CSR matrix.',
SparseEfficiencyWarning)
A = csr_matrix(A)
elif not overwrite_A:
A = A.copy()
if A.shape[0] != A.shape[1]:
raise ValueError(
'A must be a square matrix but its shape is {}.'.format(A.shape))
# sum duplicates for non-canonical format
A.sum_duplicates()
b = np.asanyarray(b)
if b.ndim not in [1, 2]:
raise ValueError(
'b must have 1 or 2 dims but its shape is {}.'.format(b.shape))
if A.shape[0] != b.shape[0]:
raise ValueError(
'The size of the dimensions of A must be equal to '
'the size of the first dimension of b but the shape of A is '
'{} and the shape of b is {}.'.format(A.shape, b.shape))
# Init x as (a copy of) b.
x_dtype = np.result_type(A.data, b, np.float64)
if overwrite_b:
if np.can_cast(b.dtype, x_dtype, casting='same_kind'):
x = b
else:
raise ValueError(
'Cannot overwrite b (dtype {}) with result '
'of type {}.'.format(b.dtype, x_dtype))
else:
x = b.astype(x_dtype, copy=True)
# Choose forward or backward order.
if lower:
row_indices = range(len(b))
else:
row_indices = range(len(b) - 1, -1, -1)
# Fill x iteratively.
for i in row_indices:
# Get indices for i-th row.
indptr_start = A.indptr[i]
indptr_stop = A.indptr[i + 1]
if lower:
A_diagonal_index_row_i = indptr_stop - 1
A_off_diagonal_indices_row_i = slice(indptr_start, indptr_stop - 1)
else:
A_diagonal_index_row_i = indptr_start
A_off_diagonal_indices_row_i = slice(indptr_start + 1, indptr_stop)
# Check regularity and triangularity of A.
if not unit_diagonal and (indptr_stop <= indptr_start
or A.indices[A_diagonal_index_row_i] < i):
raise LinAlgError(
'A is singular: diagonal {} is zero.'.format(i))
if A.indices[A_diagonal_index_row_i] > i:
raise LinAlgError(
'A is not triangular: A[{}, {}] is nonzero.'
''.format(i, A.indices[A_diagonal_index_row_i]))
# Incorporate off-diagonal entries.
A_column_indices_in_row_i = A.indices[A_off_diagonal_indices_row_i]
A_values_in_row_i = A.data[A_off_diagonal_indices_row_i]
x[i] -= np.dot(x[A_column_indices_in_row_i].T, A_values_in_row_i)
# Compute i-th entry of x.
if not unit_diagonal:
x[i] /= A.data[A_diagonal_index_row_i]
return x
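# An upper-triangular usage sketch (hypothetical data): with lower=False the
# row loop above runs in reverse, i.e. backward substitution.
#
#   U = csr_matrix([[2., 1., 0.], [0., 1., 3.], [0., 0., 4.]])
#   x = spsolve_triangular(U, np.array([1., 2., 3.]), lower=False)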


@@ -0,0 +1,53 @@
from os.path import join, dirname
import sys
import glob
def configuration(parent_package='',top_path=None):
from numpy.distutils.misc_util import Configuration
from scipy._build_utils.system_info import get_info
from scipy._build_utils import numpy_nodepr_api
config = Configuration('dsolve',parent_package,top_path)
config.add_data_dir('tests')
lapack_opt = get_info('lapack_opt',notfound_action=2)
if sys.platform == 'win32':
superlu_defs = [('NO_TIMER',1)]
else:
superlu_defs = []
superlu_defs.append(('USE_VENDOR_BLAS',1))
superlu_src = join(dirname(__file__), 'SuperLU', 'SRC')
sources = sorted(glob.glob(join(superlu_src, '*.c')))
headers = list(glob.glob(join(superlu_src, '*.h')))
config.add_library('superlu_src',
sources=sources,
macros=superlu_defs,
include_dirs=[superlu_src],
)
# Extension
ext_sources = ['_superlumodule.c',
'_superlu_utils.c',
'_superluobject.c']
config.add_extension('_superlu',
sources=ext_sources,
libraries=['superlu_src'],
depends=(sources + headers),
extra_info=lapack_opt,
**numpy_nodepr_api
)
# Add license files
config.add_data_files('SuperLU/License.txt')
return config
if __name__ == '__main__':
from numpy.distutils.core import setup
setup(**configuration(top_path='').todict())


@@ -0,0 +1,777 @@
import sys
import threading
import numpy as np
from numpy import array, finfo, arange, eye, all, unique, ones, dot
import numpy.random as random
from numpy.testing import (
assert_array_almost_equal, assert_almost_equal,
assert_equal, assert_array_equal, assert_, assert_allclose,
assert_warns, suppress_warnings)
import pytest
from pytest import raises as assert_raises
import scipy.linalg
from scipy.linalg import norm, inv
from scipy.sparse import (spdiags, SparseEfficiencyWarning, csc_matrix,
csr_matrix, identity, isspmatrix, dok_matrix, lil_matrix, bsr_matrix)
from scipy.sparse.linalg import SuperLU
from scipy.sparse.linalg.dsolve import (spsolve, use_solver, splu, spilu,
MatrixRankWarning, _superlu, spsolve_triangular, factorized)
import scipy.sparse
from scipy._lib._testutils import check_free_memory
sup_sparse_efficiency = suppress_warnings()
sup_sparse_efficiency.filter(SparseEfficiencyWarning)
# scikits.umfpack is not a SciPy dependency but it is optionally used in
# dsolve, so check whether it's available
try:
import scikits.umfpack as umfpack
has_umfpack = True
except ImportError:
has_umfpack = False
def toarray(a):
if isspmatrix(a):
return a.toarray()
else:
return a
def setup_bug_8278():
N = 2 ** 6
h = 1/N
Ah1D = scipy.sparse.diags([-1, 2, -1], [-1, 0, 1],
shape=(N-1, N-1))/(h**2)
eyeN = scipy.sparse.eye(N - 1)
A = (scipy.sparse.kron(eyeN, scipy.sparse.kron(eyeN, Ah1D))
+ scipy.sparse.kron(eyeN, scipy.sparse.kron(Ah1D, eyeN))
+ scipy.sparse.kron(Ah1D, scipy.sparse.kron(eyeN, eyeN)))
b = np.random.rand((N-1)**3)
return A, b
class TestFactorized(object):
def setup_method(self):
n = 5
d = arange(n) + 1
self.n = n
self.A = spdiags((d, 2*d, d[::-1]), (-3, 0, 5), n, n).tocsc()
random.seed(1234)
def _check_singular(self):
A = csc_matrix((5,5), dtype='d')
b = ones(5)
assert_array_almost_equal(0. * b, factorized(A)(b))
def _check_non_singular(self):
# Make a diagonal dominant, to make sure it is not singular
n = 5
a = csc_matrix(random.rand(n, n))
b = ones(n)
expected = splu(a).solve(b)
assert_array_almost_equal(factorized(a)(b), expected)
def test_singular_without_umfpack(self):
use_solver(useUmfpack=False)
with assert_raises(RuntimeError, match="Factor is exactly singular"):
self._check_singular()
@pytest.mark.skipif(not has_umfpack, reason="umfpack not available")
def test_singular_with_umfpack(self):
use_solver(useUmfpack=True)
with suppress_warnings() as sup:
sup.filter(RuntimeWarning, "divide by zero encountered in double_scalars")
assert_warns(umfpack.UmfpackWarning, self._check_singular)
def test_non_singular_without_umfpack(self):
use_solver(useUmfpack=False)
self._check_non_singular()
@pytest.mark.skipif(not has_umfpack, reason="umfpack not available")
def test_non_singular_with_umfpack(self):
use_solver(useUmfpack=True)
self._check_non_singular()
def test_cannot_factorize_nonsquare_matrix_without_umfpack(self):
use_solver(useUmfpack=False)
msg = "can only factor square matrices"
with assert_raises(ValueError, match=msg):
factorized(self.A[:, :4])
@pytest.mark.skipif(not has_umfpack, reason="umfpack not available")
def test_factorizes_nonsquare_matrix_with_umfpack(self):
use_solver(useUmfpack=True)
# does not raise
factorized(self.A[:,:4])
def test_call_with_incorrectly_sized_matrix_without_umfpack(self):
use_solver(useUmfpack=False)
solve = factorized(self.A)
b = random.rand(4)
B = random.rand(4, 3)
BB = random.rand(self.n, 3, 9)
with assert_raises(ValueError, match="is of incompatible size"):
solve(b)
with assert_raises(ValueError, match="is of incompatible size"):
solve(B)
with assert_raises(ValueError,
match="object too deep for desired array"):
solve(BB)
@pytest.mark.skipif(not has_umfpack, reason="umfpack not available")
def test_call_with_incorrectly_sized_matrix_with_umfpack(self):
use_solver(useUmfpack=True)
solve = factorized(self.A)
b = random.rand(4)
B = random.rand(4, 3)
BB = random.rand(self.n, 3, 9)
# does not raise
solve(b)
msg = "object too deep for desired array"
with assert_raises(ValueError, match=msg):
solve(B)
with assert_raises(ValueError, match=msg):
solve(BB)
def test_call_with_cast_to_complex_without_umfpack(self):
use_solver(useUmfpack=False)
solve = factorized(self.A)
b = random.rand(4)
for t in [np.complex64, np.complex128]:
with assert_raises(TypeError, match="Cannot cast array data"):
solve(b.astype(t))
@pytest.mark.skipif(not has_umfpack, reason="umfpack not available")
def test_call_with_cast_to_complex_with_umfpack(self):
use_solver(useUmfpack=True)
solve = factorized(self.A)
b = random.rand(4)
for t in [np.complex64, np.complex128]:
assert_warns(np.ComplexWarning, solve, b.astype(t))
@pytest.mark.skipif(not has_umfpack, reason="umfpack not available")
def test_assume_sorted_indices_flag(self):
# a sparse matrix with unsorted indices
unsorted_inds = np.array([2, 0, 1, 0])
data = np.array([10, 16, 5, 0.4])
indptr = np.array([0, 1, 2, 4])
A = csc_matrix((data, unsorted_inds, indptr), (3, 3))
b = ones(3)
# should raise when incorrectly assuming indices are sorted
use_solver(useUmfpack=True, assumeSortedIndices=True)
with assert_raises(RuntimeError,
match="UMFPACK_ERROR_invalid_matrix"):
factorized(A)
# should sort indices and succeed when not assuming indices are sorted
use_solver(useUmfpack=True, assumeSortedIndices=False)
expected = splu(A.copy()).solve(b)
assert_equal(A.has_sorted_indices, 0)
assert_array_almost_equal(factorized(A)(b), expected)
@pytest.mark.slow
@pytest.mark.skipif(not has_umfpack, reason="umfpack not available")
def test_bug_8278(self):
check_free_memory(8000)
use_solver(useUmfpack=True)
A, b = setup_bug_8278()
A = A.tocsc()
f = factorized(A)
x = f(b)
assert_array_almost_equal(A @ x, b)
class TestLinsolve(object):
def setup_method(self):
use_solver(useUmfpack=False)
def test_singular(self):
A = csc_matrix((5,5), dtype='d')
b = array([1, 2, 3, 4, 5],dtype='d')
with suppress_warnings() as sup:
sup.filter(MatrixRankWarning, "Matrix is exactly singular")
x = spsolve(A, b)
assert_(not np.isfinite(x).any())
def test_singular_gh_3312(self):
# "Bad" test case that leads SuperLU to call LAPACK with invalid
# arguments. Check that it fails moderately gracefully.
ij = np.array([(17, 0), (17, 6), (17, 12), (10, 13)], dtype=np.int32)
v = np.array([0.284213, 0.94933781, 0.15767017, 0.38797296])
A = csc_matrix((v, ij.T), shape=(20, 20))
b = np.arange(20)
try:
# should either raise a RuntimeError or return a value
# appropriate for singular input
x = spsolve(A, b)
assert_(not np.isfinite(x).any())
except RuntimeError:
pass
def test_twodiags(self):
A = spdiags([[1, 2, 3, 4, 5], [6, 5, 8, 9, 10]], [0, 1], 5, 5)
b = array([1, 2, 3, 4, 5])
# condition number of A
cond_A = norm(A.todense(),2) * norm(inv(A.todense()),2)
for t in ['f','d','F','D']:
eps = finfo(t).eps # floating point epsilon
b = b.astype(t)
for format in ['csc','csr']:
Asp = A.astype(t).asformat(format)
x = spsolve(Asp,b)
assert_(norm(b - Asp*x) < 10 * cond_A * eps)
def test_bvector_smoketest(self):
Adense = array([[0., 1., 1.],
[1., 0., 1.],
[0., 0., 1.]])
As = csc_matrix(Adense)
random.seed(1234)
x = random.randn(3)
b = As*x
x2 = spsolve(As, b)
assert_array_almost_equal(x, x2)
def test_bmatrix_smoketest(self):
Adense = array([[0., 1., 1.],
[1., 0., 1.],
[0., 0., 1.]])
As = csc_matrix(Adense)
random.seed(1234)
x = random.randn(3, 4)
Bdense = As.dot(x)
Bs = csc_matrix(Bdense)
x2 = spsolve(As, Bs)
assert_array_almost_equal(x, x2.todense())
@sup_sparse_efficiency
def test_non_square(self):
# A is not square.
A = ones((3, 4))
b = ones((4, 1))
assert_raises(ValueError, spsolve, A, b)
# A2 and b2 have incompatible shapes.
A2 = csc_matrix(eye(3))
b2 = array([1.0, 2.0])
assert_raises(ValueError, spsolve, A2, b2)
@sup_sparse_efficiency
def test_example_comparison(self):
row = array([0,0,1,2,2,2])
col = array([0,2,2,0,1,2])
data = array([1,2,3,-4,5,6])
sM = csr_matrix((data,(row,col)), shape=(3,3), dtype=float)
M = sM.todense()
row = array([0,0,1,1,0,0])
col = array([0,2,1,1,0,0])
data = array([1,1,1,1,1,1])
sN = csr_matrix((data, (row,col)), shape=(3,3), dtype=float)
N = sN.todense()
sX = spsolve(sM, sN)
X = scipy.linalg.solve(M, N)
assert_array_almost_equal(X, sX.todense())
@sup_sparse_efficiency
@pytest.mark.skipif(not has_umfpack, reason="umfpack not available")
def test_shape_compatibility(self):
use_solver(useUmfpack=True)
A = csc_matrix([[1., 0], [0, 2]])
bs = [
[1, 6],
array([1, 6]),
[[1], [6]],
array([[1], [6]]),
csc_matrix([[1], [6]]),
csr_matrix([[1], [6]]),
dok_matrix([[1], [6]]),
bsr_matrix([[1], [6]]),
array([[1., 2., 3.], [6., 8., 10.]]),
csc_matrix([[1., 2., 3.], [6., 8., 10.]]),
csr_matrix([[1., 2., 3.], [6., 8., 10.]]),
dok_matrix([[1., 2., 3.], [6., 8., 10.]]),
bsr_matrix([[1., 2., 3.], [6., 8., 10.]]),
]
for b in bs:
x = np.linalg.solve(A.toarray(), toarray(b))
for spmattype in [csc_matrix, csr_matrix, dok_matrix, lil_matrix]:
x1 = spsolve(spmattype(A), b, use_umfpack=True)
x2 = spsolve(spmattype(A), b, use_umfpack=False)
# check solution
if x.ndim == 2 and x.shape[1] == 1:
# these are also interpreted as "vectors"
x = x.ravel()
assert_array_almost_equal(toarray(x1), x, err_msg=repr((b, spmattype, 1)))
assert_array_almost_equal(toarray(x2), x, err_msg=repr((b, spmattype, 2)))
# dense vs. sparse output ("vectors" are always dense)
if isspmatrix(b) and x.ndim > 1:
assert_(isspmatrix(x1), repr((b, spmattype, 1)))
assert_(isspmatrix(x2), repr((b, spmattype, 2)))
else:
assert_(isinstance(x1, np.ndarray), repr((b, spmattype, 1)))
assert_(isinstance(x2, np.ndarray), repr((b, spmattype, 2)))
# check output shape
if x.ndim == 1:
# "vector"
assert_equal(x1.shape, (A.shape[1],))
assert_equal(x2.shape, (A.shape[1],))
else:
# "matrix"
assert_equal(x1.shape, x.shape)
assert_equal(x2.shape, x.shape)
A = csc_matrix((3, 3))
b = csc_matrix((1, 3))
assert_raises(ValueError, spsolve, A, b)
@sup_sparse_efficiency
def test_ndarray_support(self):
A = array([[1., 2.], [2., 0.]])
x = array([[1., 1.], [0.5, -0.5]])
b = array([[2., 0.], [2., 2.]])
assert_array_almost_equal(x, spsolve(A, b))
def test_gssv_badinput(self):
N = 10
d = arange(N) + 1.0
A = spdiags((d, 2*d, d[::-1]), (-3, 0, 5), N, N)
for spmatrix in (csc_matrix, csr_matrix):
A = spmatrix(A)
b = np.arange(N)
def not_c_contig(x):
return x.repeat(2)[::2]
def not_1dim(x):
return x[:,None]
def bad_type(x):
return x.astype(bool)
def too_short(x):
return x[:-1]
badops = [not_c_contig, not_1dim, bad_type, too_short]
for badop in badops:
msg = "%r %r" % (spmatrix, badop)
# Not C-contiguous
assert_raises((ValueError, TypeError), _superlu.gssv,
N, A.nnz, badop(A.data), A.indices, A.indptr,
b, int(spmatrix == csc_matrix), err_msg=msg)
assert_raises((ValueError, TypeError), _superlu.gssv,
N, A.nnz, A.data, badop(A.indices), A.indptr,
b, int(spmatrix == csc_matrix), err_msg=msg)
assert_raises((ValueError, TypeError), _superlu.gssv,
N, A.nnz, A.data, A.indices, badop(A.indptr),
b, int(spmatrix == csc_matrix), err_msg=msg)
def test_sparsity_preservation(self):
ident = csc_matrix([
[1, 0, 0],
[0, 1, 0],
[0, 0, 1]])
b = csc_matrix([
[0, 1],
[1, 0],
[0, 0]])
x = spsolve(ident, b)
assert_equal(ident.nnz, 3)
assert_equal(b.nnz, 2)
assert_equal(x.nnz, 2)
assert_allclose(x.A, b.A, atol=1e-12, rtol=1e-12)
def test_dtype_cast(self):
A_real = scipy.sparse.csr_matrix([[1, 2, 0],
[0, 0, 3],
[4, 0, 5]])
A_complex = scipy.sparse.csr_matrix([[1, 2, 0],
[0, 0, 3],
[4, 0, 5 + 1j]])
b_real = np.array([1,1,1])
b_complex = np.array([1,1,1]) + 1j*np.array([1,1,1])
x = spsolve(A_real, b_real)
assert_(np.issubdtype(x.dtype, np.floating))
x = spsolve(A_real, b_complex)
assert_(np.issubdtype(x.dtype, np.complexfloating))
x = spsolve(A_complex, b_real)
assert_(np.issubdtype(x.dtype, np.complexfloating))
x = spsolve(A_complex, b_complex)
assert_(np.issubdtype(x.dtype, np.complexfloating))
@pytest.mark.slow
@pytest.mark.skipif(not has_umfpack, reason="umfpack not available")
def test_bug_8278(self):
check_free_memory(8000)
use_solver(useUmfpack=True)
A, b = setup_bug_8278()
x = spsolve(A, b)
assert_array_almost_equal(A @ x, b)
class TestSplu(object):
def setup_method(self):
use_solver(useUmfpack=False)
n = 40
d = arange(n) + 1
self.n = n
self.A = spdiags((d, 2*d, d[::-1]), (-3, 0, 5), n, n)
random.seed(1234)
def _smoketest(self, spxlu, check, dtype):
if np.issubdtype(dtype, np.complexfloating):
A = self.A + 1j*self.A.T
else:
A = self.A
A = A.astype(dtype)
lu = spxlu(A)
rng = random.RandomState(1234)
# Input shapes
for k in [None, 1, 2, self.n, self.n+2]:
msg = "k=%r" % (k,)
if k is None:
b = rng.rand(self.n)
else:
b = rng.rand(self.n, k)
if np.issubdtype(dtype, np.complexfloating):
b = b + 1j*rng.rand(*b.shape)
b = b.astype(dtype)
x = lu.solve(b)
check(A, b, x, msg)
x = lu.solve(b, 'T')
check(A.T, b, x, msg)
x = lu.solve(b, 'H')
check(A.T.conj(), b, x, msg)
@sup_sparse_efficiency
def test_splu_smoketest(self):
self._internal_test_splu_smoketest()
def _internal_test_splu_smoketest(self):
# Check that splu works at all
def check(A, b, x, msg=""):
eps = np.finfo(A.dtype).eps
r = A * x
assert_(abs(r - b).max() < 1e3*eps, msg)
self._smoketest(splu, check, np.float32)
self._smoketest(splu, check, np.float64)
self._smoketest(splu, check, np.complex64)
self._smoketest(splu, check, np.complex128)
@sup_sparse_efficiency
def test_spilu_smoketest(self):
self._internal_test_spilu_smoketest()
def _internal_test_spilu_smoketest(self):
errors = []
def check(A, b, x, msg=""):
r = A * x
err = abs(r - b).max()
assert_(err < 1e-2, msg)
if b.dtype in (np.float64, np.complex128):
errors.append(err)
self._smoketest(spilu, check, np.float32)
self._smoketest(spilu, check, np.float64)
self._smoketest(spilu, check, np.complex64)
self._smoketest(spilu, check, np.complex128)
assert_(max(errors) > 1e-5)
@sup_sparse_efficiency
def test_spilu_drop_rule(self):
# Test passing in the drop_rule argument to spilu.
A = identity(2)
rules = [
b'basic,area'.decode('ascii'), # unicode
b'basic,area', # ascii
[b'basic', b'area'.decode('ascii')]
]
for rule in rules:
# Argument should be accepted
assert_(isinstance(spilu(A, drop_rule=rule), SuperLU))
def test_splu_nnz0(self):
A = csc_matrix((5,5), dtype='d')
assert_raises(RuntimeError, splu, A)
def test_spilu_nnz0(self):
A = csc_matrix((5,5), dtype='d')
assert_raises(RuntimeError, spilu, A)
def test_splu_basic(self):
# Test basic splu functionality.
n = 30
rng = random.RandomState(12)
a = rng.rand(n, n)
a[a < 0.95] = 0
# First test with a singular matrix
a[:, 0] = 0
a_ = csc_matrix(a)
# Matrix is exactly singular
assert_raises(RuntimeError, splu, a_)
# Make a diagonal dominant, to make sure it is not singular
a += 4*eye(n)
a_ = csc_matrix(a)
lu = splu(a_)
b = ones(n)
x = lu.solve(b)
assert_almost_equal(dot(a, x), b)
def test_splu_perm(self):
# Test the permutation vectors exposed by splu.
n = 30
a = random.random((n, n))
a[a < 0.95] = 0
# Make a diagonal dominant, to make sure it is not singular
a += 4*eye(n)
a_ = csc_matrix(a)
lu = splu(a_)
# Check that the permutation indices do belong to [0, n-1].
for perm in (lu.perm_r, lu.perm_c):
assert_(all(perm > -1))
assert_(all(perm < n))
assert_equal(len(unique(perm)), len(perm))
# Now make a symmetric, and test that the two permutation vectors are
# the same
# Note: a += a.T relies on undefined behavior.
a = a + a.T
a_ = csc_matrix(a)
lu = splu(a_)
assert_array_equal(lu.perm_r, lu.perm_c)
@pytest.mark.parametrize("splu_fun, rtol", [(splu, 1e-7), (spilu, 1e-1)])
def test_natural_permc(self, splu_fun, rtol):
# Test that the "NATURAL" permc_spec does not permute the matrix
np.random.seed(42)
n = 500
p = 0.01
A = scipy.sparse.random(n, n, p)
x = np.random.rand(n)
# Make A diagonal dominant to make sure it is not singular
A += (n+1)*scipy.sparse.identity(n)
A_ = csc_matrix(A)
b = A_ @ x
# without permc_spec, permutation is not identity
lu = splu_fun(A_)
assert_(np.any(lu.perm_c != np.arange(n)))
# with permc_spec="NATURAL", permutation is identity
lu = splu_fun(A_, permc_spec="NATURAL")
assert_array_equal(lu.perm_c, np.arange(n))
# Also, lu decomposition is valid
x2 = lu.solve(b)
assert_allclose(x, x2, rtol=rtol)
@pytest.mark.skipif(not hasattr(sys, 'getrefcount'), reason="no sys.getrefcount")
def test_lu_refcount(self):
# Test that we are keeping track of the reference count with splu.
n = 30
a = random.random((n, n))
a[a < 0.95] = 0
# Make a diagonal dominant, to make sure it is not singular
a += 4*eye(n)
a_ = csc_matrix(a)
lu = splu(a_)
# And now test that we don't have a refcount bug
rc = sys.getrefcount(lu)
for attr in ('perm_r', 'perm_c'):
perm = getattr(lu, attr)
assert_equal(sys.getrefcount(lu), rc + 1)
del perm
assert_equal(sys.getrefcount(lu), rc)
def test_bad_inputs(self):
A = self.A.tocsc()
assert_raises(ValueError, splu, A[:,:4])
assert_raises(ValueError, spilu, A[:,:4])
for lu in [splu(A), spilu(A)]:
b = random.rand(42)
B = random.rand(42, 3)
BB = random.rand(self.n, 3, 9)
assert_raises(ValueError, lu.solve, b)
assert_raises(ValueError, lu.solve, B)
assert_raises(ValueError, lu.solve, BB)
assert_raises(TypeError, lu.solve,
b.astype(np.complex64))
assert_raises(TypeError, lu.solve,
b.astype(np.complex128))
@sup_sparse_efficiency
def test_superlu_dlamch_i386_nan(self):
# SuperLU 4.3 calls some functions returning floats without
# declaring them. On i386@linux call convention, this fails to
# clear floating point registers after call. As a result, NaN
# can appear in the next floating point operation made.
#
# Here's a test case that triggered the issue.
n = 8
d = np.arange(n) + 1
A = spdiags((d, 2*d, d[::-1]), (-3, 0, 5), n, n)
A = A.astype(np.float32)
spilu(A)
A = A + 1j*A
B = A.A
assert_(not np.isnan(B).any())
@sup_sparse_efficiency
def test_lu_attr(self):
def check(dtype, complex_2=False):
A = self.A.astype(dtype)
if complex_2:
A = A + 1j*A.T
n = A.shape[0]
lu = splu(A)
# Check that the decomposition is as advertised
Pc = np.zeros((n, n))
Pc[np.arange(n), lu.perm_c] = 1
Pr = np.zeros((n, n))
Pr[lu.perm_r, np.arange(n)] = 1
Ad = A.toarray()
lhs = Pr.dot(Ad).dot(Pc)
rhs = (lu.L * lu.U).toarray()
eps = np.finfo(dtype).eps
assert_allclose(lhs, rhs, atol=100*eps)
check(np.float32)
check(np.float64)
check(np.complex64)
check(np.complex128)
check(np.complex64, True)
check(np.complex128, True)
@pytest.mark.slow
@sup_sparse_efficiency
def test_threads_parallel(self):
oks = []
def worker():
try:
self.test_splu_basic()
self._internal_test_splu_smoketest()
self._internal_test_spilu_smoketest()
oks.append(True)
except Exception:
pass
threads = [threading.Thread(target=worker)
for k in range(20)]
for t in threads:
t.start()
for t in threads:
t.join()
assert_equal(len(oks), 20)
class TestSpsolveTriangular(object):
def setup_method(self):
use_solver(useUmfpack=False)
def test_singular(self):
n = 5
A = csr_matrix((n, n))
b = np.arange(n)
for lower in (True, False):
assert_raises(scipy.linalg.LinAlgError, spsolve_triangular, A, b, lower=lower)
@sup_sparse_efficiency
def test_bad_shape(self):
# A is not square.
A = np.zeros((3, 4))
b = ones((4, 1))
assert_raises(ValueError, spsolve_triangular, A, b)
# A2 and b2 have incompatible shapes.
A2 = csr_matrix(eye(3))
b2 = array([1.0, 2.0])
assert_raises(ValueError, spsolve_triangular, A2, b2)
@sup_sparse_efficiency
def test_input_types(self):
A = array([[1., 0.], [1., 2.]])
b = array([[2., 0.], [2., 2.]])
for matrix_type in (array, csc_matrix, csr_matrix):
x = spsolve_triangular(matrix_type(A), b, lower=True)
assert_array_almost_equal(A.dot(x), b)
@pytest.mark.slow
@sup_sparse_efficiency
def test_random(self):
def random_triangle_matrix(n, lower=True):
A = scipy.sparse.random(n, n, density=0.1, format='coo')
if lower:
A = scipy.sparse.tril(A)
else:
A = scipy.sparse.triu(A)
A = A.tocsr(copy=False)
for i in range(n):
A[i, i] = np.random.rand() + 1
return A
np.random.seed(1234)
for lower in (True, False):
for n in (10, 10**2, 10**3):
A = random_triangle_matrix(n, lower=lower)
for m in (1, 10):
for b in (np.random.rand(n, m),
np.random.randint(-9, 9, (n, m)),
np.random.randint(-9, 9, (n, m)) +
np.random.randint(-9, 9, (n, m)) * 1j):
x = spsolve_triangular(A, b, lower=lower)
assert_array_almost_equal(A.dot(x), b)
x = spsolve_triangular(A, b, lower=lower,
unit_diagonal=True)
A.setdiag(1)
assert_array_almost_equal(A.dot(x), b)


@@ -0,0 +1,16 @@
"""
Sparse Eigenvalue Solvers
-------------------------
The submodules of sparse.linalg.eigen:
1. arpack: wrappers around ARPACK's implicitly restarted Arnoldi/Lanczos
eigensolvers
2. lobpcg: Locally Optimal Block Preconditioned Conjugate Gradient Method
"""
from .arpack import *
from .lobpcg import *
__all__ = [s for s in dir() if not s.startswith('_')]
from scipy._lib._testutils import PytestTester
test = PytestTester(__name__)
del PytestTester
