Fixed database typo and removed unnecessary class identifier.

2020-10-14 10:10:37 -04:00 · 2020-10-14 10:10:37 -04:00 · 45fb349a7d
commit 45fb349a7d
parent 00ad49a143
5098 changed files with 952558 additions and 85 deletions
--- a/venv/Lib/site-packages/scipy/stats/tests/test_kdeoth.py
+++ b/venv/Lib/site-packages/scipy/stats/tests/test_kdeoth.py
@ -0,0 +1,470 @@
+from scipy import stats
+import numpy as np
+from numpy.testing import (assert_almost_equal, assert_,
+    assert_array_almost_equal, assert_array_almost_equal_nulp, assert_allclose)
+import pytest
+from pytest import raises as assert_raises
+
+
+def test_kde_1d():
+    #some basic tests comparing to normal distribution
+    np.random.seed(8765678)
+    n_basesample = 500
+    xn = np.random.randn(n_basesample)
+    xnmean = xn.mean()
+    xnstd = xn.std(ddof=1)
+
+    # get kde for original sample
+    gkde = stats.gaussian_kde(xn)
+
+    # evaluate the density function for the kde for some points
+    xs = np.linspace(-7,7,501)
+    kdepdf = gkde.evaluate(xs)
+    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
+    intervall = xs[1] - xs[0]
+
+    assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
+    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
+    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
+    assert_almost_equal(prob1, 0.5, decimal=1)
+    assert_almost_equal(prob2, 0.5, decimal=1)
+    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
+    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)
+
+    assert_almost_equal(gkde.integrate_kde(gkde),
+                        (kdepdf**2).sum()*intervall, decimal=2)
+    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
+                        (kdepdf*normpdf).sum()*intervall, decimal=2)
+
+
+def test_kde_1d_weighted():
+    #some basic tests comparing to normal distribution
+    np.random.seed(8765678)
+    n_basesample = 500
+    xn = np.random.randn(n_basesample)
+    wn = np.random.rand(n_basesample)
+    xnmean = np.average(xn, weights=wn)
+    xnstd = np.sqrt(np.average((xn-xnmean)**2, weights=wn))
+
+    # get kde for original sample
+    gkde = stats.gaussian_kde(xn, weights=wn)
+
+    # evaluate the density function for the kde for some points
+    xs = np.linspace(-7,7,501)
+    kdepdf = gkde.evaluate(xs)
+    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
+    intervall = xs[1] - xs[0]
+
+    assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
+    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
+    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
+    assert_almost_equal(prob1, 0.5, decimal=1)
+    assert_almost_equal(prob2, 0.5, decimal=1)
+    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
+    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)
+
+    assert_almost_equal(gkde.integrate_kde(gkde),
+                        (kdepdf**2).sum()*intervall, decimal=2)
+    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
+                        (kdepdf*normpdf).sum()*intervall, decimal=2)
+
+
+@pytest.mark.slow
+def test_kde_2d():
+    #some basic tests comparing to normal distribution
+    np.random.seed(8765678)
+    n_basesample = 500
+
+    mean = np.array([1.0, 3.0])
+    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])
+
+    # Need transpose (shape (2, 500)) for kde
+    xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T
+
+    # get kde for original sample
+    gkde = stats.gaussian_kde(xn)
+
+    # evaluate the density function for the kde for some points
+    x, y = np.mgrid[-7:7:500j, -7:7:500j]
+    grid_coords = np.vstack([x.ravel(), y.ravel()])
+    kdepdf = gkde.evaluate(grid_coords)
+    kdepdf = kdepdf.reshape(500, 500)
+
+    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]), mean=mean, cov=covariance)
+    intervall = y.ravel()[1] - y.ravel()[0]
+
+    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)
+
+    small = -1e100
+    large = 1e100
+    prob1 = gkde.integrate_box([small, mean[1]], [large, large])
+    prob2 = gkde.integrate_box([small, small], [large, mean[1]])
+
+    assert_almost_equal(prob1, 0.5, decimal=1)
+    assert_almost_equal(prob2, 0.5, decimal=1)
+    assert_almost_equal(gkde.integrate_kde(gkde),
+                        (kdepdf**2).sum()*(intervall**2), decimal=2)
+    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
+                        (kdepdf*normpdf).sum()*(intervall**2), decimal=2)
+
+
+@pytest.mark.slow
+def test_kde_2d_weighted():
+    #some basic tests comparing to normal distribution
+    np.random.seed(8765678)
+    n_basesample = 500
+
+    mean = np.array([1.0, 3.0])
+    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])
+
+    # Need transpose (shape (2, 500)) for kde
+    xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T
+    wn = np.random.rand(n_basesample)
+
+    # get kde for original sample
+    gkde = stats.gaussian_kde(xn, weights=wn)
+
+    # evaluate the density function for the kde for some points
+    x, y = np.mgrid[-7:7:500j, -7:7:500j]
+    grid_coords = np.vstack([x.ravel(), y.ravel()])
+    kdepdf = gkde.evaluate(grid_coords)
+    kdepdf = kdepdf.reshape(500, 500)
+
+    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]), mean=mean, cov=covariance)
+    intervall = y.ravel()[1] - y.ravel()[0]
+
+    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)
+
+    small = -1e100
+    large = 1e100
+    prob1 = gkde.integrate_box([small, mean[1]], [large, large])
+    prob2 = gkde.integrate_box([small, small], [large, mean[1]])
+
+    assert_almost_equal(prob1, 0.5, decimal=1)
+    assert_almost_equal(prob2, 0.5, decimal=1)
+    assert_almost_equal(gkde.integrate_kde(gkde),
+                        (kdepdf**2).sum()*(intervall**2), decimal=2)
+    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
+                        (kdepdf*normpdf).sum()*(intervall**2), decimal=2)
+
+
+def test_kde_bandwidth_method():
+    def scotts_factor(kde_obj):
+        """Same as default, just check that it works."""
+        return np.power(kde_obj.n, -1./(kde_obj.d+4))
+
+    np.random.seed(8765678)
+    n_basesample = 50
+    xn = np.random.randn(n_basesample)
+
+    # Default
+    gkde = stats.gaussian_kde(xn)
+    # Supply a callable
+    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
+    # Supply a scalar
+    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)
+
+    xs = np.linspace(-7,7,51)
+    kdepdf = gkde.evaluate(xs)
+    kdepdf2 = gkde2.evaluate(xs)
+    assert_almost_equal(kdepdf, kdepdf2)
+    kdepdf3 = gkde3.evaluate(xs)
+    assert_almost_equal(kdepdf, kdepdf3)
+
+    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')
+
+
+def test_kde_bandwidth_method_weighted():
+    def scotts_factor(kde_obj):
+        """Same as default, just check that it works."""
+        return np.power(kde_obj.neff, -1./(kde_obj.d+4))
+
+    np.random.seed(8765678)
+    n_basesample = 50
+    xn = np.random.randn(n_basesample)
+
+    # Default
+    gkde = stats.gaussian_kde(xn)
+    # Supply a callable
+    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
+    # Supply a scalar
+    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)
+
+    xs = np.linspace(-7,7,51)
+    kdepdf = gkde.evaluate(xs)
+    kdepdf2 = gkde2.evaluate(xs)
+    assert_almost_equal(kdepdf, kdepdf2)
+    kdepdf3 = gkde3.evaluate(xs)
+    assert_almost_equal(kdepdf, kdepdf3)
+
+    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')
+
+
+# Subclasses that should stay working (extracted from various sources).
+# Unfortunately the earlier design of gaussian_kde made it necessary for users
+# to create these kinds of subclasses, or call _compute_covariance() directly.
+
+class _kde_subclass1(stats.gaussian_kde):
+    def __init__(self, dataset):
+        self.dataset = np.atleast_2d(dataset)
+        self.d, self.n = self.dataset.shape
+        self.covariance_factor = self.scotts_factor
+        self._compute_covariance()
+
+
+class _kde_subclass2(stats.gaussian_kde):
+    def __init__(self, dataset):
+        self.covariance_factor = self.scotts_factor
+        super(_kde_subclass2, self).__init__(dataset)
+
+
+class _kde_subclass3(stats.gaussian_kde):
+    def __init__(self, dataset, covariance):
+        self.covariance = covariance
+        stats.gaussian_kde.__init__(self, dataset)
+
+    def _compute_covariance(self):
+        self.inv_cov = np.linalg.inv(self.covariance)
+        self._norm_factor = np.sqrt(np.linalg.det(2 * np.pi * self.covariance))
+
+
+class _kde_subclass4(stats.gaussian_kde):
+    def covariance_factor(self):
+        return 0.5 * self.silverman_factor()
+
+
+def test_gaussian_kde_subclassing():
+    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
+    xs = np.linspace(-10, 10, num=50)
+
+    # gaussian_kde itself
+    kde = stats.gaussian_kde(x1)
+    ys = kde(xs)
+
+    # subclass 1
+    kde1 = _kde_subclass1(x1)
+    y1 = kde1(xs)
+    assert_array_almost_equal_nulp(ys, y1, nulp=10)
+
+    # subclass 2
+    kde2 = _kde_subclass2(x1)
+    y2 = kde2(xs)
+    assert_array_almost_equal_nulp(ys, y2, nulp=10)
+
+    # subclass 3
+    kde3 = _kde_subclass3(x1, kde.covariance)
+    y3 = kde3(xs)
+    assert_array_almost_equal_nulp(ys, y3, nulp=10)
+
+    # subclass 4
+    kde4 = _kde_subclass4(x1)
+    y4 = kde4(x1)
+    y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017]
+
+    assert_array_almost_equal(y_expected, y4, decimal=6)
+
+    # Not a subclass, but check for use of _compute_covariance()
+    kde5 = kde
+    kde5.covariance_factor = lambda: kde.factor
+    kde5._compute_covariance()
+    y5 = kde5(xs)
+    assert_array_almost_equal_nulp(ys, y5, nulp=10)
+
+
+def test_gaussian_kde_covariance_caching():
+    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
+    xs = np.linspace(-10, 10, num=5)
+    # These expected values are from scipy 0.10, before some changes to
+    # gaussian_kde.  They were not compared with any external reference.
+    y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475]
+
+    # Set the bandwidth, then reset it to the default.
+    kde = stats.gaussian_kde(x1)
+    kde.set_bandwidth(bw_method=0.5)
+    kde.set_bandwidth(bw_method='scott')
+    y2 = kde(xs)
+
+    assert_array_almost_equal(y_expected, y2, decimal=7)
+
+
+def test_gaussian_kde_monkeypatch():
+    """Ugly, but people may rely on this.  See scipy pull request 123,
+    specifically the linked ML thread "Width of the Gaussian in stats.kde".
+    If it is necessary to break this later on, that is to be discussed on ML.
+    """
+    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
+    xs = np.linspace(-10, 10, num=50)
+
+    # The old monkeypatched version to get at Silverman's Rule.
+    kde = stats.gaussian_kde(x1)
+    kde.covariance_factor = kde.silverman_factor
+    kde._compute_covariance()
+    y1 = kde(xs)
+
+    # The new saner version.
+    kde2 = stats.gaussian_kde(x1, bw_method='silverman')
+    y2 = kde2(xs)
+
+    assert_array_almost_equal_nulp(y1, y2, nulp=10)
+
+
+def test_kde_integer_input():
+    """Regression test for #1181."""
+    x1 = np.arange(5)
+    kde = stats.gaussian_kde(x1)
+    y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
+    assert_array_almost_equal(kde(x1), y_expected, decimal=6)
+
+
+_ftypes = ['float32', 'float64', 'float96', 'float128', 'int32', 'int64']
+
+@pytest.mark.parametrize("bw_type", _ftypes + ["scott", "silverman"])
+@pytest.mark.parametrize("weights_type", _ftypes)
+@pytest.mark.parametrize("dataset_type", _ftypes)
+@pytest.mark.parametrize("point_type", _ftypes)
+def test_kde_output_dtype(point_type, dataset_type, weights_type, bw_type):
+    # Check whether the datatypes are available
+    point_type = getattr(np, point_type, None)
+    dataset_type = getattr(np, weights_type, None)
+    weights_type = getattr(np, weights_type, None)
+
+    if bw_type in ["scott", "silverman"]:
+        bw = bw_type
+    else:
+        bw_type = getattr(np, bw_type, None)
+        bw = bw_type(3) if bw_type else None
+
+    if any(dt is None for dt in [point_type, dataset_type, weights_type, bw]):
+        pytest.skip()
+
+    weights = np.arange(5, dtype=weights_type)
+    dataset = np.arange(5, dtype=dataset_type)
+    k = stats.kde.gaussian_kde(dataset, bw_method=bw, weights=weights)
+    points = np.arange(5, dtype=point_type)
+    result = k(points)
+    # weights are always cast to float64
+    assert result.dtype == np.result_type(dataset, points, np.float64(weights),
+                                          k.factor)
+
+
+def test_pdf_logpdf():
+    np.random.seed(1)
+    n_basesample = 50
+    xn = np.random.randn(n_basesample)
+
+    # Default
+    gkde = stats.gaussian_kde(xn)
+
+    xs = np.linspace(-15, 12, 25)
+    pdf = gkde.evaluate(xs)
+    pdf2 = gkde.pdf(xs)
+    assert_almost_equal(pdf, pdf2, decimal=12)
+
+    logpdf = np.log(pdf)
+    logpdf2 = gkde.logpdf(xs)
+    assert_almost_equal(logpdf, logpdf2, decimal=12)
+
+    # There are more points than data
+    gkde = stats.gaussian_kde(xs)
+    pdf = np.log(gkde.evaluate(xn))
+    pdf2 = gkde.logpdf(xn)
+    assert_almost_equal(pdf, pdf2, decimal=12)
+
+
+def test_pdf_logpdf_weighted():
+    np.random.seed(1)
+    n_basesample = 50
+    xn = np.random.randn(n_basesample)
+    wn = np.random.rand(n_basesample)
+
+    # Default
+    gkde = stats.gaussian_kde(xn, weights=wn)
+
+    xs = np.linspace(-15, 12, 25)
+    pdf = gkde.evaluate(xs)
+    pdf2 = gkde.pdf(xs)
+    assert_almost_equal(pdf, pdf2, decimal=12)
+
+    logpdf = np.log(pdf)
+    logpdf2 = gkde.logpdf(xs)
+    assert_almost_equal(logpdf, logpdf2, decimal=12)
+
+    # There are more points than data
+    gkde = stats.gaussian_kde(xs, weights=np.random.rand(len(xs)))
+    pdf = np.log(gkde.evaluate(xn))
+    pdf2 = gkde.logpdf(xn)
+    assert_almost_equal(pdf, pdf2, decimal=12)
+
+
+def test_weights_intact():
+    # regression test for gh-9709: weights are not modified
+    np.random.seed(12345)
+    vals = np.random.lognormal(size=100)
+    weights = np.random.choice([1.0, 10.0, 100], size=vals.size)
+    orig_weights = weights.copy()
+
+    stats.gaussian_kde(np.log10(vals), weights=weights)
+    assert_allclose(weights, orig_weights, atol=1e-14, rtol=1e-14)
+
+
+def test_weights_integer():
+    # integer weights are OK, cf gh-9709 (comment)
+    np.random.seed(12345)
+    values = [0.2, 13.5, 21.0, 75.0, 99.0]
+    weights = [1, 2, 4, 8, 16]  # a list of integers
+    pdf_i = stats.gaussian_kde(values, weights=weights)
+    pdf_f = stats.gaussian_kde(values, weights=np.float64(weights))
+
+    xn = [0.3, 11, 88]
+    assert_allclose(pdf_i.evaluate(xn),
+                    pdf_f.evaluate(xn), atol=1e-14, rtol=1e-14)
+
+
+def test_seed():
+    # Test the seed option of the resample method
+    def test_seed_sub(gkde_trail):
+        n_sample = 200
+        # The results should be different without using seed
+        samp1 = gkde_trail.resample(n_sample)
+        samp2 = gkde_trail.resample(n_sample)
+        assert_raises(
+            AssertionError, assert_allclose, samp1, samp2, atol=1e-13
+        )
+        # Use integer seed
+        seed = 831
+        samp1 = gkde_trail.resample(n_sample, seed=seed)
+        samp2 = gkde_trail.resample(n_sample, seed=seed)
+        assert_allclose(samp1, samp2, atol=1e-13)
+        # Use RandomState
+        rstate1 = np.random.RandomState(seed=138)
+        samp1 = gkde_trail.resample(n_sample, seed=rstate1)
+        rstate2 = np.random.RandomState(seed=138)
+        samp2 = gkde_trail.resample(n_sample, seed=rstate2)
+        assert_allclose(samp1, samp2, atol=1e-13)
+
+        # check that np.random.Generator can be used (numpy >= 1.17)
+        if hasattr(np.random, 'default_rng'):
+            # obtain a np.random.Generator object
+            rng = np.random.default_rng(1234)
+            gkde_trail.resample(n_sample, seed=rng)
+
+    np.random.seed(8765678)
+    n_basesample = 500
+    wn = np.random.rand(n_basesample)
+    # Test 1D case
+    xn_1d = np.random.randn(n_basesample)
+
+    gkde_1d = stats.gaussian_kde(xn_1d)
+    test_seed_sub(gkde_1d)
+    gkde_1d_weighted = stats.gaussian_kde(xn_1d, weights=wn)
+    test_seed_sub(gkde_1d_weighted)
+
+    # Test 2D case
+    mean = np.array([1.0, 3.0])
+    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])
+    xn_2d = np.random.multivariate_normal(mean, covariance, size=n_basesample).T
+
+    gkde_2d = stats.gaussian_kde(xn_2d)
+    test_seed_sub(gkde_2d)
+    gkde_2d_weighted = stats.gaussian_kde(xn_2d, weights=wn)
+    test_seed_sub(gkde_2d_weighted)