Uploaded Test files

2020-11-12 11:05:57 -05:00 · 2020-11-12 11:05:57 -05:00 · 2e81cb7d99
commit 2e81cb7d99
parent f584ad9d97
16627 changed files with 2065359 additions and 102444 deletions
--- a/venv/Lib/site-packages/sklearn/utils/tests/test_random.py
+++ b/venv/Lib/site-packages/sklearn/utils/tests/test_random.py
@@ -0,0 +1,187 @@
+import numpy as np
+import pytest
+import scipy.sparse as sp
+from scipy.special import comb
+from numpy.testing import assert_array_almost_equal
+
+from sklearn.utils.random import _random_choice_csc, sample_without_replacement
+from sklearn.utils._random import _our_rand_r_py
+
+
+###############################################################################
+# test custom sampling without replacement algorithm
+###############################################################################
+def test_invalid_sample_without_replacement_algorithm():
+    """Reject an unrecognised value in the sampler's third positional slot.
+
+    Calling ``sample_without_replacement(5, 4, "unknown")`` must raise
+    ``ValueError``.
+    """
+    n_population, n_samples, bogus_method = 5, 4, "unknown"
+    with pytest.raises(ValueError):
+        sample_without_replacement(n_population, n_samples, bogus_method)
+
+
+def test_sample_without_replacement_algorithms():
+    """Run every shared sampler check against each sampling method.
+
+    For each method name a thin wrapper around ``sample_without_replacement``
+    is built that pins ``method`` and exposes the
+    ``(n_population, n_samples, random_state=None)`` call shape used by the
+    ``check_*`` helpers.
+    """
+    shared_checks = (
+        check_edge_case_of_sample_int,
+        check_sample_int,
+        check_sample_int_distribution,
+    )
+
+    for method_name in ("auto", "tracking_selection", "reservoir_sampling",
+                        "pool"):
+        # The method is bound as a default argument so the wrapper does not
+        # depend on later values of the loop variable.
+        def sampler(n_population, n_samples, random_state=None,
+                    _method=method_name):
+            return sample_without_replacement(n_population, n_samples,
+                                              method=_method,
+                                              random_state=random_state)
+
+        for run_check in shared_checks:
+            run_check(sampler)
+
+
+def check_edge_case_of_sample_int(sample_without_replacement):
+    """Exercise boundary arguments of a ``(n_population, n_samples)`` sampler.
+
+    Parameters
+    ----------
+    sample_without_replacement : callable
+        Sampler under test, called as ``f(n_population, n_samples)``.
+
+    Notes
+    -----
+    The sampler must raise ``ValueError`` when more samples are requested
+    than the population holds or when either argument is negative, and must
+    otherwise return an object whose ``shape`` is ``(n_samples,)``.
+    """
+    draw = sample_without_replacement
+
+    # n_population < n_samples: more samples requested than available.
+    for oversized in ((0, 1), (1, 2)):
+        with pytest.raises(ValueError):
+            draw(*oversized)
+
+    # Valid requests, first with n_population == n_samples and then with
+    # n_population > n_samples; the result is 1-D of length n_samples.
+    for n_population, n_samples in ((0, 0), (1, 1), (5, 0), (5, 1)):
+        assert draw(n_population, n_samples).shape == (n_samples, )
+
+    # n_population < 0 or n_samples < 0
+    for negative in ((-1, 5), (5, -1)):
+        with pytest.raises(ValueError):
+            draw(*negative)
+
+
+def check_sample_int(sample_without_replacement):
+    """Validate length, uniqueness and value range of the drawn samples.
+
+    This test is heavily inspired from test_random.py of python-core.
+
+    Parameters
+    ----------
+    sample_without_replacement : callable
+        Sampler under test, called as ``f(n_population, n_samples)``.
+
+    Raises
+    ------
+    AssertionError
+        If, for any ``0 <= n_samples <= n_population``, the sample has the
+        wrong length, contains duplicates, or holds a value outside
+        ``[0, n_population)``.
+    """
+    n_population = 100
+
+    for n_samples in range(n_population + 1):
+        s = sample_without_replacement(n_population, n_samples)
+        assert len(s) == n_samples
+        unique = np.unique(s)
+        assert np.size(unique) == n_samples
+        # Check both ends of the range: an upper-bound-only test would let
+        # negative values slip through unnoticed.
+        assert np.all(unique >= 0)
+        assert np.all(unique < n_population)
+
+    # test edge case n_population == n_samples == 0
+    assert np.size(sample_without_replacement(0, 0)) == 0
+
+
+def check_sample_int_distribution(sample_without_replacement):
+    """Check that the sampler can produce every possible subset.
+
+    This test is heavily inspired from test_random.py of python-core.
+
+    Parameters
+    ----------
+    sample_without_replacement : callable
+        Sampler under test, called as ``f(n_population, n_samples)``.
+
+    Raises
+    ------
+    AssertionError
+        If, for some ``n_samples`` in ``range(n_population)``, fewer distinct
+        subsets than ``comb(n_population, n_samples)`` were observed once the
+        trial budget was exhausted.
+    """
+    n_population = 10
+
+    # A generous trial budget keeps spurious failures unlikely; the common
+    # case stops early, as soon as every subset has been seen.
+    max_trials = 10000
+
+    for n_samples in range(n_population):
+        # Subsets (combinations) are counted rather than orderings
+        # (permutations): a weaker check, but one that also suits samplers
+        # that do not return the chosen integers in random order.
+        n_expected = comb(n_population, n_samples, exact=True)
+
+        seen_subsets = set()
+        for _ in range(max_trials):
+            drawn = sample_without_replacement(n_population, n_samples)
+            seen_subsets.add(frozenset(drawn))
+
+            if len(seen_subsets) == n_expected:
+                break
+        else:
+            raise AssertionError(
+                "number of combinations != number of expected (%s != %s)" %
+                (len(seen_subsets), n_expected))
+
+
+def test_random_choice_csc(n_samples=10000, random_state=24):
+    """Check the per-column value frequencies drawn by ``_random_choice_csc``.
+
+    Four scenarios are covered: explicit class probabilities, omitted
+    probabilities, probabilities of exactly 0.0 and 1.0, and targets holding
+    a single class.  In each one the result must be a scipy sparse matrix
+    whose k-th column has value frequencies equal to the expected
+    probabilities to one decimal place.
+    """
+
+    def column_frequencies(matrix, k, minlength=0):
+        # Occurrence count of each integer value in column k, divided by
+        # n_samples.
+        values = matrix.getcol(k).toarray().ravel()
+        return np.bincount(values, minlength=minlength) / float(n_samples)
+
+    # Explicit class probabilities
+    classes = [np.array([0, 1]), np.array([0, 1, 2])]
+    expected = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]
+
+    got = _random_choice_csc(n_samples, classes, expected, random_state)
+    assert sp.issparse(got)
+    for k, expected_p in enumerate(expected):
+        assert_array_almost_equal(expected_p, column_frequencies(got, k),
+                                  decimal=1)
+
+    # Implicit class probabilities
+    classes = [[0, 1], [1, 2]]  # test for array-like support
+    expected = [np.array([0.5, 0.5]), np.array([0, 1/2, 1/2])]
+
+    got = _random_choice_csc(n_samples=n_samples,
+                             classes=classes,
+                             random_state=random_state)
+    assert sp.issparse(got)
+    for k, expected_p in enumerate(expected):
+        assert_array_almost_equal(expected_p, column_frequencies(got, k),
+                                  decimal=1)
+
+    # Edge case probabilities 1.0 and 0.0
+    classes = [np.array([0, 1]), np.array([0, 1, 2])]
+    expected = [np.array([1.0, 0.0]), np.array([0.0, 1.0, 0.0])]
+
+    got = _random_choice_csc(n_samples, classes, expected, random_state)
+    assert sp.issparse(got)
+    for k, expected_p in enumerate(expected):
+        # Pad the counts so values that were never drawn still get a slot.
+        observed = column_frequencies(got, k, minlength=len(expected_p))
+        assert_array_almost_equal(expected_p, observed, decimal=1)
+
+    # One class target data
+    classes = [[1], [0]]  # test for array-like support
+    expected = [np.array([0.0, 1.0]), np.array([1.0])]
+
+    got = _random_choice_csc(n_samples=n_samples,
+                             classes=classes,
+                             random_state=random_state)
+    assert sp.issparse(got)
+    for k, expected_p in enumerate(expected):
+        assert_array_almost_equal(expected_p, column_frequencies(got, k),
+                                  decimal=1)
+
+
+def test_random_choice_csc_errors():
+    """``_random_choice_csc`` must raise ``ValueError`` on invalid input.
+
+    Each entry below pairs a ``classes`` list with a ``class_probabilities``
+    list; every pair is expected to be rejected.
+    """
+    invalid_inputs = [
+        # the length of an array in classes and class_probabilities is
+        # mismatched
+        ([np.array([0, 1]), np.array([0, 1, 2, 3])],
+         [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]),
+        # the class dtype is not supported (strings)
+        ([np.array(["a", "1"]), np.array(["z", "1", "2"])],
+         [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]),
+        # the class dtype is not supported (floats)
+        ([np.array([4.2, 0.1]), np.array([0.1, 0.2, 9.4])],
+         [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]),
+        # Given probabilities don't sum to 1
+        ([np.array([0, 1]), np.array([0, 1, 2])],
+         [np.array([0.5, 0.6]), np.array([0.6, 0.1, 0.3])]),
+    ]
+
+    for classes, class_probabilities in invalid_inputs:
+        with pytest.raises(ValueError):
+            _random_choice_csc(4, classes, class_probabilities, 1)
+
+
+def test_our_rand_r():
+    """Pin ``_our_rand_r_py`` to its known outputs for two fixed arguments."""
+    known_outputs = (
+        (1273642419, 131541053),
+        (0, 270369),
+    )
+    for argument, expected in known_outputs:
+        assert expected == _our_rand_r_py(argument)