Uploaded Test files

2020-11-12 11:05:57 -05:00 · 2020-11-12 11:05:57 -05:00 · 2e81cb7d99
commit 2e81cb7d99
parent f584ad9d97
16627 changed files with 2065359 additions and 102444 deletions
--- a/venv/Lib/site-packages/sklearn/utils/random.py
+++ b/venv/Lib/site-packages/sklearn/utils/random.py
@ -0,0 +1,104 @@
+# Author: Hamzeh Alsalhi <ha258@cornell.edu>
+#
+# License: BSD 3 clause
+import numpy as np
+import scipy.sparse as sp
+import array
+
+from . import check_random_state
+from ._random import sample_without_replacement
+from . import deprecated
+
+__all__ = ['sample_without_replacement']
+
+
+@deprecated("random_choice_csc is deprecated in version "
+            "0.22 and will be removed in version 0.24.")
+def random_choice_csc(n_samples, classes, class_probability=None,
+                      random_state=None):
+    return _random_choice_csc(n_samples, classes, class_probability,
+                              random_state)
+
+
+def _random_choice_csc(n_samples, classes, class_probability=None,
+                       random_state=None):
+    """Generate a sparse random matrix given column class distributions
+
+    Parameters
+    ----------
+    n_samples : int,
+        Number of samples to draw in each column.
+
+    classes : list of size n_outputs of arrays of size (n_classes,)
+        List of classes for each column.
+
+    class_probability : list of size n_outputs of arrays of size (n_classes,)
+        Optional (default=None). Class distribution of each column. If None the
+        uniform distribution is assumed.
+
+    random_state : int, RandomState instance, default=None
+        Controls the randomness of the sampled classes.
+        See :term:`Glossary <random_state>`.
+
+    Returns
+    -------
+    random_matrix : sparse csc matrix of size (n_samples, n_outputs)
+
+    """
+    data = array.array('i')
+    indices = array.array('i')
+    indptr = array.array('i', [0])
+
+    for j in range(len(classes)):
+        classes[j] = np.asarray(classes[j])
+        if classes[j].dtype.kind != 'i':
+            raise ValueError("class dtype %s is not supported" %
+                             classes[j].dtype)
+        classes[j] = classes[j].astype(np.int64, copy=False)
+
+        # use uniform distribution if no class_probability is given
+        if class_probability is None:
+            class_prob_j = np.empty(shape=classes[j].shape[0])
+            class_prob_j.fill(1 / classes[j].shape[0])
+        else:
+            class_prob_j = np.asarray(class_probability[j])
+
+        if not np.isclose(np.sum(class_prob_j), 1.0):
+            raise ValueError("Probability array at index {0} does not sum to "
+                             "one".format(j))
+
+        if class_prob_j.shape[0] != classes[j].shape[0]:
+            raise ValueError("classes[{0}] (length {1}) and "
+                             "class_probability[{0}] (length {2}) have "
+                             "different length.".format(j,
+                                                        classes[j].shape[0],
+                                                        class_prob_j.shape[0]))
+
+        # If 0 is not present in the classes insert it with a probability 0.0
+        if 0 not in classes[j]:
+            classes[j] = np.insert(classes[j], 0, 0)
+            class_prob_j = np.insert(class_prob_j, 0, 0.0)
+
+        # If there are nonzero classes choose randomly using class_probability
+        rng = check_random_state(random_state)
+        if classes[j].shape[0] > 1:
+            p_nonzero = 1 - class_prob_j[classes[j] == 0]
+            nnz = int(n_samples * p_nonzero)
+            ind_sample = sample_without_replacement(n_population=n_samples,
+                                                    n_samples=nnz,
+                                                    random_state=random_state)
+            indices.extend(ind_sample)
+
+            # Normalize probabilities for the nonzero elements
+            classes_j_nonzero = classes[j] != 0
+            class_probability_nz = class_prob_j[classes_j_nonzero]
+            class_probability_nz_norm = (class_probability_nz /
+                                         np.sum(class_probability_nz))
+            classes_ind = np.searchsorted(class_probability_nz_norm.cumsum(),
+                                          rng.rand(nnz))
+            data.extend(classes[j][classes_j_nonzero][classes_ind])
+        indptr.append(len(indices))
+
+    return sp.csc_matrix((data, indices, indptr),
+                         (n_samples, len(classes)),
+                         dtype=int)