Uploaded Test files

2020-11-12 11:05:57 -05:00 · 2020-11-12 11:05:57 -05:00 · 2e81cb7d99
commit 2e81cb7d99
parent f584ad9d97
16627 changed files with 2065359 additions and 102444 deletions
--- a/venv/Lib/site-packages/sklearn/tree/_criterion.pxd
+++ b/venv/Lib/site-packages/sklearn/tree/_criterion.pxd
@ -0,0 +1,77 @@
+# Authors: Gilles Louppe <g.louppe@gmail.com>
+#          Peter Prettenhofer <peter.prettenhofer@gmail.com>
+#          Brian Holt <bdholt1@gmail.com>
+#          Joel Nothman <joel.nothman@gmail.com>
+#          Arnaud Joly <arnaud.v.joly@gmail.com>
+#          Jacob Schreiber <jmschreiber91@gmail.com>
+#
+# License: BSD 3 clause
+
+# See _criterion.pyx for implementation details.
+
+import numpy as np
+cimport numpy as np
+
+from ._tree cimport DTYPE_t          # Type of X
+from ._tree cimport DOUBLE_t         # Type of y, sample_weight
+from ._tree cimport SIZE_t           # Type for indices and counters
+from ._tree cimport INT32_t          # Signed 32 bit integer
+from ._tree cimport UINT32_t         # Unsigned 32 bit integer
+
+cdef class Criterion:
+    # The criterion computes the impurity of a node and the reduction of
+    # impurity of a split on that node. It also computes the output statistics
+    # such as the mean in regression and class probabilities in classification.
+
+    # Internal structures
+    cdef const DOUBLE_t[:, ::1] y        # Values of y: read-only, C-contiguous 2-D view indexed y[sample, output]
+    cdef DOUBLE_t* sample_weight         # Sample weights, indexed by sample index (w in the formula below)
+
+    cdef SIZE_t* samples                 # Sample indices in X, y
+    cdef SIZE_t start                    # samples[start:pos] are the samples in the left node
+    cdef SIZE_t pos                      # samples[pos:end] are the samples in the right node
+    cdef SIZE_t end                      # samples[start:end] are all the samples in the node
+
+    cdef SIZE_t n_outputs                # Number of outputs
+    cdef SIZE_t n_samples                # Number of samples
+    cdef SIZE_t n_node_samples           # Number of samples in the node (end-start)
+    cdef double weighted_n_samples       # Weighted number of samples (in total)
+    cdef double weighted_n_node_samples  # Weighted number of samples in the node
+    cdef double weighted_n_left          # Weighted number of samples in the left node
+    cdef double weighted_n_right         # Weighted number of samples in the right node
+
+    cdef double* sum_total          # For classification criteria, the sum of the
+                                    # weighted count of each label. For regression,
+                                    # the sum of w*y. sum_total[k] is equal to
+                                    # sum_{i=start}^{end-1} w[samples[i]]*y[samples[i], k],
+                                    # where k is the output index.
+    cdef double* sum_left           # Same as above, but for the left side of the split (samples[start:pos])
+    cdef double* sum_right          # Same as above, but for the right side of the split (samples[pos:end])
+
+    # The criterion object is maintained such that left and right collected
+    # statistics correspond to samples[start:pos] and samples[pos:end].
+
+    # Methods (declarations only, all nogil; the int-returning ones use -1 to signal an error)
+    cdef int init(self, const DOUBLE_t[:, ::1] y, DOUBLE_t* sample_weight,
+                  double weighted_n_samples, SIZE_t* samples, SIZE_t start,
+                  SIZE_t end) nogil except -1  # NOTE(review): presumably binds the node samples[start:end]; confirm in .pyx
+    cdef int reset(self) nogil except -1  # NOTE(review): presumably moves pos back to start; confirm in .pyx
+    cdef int reverse_reset(self) nogil except -1  # NOTE(review): presumably moves pos to end; confirm in .pyx
+    cdef int update(self, SIZE_t new_pos) nogil except -1  # NOTE(review): presumably moves pos to new_pos; confirm in .pyx
+    cdef double node_impurity(self) nogil  # Impurity of the node (presumably over samples[start:end])
+    cdef void children_impurity(self, double* impurity_left,
+                                double* impurity_right) nogil  # void return: results go through the two pointers
+    cdef void node_value(self, double* dest) nogil  # NOTE(review): presumably writes the node's output statistics to dest
+    cdef double impurity_improvement(self, double impurity) nogil  # NOTE(review): presumably the impurity reduction of the split
+    cdef double proxy_impurity_improvement(self) nogil  # NOTE(review): presumably a stand-in for the above; confirm in .pyx
+
+cdef class ClassificationCriterion(Criterion):
+    """Abstract criterion for classification."""
+
+    cdef SIZE_t* n_classes   # NOTE(review): presumably the number of classes for each output; confirm in _criterion.pyx
+    cdef SIZE_t sum_stride   # NOTE(review): presumably the per-output stride in the sum_* arrays; confirm in _criterion.pyx
+
+cdef class RegressionCriterion(Criterion):
+    """Abstract regression criterion."""
+
+    cdef double sq_sum_total  # NOTE(review): presumably the weighted sum of squared y over the node; confirm in _criterion.pyx