Uploaded Test files
This commit is contained in:
parent
f584ad9d97
commit
2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
94
venv/Lib/site-packages/sklearn/tree/_splitter.pxd
Normal file
94
venv/Lib/site-packages/sklearn/tree/_splitter.pxd
Normal file
|
@ -0,0 +1,94 @@
|
|||
# Authors: Gilles Louppe <g.louppe@gmail.com>
|
||||
# Peter Prettenhofer <peter.prettenhofer@gmail.com>
|
||||
# Brian Holt <bdholt1@gmail.com>
|
||||
# Joel Nothman <joel.nothman@gmail.com>
|
||||
# Arnaud Joly <arnaud.v.joly@gmail.com>
|
||||
# Jacob Schreiber <jmschreiber91@gmail.com>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
# See _splitter.pyx for details.
|
||||
|
||||
import numpy as np
|
||||
cimport numpy as np
|
||||
|
||||
from ._criterion cimport Criterion
|
||||
|
||||
from ._tree cimport DTYPE_t # Type of X
|
||||
from ._tree cimport DOUBLE_t # Type of y, sample_weight
|
||||
from ._tree cimport SIZE_t # Type for indices and counters
|
||||
from ._tree cimport INT32_t # Signed 32 bit integer
|
||||
from ._tree cimport UINT32_t # Unsigned 32 bit integer
|
||||
|
||||
cdef struct SplitRecord:
|
||||
# Data to track a sample split. A pointer to this struct is one of the
# arguments of Splitter.node_split.
|
||||
SIZE_t feature # Which feature to split on.
|
||||
SIZE_t pos # Split samples array at the given position,
|
||||
# i.e. count of samples below threshold for feature.
|
||||
# pos is >= end if the node is a leaf.
|
||||
double threshold # Threshold to split at.
|
||||
double improvement # Impurity improvement given parent node.
|
||||
double impurity_left # Impurity of the left side of the split.
|
||||
double impurity_right # Impurity of the right side of the split.
|
||||
|
||||
cdef class Splitter:
|
||||
# The splitter searches in the input space for a feature and a threshold
|
||||
# to split the samples samples[start:end].
|
||||
#
|
||||
# The impurity computations are delegated to a criterion object.
#
# Only attributes and cdef method signatures are declared here; see
# _splitter.pyx for the implementation.
|
||||
|
||||
# Internal structures
|
||||
cdef public Criterion criterion # Impurity criterion
|
||||
cdef public SIZE_t max_features # Number of features to test
|
||||
cdef public SIZE_t min_samples_leaf # Min samples in a leaf
|
||||
cdef public double min_weight_leaf # Minimum weight in a leaf
|
||||
|
||||
cdef object random_state # Random state
|
||||
cdef UINT32_t rand_r_state # sklearn_rand_r random number state
|
||||
|
||||
cdef SIZE_t* samples # Sample indices in X, y
|
||||
cdef SIZE_t n_samples # X.shape[0]
|
||||
cdef double weighted_n_samples # Weighted number of samples
|
||||
cdef SIZE_t* features # Feature indices in X
|
||||
cdef SIZE_t* constant_features # Constant features indices
|
||||
cdef SIZE_t n_features # X.shape[1]
|
||||
cdef DTYPE_t* feature_values # temp. array holding feature values
|
||||
|
||||
cdef SIZE_t start # Start position for the current node
|
||||
cdef SIZE_t end # End position for the current node
|
||||
|
||||
# y is held as a read-only (const), C-contiguous 2-d typed memoryview.
cdef const DOUBLE_t[:, ::1] y
|
||||
# Raw pointer to the sample weights (same signature type as the
# sample_weight argument of init).
cdef DOUBLE_t* sample_weight
|
||||
|
||||
# The samples vector `samples` is maintained by the Splitter object such
|
||||
# that the samples contained in a node are contiguous. With this setting,
|
||||
# `node_split` reorganizes the node samples `samples[start:end]` in two
|
||||
# subsets `samples[start:pos]` and `samples[pos:end]`.
|
||||
|
||||
# The 1-d `features` array of size n_features contains the features
|
||||
# indices and allows fast sampling without replacement of features.
|
||||
|
||||
# The 1-d `constant_features` array of size n_features holds in
|
||||
# `constant_features[:n_constant_features]` the feature ids with
|
||||
# constant values for all the samples that reached a specific node.
|
||||
# The value `n_constant_features` is given by the parent node to its
|
||||
# child nodes. The content of the range `[n_constant_features:]` is left
|
||||
# undefined, but preallocated for performance reasons.
|
||||
# This allows optimization with depth-based tree building.
|
||||
|
||||
# Methods
#
# `except -1` declares -1 as the return value that signals a raised
# Python exception; `nogil` marks a method as callable without the GIL.
|
||||
# init takes X, y and sample_weight. `X_idx_sorted=*` declares an optional
# argument whose default value is given in the implementation.
cdef int init(self, object X, const DOUBLE_t[:, ::1] y,
|
||||
DOUBLE_t* sample_weight,
|
||||
np.ndarray X_idx_sorted=*) except -1
|
||||
|
||||
# node_reset takes the start and end positions of a node.
# NOTE(review): weighted_n_node_samples is presumably an output written
# through the pointer -- confirm in _splitter.pyx.
cdef int node_reset(self, SIZE_t start, SIZE_t end,
|
||||
double* weighted_n_node_samples) nogil except -1
|
||||
|
||||
# node_split takes the node impurity plus pointers to a SplitRecord and to
# n_constant_features.
# NOTE(review): both pointers are presumably updated in place -- confirm
# in _splitter.pyx.
cdef int node_split(self,
|
||||
double impurity, # Impurity of the node
|
||||
SplitRecord* split,
|
||||
SIZE_t* n_constant_features) nogil except -1
|
||||
|
||||
# NOTE(review): dest is presumably the buffer receiving the node value --
# confirm in _splitter.pyx.
cdef void node_value(self, double* dest) nogil
|
||||
|
||||
# Returns a double; takes no argument besides self.
cdef double node_impurity(self) nogil
|
Loading…
Add table
Add a link
Reference in a new issue