Uploaded Test files

2020-11-12 11:05:57 -05:00 · 2020-11-12 11:05:57 -05:00 · 2e81cb7d99
commit 2e81cb7d99
parent f584ad9d97
16627 changed files with 2065359 additions and 102444 deletions
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__init__.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__init__.py
@@ -0,0 +1,13 @@
+"""
+The :mod:`sklearn.feature_extraction` module deals with feature extraction
+from raw data. It currently includes methods to extract features from text and
+images.
+"""
+
+from ._dict_vectorizer import DictVectorizer
+from ._hash import FeatureHasher
+from .image import img_to_graph, grid_to_graph
+from . import text
+
+__all__ = ['DictVectorizer', 'image', 'img_to_graph', 'grid_to_graph', 'text',
+           'FeatureHasher']
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/__init__.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/__init__.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/_dict_vectorizer.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/_dict_vectorizer.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/_hash.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/_hash.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/_stop_words.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/_stop_words.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/dict_vectorizer.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/dict_vectorizer.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/hashing.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/hashing.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/image.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/image.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/setup.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/setup.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/stop_words.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/stop_words.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/text.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/__pycache__/text.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/_dict_vectorizer.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/_dict_vectorizer.py
@@ -0,0 +1,364 @@
+# Authors: Lars Buitinck
+#          Dan Blanchard <dblanchard@ets.org>
+# License: BSD 3 clause
+
+from array import array
+from collections.abc import Mapping
+from operator import itemgetter
+
+import numpy as np
+import scipy.sparse as sp
+
+from ..base import BaseEstimator, TransformerMixin
+from ..utils import check_array, tosequence
+from ..utils.validation import _deprecate_positional_args
+
+
+def _tosequence(X):
+    """Turn X into a sequence or ndarray, avoiding a copy if possible."""
+    if isinstance(X, Mapping):  # single sample
+        return [X]
+    else:
+        return tosequence(X)
+
+
+class DictVectorizer(TransformerMixin, BaseEstimator):
+    """Transforms lists of feature-value mappings to vectors.
+
+    This transformer turns lists of mappings (dict-like objects) of feature
+    names to feature values into Numpy arrays or scipy.sparse matrices for use
+    with scikit-learn estimators.
+
+    When feature values are strings, this transformer will do a binary one-hot
+    (aka one-of-K) coding: one boolean-valued feature is constructed for each
+    of the possible string values that the feature can take on. For instance,
+    a feature "f" that can take on the values "ham" and "spam" will become two
+    features in the output, one signifying "f=ham", the other "f=spam".
+
+    However, note that this transformer will only do a binary one-hot encoding
+    when feature values are of type string. If categorical features are
+    represented as numeric values such as int, the DictVectorizer can be
+    followed by :class:`sklearn.preprocessing.OneHotEncoder` to complete
+    binary one-hot encoding.
+
+    Features that do not occur in a sample (mapping) will have a zero value
+    in the resulting array/matrix.
+
+    Read more in the :ref:`User Guide <dict_feature_extraction>`.
+
+    Parameters
+    ----------
+    dtype : dtype, default=np.float64
+        The type of feature values. Passed to Numpy array/scipy.sparse matrix
+        constructors as the dtype argument.
+    separator : str, default="="
+        Separator string used when constructing new features for one-hot
+        coding.
+    sparse : bool, default=True
+        Whether transform should produce scipy.sparse matrices.
+    sort : bool, default=True
+        Whether ``feature_names_`` and ``vocabulary_`` should be
+        sorted when fitting.
+
+    Attributes
+    ----------
+    vocabulary_ : dict
+        A dictionary mapping feature names to feature indices.
+
+    feature_names_ : list
+        A list of length n_features containing the feature names (e.g., "f=ham"
+        and "f=spam").
+
+    Examples
+    --------
+    >>> from sklearn.feature_extraction import DictVectorizer
+    >>> v = DictVectorizer(sparse=False)
+    >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
+    >>> X = v.fit_transform(D)
+    >>> X
+    array([[2., 0., 1.],
+           [0., 1., 3.]])
+    >>> v.inverse_transform(X) == \
+        [{'bar': 2.0, 'foo': 1.0}, {'baz': 1.0, 'foo': 3.0}]
+    True
+    >>> v.transform({'foo': 4, 'unseen_feature': 3})
+    array([[0., 0., 4.]])
+
+    See also
+    --------
+    FeatureHasher : performs vectorization using only a hash function.
+    sklearn.preprocessing.OrdinalEncoder : handles nominal/categorical
+      features encoded as columns of arbitrary data types.
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, dtype=np.float64, separator="=", sparse=True,
+                 sort=True):
+        self.dtype = dtype
+        self.separator = separator
+        self.sparse = sparse
+        self.sort = sort
+
+    def fit(self, X, y=None):
+        """Learn a list of feature name -> indices mappings.
+
+        Parameters
+        ----------
+        X : Mapping or iterable over Mappings
+            Dict(s) or Mapping(s) from feature names (arbitrary Python
+            objects) to feature values (strings or convertible to dtype).
+        y : (ignored)
+
+        Returns
+        -------
+        self
+        """
+        feature_names = []
+        vocab = {}
+
+        for x in X:
+            for f, v in x.items():
+                if isinstance(v, str):
+                    f = "%s%s%s" % (f, self.separator, v)
+                if f not in vocab:
+                    feature_names.append(f)
+                    vocab[f] = len(vocab)
+
+        if self.sort:
+            feature_names.sort()
+            vocab = {f: i for i, f in enumerate(feature_names)}
+
+        self.feature_names_ = feature_names
+        self.vocabulary_ = vocab
+
+        return self
+
+    def _transform(self, X, fitting):
+        # Sanity check: Python's array has no way of explicitly requesting the
+        # signed 32-bit integers that scipy.sparse needs, so we use the next
+        # best thing: typecode "i" (int). However, if that gives larger or
+        # smaller integers than 32-bit ones, np.frombuffer screws up.
+        assert array("i").itemsize == 4, (
+            "sizeof(int) != 4 on your platform; please report this at"
+            " https://github.com/scikit-learn/scikit-learn/issues and"
+            " include the output from platform.platform() in your bug report")
+
+        dtype = self.dtype
+        if fitting:
+            feature_names = []
+            vocab = {}
+        else:
+            feature_names = self.feature_names_
+            vocab = self.vocabulary_
+
+        # Process everything as sparse regardless of setting
+        X = [X] if isinstance(X, Mapping) else X
+
+        indices = array("i")
+        indptr = [0]
+        # XXX we could change values to an array.array as well, but it
+        # would require (heuristic) conversion of dtype to typecode...
+        values = []
+
+        # collect all the possible feature names and build sparse matrix at
+        # same time
+        for x in X:
+            for f, v in x.items():
+                if isinstance(v, str):
+                    f = "%s%s%s" % (f, self.separator, v)
+                    v = 1
+                if f in vocab:
+                    indices.append(vocab[f])
+                    values.append(dtype(v))
+                else:
+                    if fitting:
+                        feature_names.append(f)
+                        vocab[f] = len(vocab)
+                        indices.append(vocab[f])
+                        values.append(dtype(v))
+
+            indptr.append(len(indices))
+
+        if len(indptr) == 1:
+            raise ValueError("Sample sequence X is empty.")
+
+        indices = np.frombuffer(indices, dtype=np.intc)
+        shape = (len(indptr) - 1, len(vocab))
+
+        result_matrix = sp.csr_matrix((values, indices, indptr),
+                                      shape=shape, dtype=dtype)
+
+        # Sort everything if asked
+        if fitting and self.sort:
+            feature_names.sort()
+            map_index = np.empty(len(feature_names), dtype=np.int32)
+            for new_val, f in enumerate(feature_names):
+                map_index[new_val] = vocab[f]
+                vocab[f] = new_val
+            result_matrix = result_matrix[:, map_index]
+
+        if self.sparse:
+            result_matrix.sort_indices()
+        else:
+            result_matrix = result_matrix.toarray()
+
+        if fitting:
+            self.feature_names_ = feature_names
+            self.vocabulary_ = vocab
+
+        return result_matrix
+
+    def fit_transform(self, X, y=None):
+        """Learn a list of feature name -> indices mappings and transform X.
+
+        Like fit(X) followed by transform(X), but does not require
+        materializing X in memory.
+
+        Parameters
+        ----------
+        X : Mapping or iterable over Mappings
+            Dict(s) or Mapping(s) from feature names (arbitrary Python
+            objects) to feature values (strings or convertible to dtype).
+        y : (ignored)
+
+        Returns
+        -------
+        Xa : {array, sparse matrix}
+            Feature vectors; always 2-d.
+        """
+        return self._transform(X, fitting=True)
+
+    def inverse_transform(self, X, dict_type=dict):
+        """Transform array or sparse matrix X back to feature mappings.
+
+        X must have been produced by this DictVectorizer's transform or
+        fit_transform method; it may only have passed through transformers
+        that preserve the number of features and their order.
+
+        In the case of one-hot/one-of-K coding, the constructed feature
+        names and values are returned rather than the original ones.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Sample matrix.
+        dict_type : type, default=dict
+            Constructor for feature mappings. Must conform to the
+            collections.abc.Mapping API.
+
+        Returns
+        -------
+        D : list of dict_type objects of shape (n_samples,)
+            Feature mappings for the samples in X.
+        """
+        # COO matrix is not subscriptable
+        X = check_array(X, accept_sparse=['csr', 'csc'])
+        n_samples = X.shape[0]
+
+        names = self.feature_names_
+        dicts = [dict_type() for _ in range(n_samples)]
+
+        if sp.issparse(X):
+            for i, j in zip(*X.nonzero()):
+                dicts[i][names[j]] = X[i, j]
+        else:
+            for i, d in enumerate(dicts):
+                for j, v in enumerate(X[i, :]):
+                    if v != 0:
+                        d[names[j]] = X[i, j]
+
+        return dicts
+
+    def transform(self, X):
+        """Transform feature->value dicts to array or sparse matrix.
+
+        Named features not encountered during fit or fit_transform will be
+        silently ignored.
+
+        Parameters
+        ----------
+        X : Mapping or iterable over Mappings of shape (n_samples,)
+            Dict(s) or Mapping(s) from feature names (arbitrary Python
+            objects) to feature values (strings or convertible to dtype).
+
+        Returns
+        -------
+        Xa : {array, sparse matrix}
+            Feature vectors; always 2-d.
+        """
+        if self.sparse:
+            return self._transform(X, fitting=False)
+
+        else:
+            dtype = self.dtype
+            vocab = self.vocabulary_
+            X = _tosequence(X)
+            Xa = np.zeros((len(X), len(vocab)), dtype=dtype)
+
+            for i, x in enumerate(X):
+                for f, v in x.items():
+                    if isinstance(v, str):
+                        f = "%s%s%s" % (f, self.separator, v)
+                        v = 1
+                    try:
+                        Xa[i, vocab[f]] = dtype(v)
+                    except KeyError:
+                        pass
+
+            return Xa
+
+    def get_feature_names(self):
+        """Returns a list of feature names, ordered by their indices.
+
+        If one-of-K coding is applied to categorical features, this will
+        include the constructed feature names but not the original ones.
+        """
+        return self.feature_names_
+
+    def restrict(self, support, indices=False):
+        """Restrict the features to those in support using feature selection.
+
+        This function modifies the estimator in-place.
+
+        Parameters
+        ----------
+        support : array-like
+            Boolean mask or list of indices (as returned by the get_support
+            member of feature selectors).
+        indices : bool, default=False
+            Whether support is a list of indices.
+
+        Returns
+        -------
+        self
+
+        Examples
+        --------
+        >>> from sklearn.feature_extraction import DictVectorizer
+        >>> from sklearn.feature_selection import SelectKBest, chi2
+        >>> v = DictVectorizer()
+        >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
+        >>> X = v.fit_transform(D)
+        >>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])
+        >>> v.get_feature_names()
+        ['bar', 'baz', 'foo']
+        >>> v.restrict(support.get_support())
+        DictVectorizer()
+        >>> v.get_feature_names()
+        ['bar', 'foo']
+        """
+        if not indices:
+            support = np.where(support)[0]
+
+        names = self.feature_names_
+        new_vocab = {}
+        for i in support:
+            new_vocab[names[i]] = len(new_vocab)
+
+        self.vocabulary_ = new_vocab
+        self.feature_names_ = [f for f, i in sorted(new_vocab.items(),
+                                                    key=itemgetter(1))]
+
+        return self
+
+    def _more_tags(self):
+        return {'X_types': ["dict"]}
--- a/venv/Lib/site-packages/sklearn/feature_extraction/_hash.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/_hash.py
@@ -0,0 +1,173 @@
+# Author: Lars Buitinck
+# License: BSD 3 clause
+
+import numbers
+
+import numpy as np
+import scipy.sparse as sp
+
+from ..utils import IS_PYPY
+from ..utils.validation import _deprecate_positional_args
+from ..base import BaseEstimator, TransformerMixin
+
+if not IS_PYPY:
+    from ._hashing_fast import transform as _hashing_transform
+else:
+    def _hashing_transform(*args, **kwargs):
+        raise NotImplementedError(
+                'FeatureHasher is not compatible with PyPy (see '
+                'https://github.com/scikit-learn/scikit-learn/issues/11540 '
+                'for the status updates).')
+
+
+def _iteritems(d):
+    """Like d.iteritems, but accepts any collections.abc.Mapping."""
+    return d.iteritems() if hasattr(d, "iteritems") else d.items()
+
+
+class FeatureHasher(TransformerMixin, BaseEstimator):
+    """Implements feature hashing, aka the hashing trick.
+
+    This class turns sequences of symbolic feature names (strings) into
+    scipy.sparse matrices, using a hash function to compute the matrix column
+    corresponding to a name. The hash function employed is the signed 32-bit
+    version of Murmurhash3.
+
+    Feature names of type byte string are used as-is. Unicode strings are
+    converted to UTF-8 first, but no Unicode normalization is done.
+    Feature values must be (finite) numbers.
+
+    This class is a low-memory alternative to DictVectorizer and
+    CountVectorizer, intended for large-scale (online) learning and situations
+    where memory is tight, e.g. when running prediction code on embedded
+    devices.
+
+    Read more in the :ref:`User Guide <feature_hashing>`.
+
+    .. versionadded:: 0.13
+
+    Parameters
+    ----------
+    n_features : int, default=2**20
+        The number of features (columns) in the output matrices. Small numbers
+        of features are likely to cause hash collisions, but large numbers
+        will cause larger coefficient dimensions in linear learners.
+    input_type : {"dict", "pair", "string"}, default="dict"
+        Either "dict" (the default) to accept dictionaries over
+        (feature_name, value); "pair" to accept pairs of (feature_name, value);
+        or "string" to accept single strings.
+        feature_name should be a string, while value should be a number.
+        In the case of "string", a value of 1 is implied.
+        The feature_name is hashed to find the appropriate column for the
+        feature. The value's sign might be flipped in the output (but see
+        alternate_sign, below).
+    dtype : numpy dtype, default=np.float64
+        The type of feature values. Passed to scipy.sparse matrix constructors
+        as the dtype argument. Do not set this to bool, np.bool_ or any
+        unsigned integer type.
+    alternate_sign : bool, default=True
+        When True, an alternating sign is added to the features as to
+        approximately conserve the inner product in the hashed space even for
+        small n_features. This approach is similar to sparse random projection.
+
+    .. versionchanged:: 0.19
+        ``alternate_sign`` replaces the now deprecated ``non_negative``
+        parameter.
+
+    Examples
+    --------
+    >>> from sklearn.feature_extraction import FeatureHasher
+    >>> h = FeatureHasher(n_features=10)
+    >>> D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]
+    >>> f = h.transform(D)
+    >>> f.toarray()
+    array([[ 0.,  0., -4., -1.,  0.,  0.,  0.,  0.,  0.,  2.],
+           [ 0.,  0.,  0., -2., -5.,  0.,  0.,  0.,  0.,  0.]])
+
+    See also
+    --------
+    DictVectorizer : vectorizes string-valued features using a hash table.
+    sklearn.preprocessing.OneHotEncoder : handles nominal/categorical features.
+    """
+    @_deprecate_positional_args
+    def __init__(self, n_features=(2 ** 20), *, input_type="dict",
+                 dtype=np.float64, alternate_sign=True):
+        self._validate_params(n_features, input_type)
+
+        self.dtype = dtype
+        self.input_type = input_type
+        self.n_features = n_features
+        self.alternate_sign = alternate_sign
+
+    @staticmethod
+    def _validate_params(n_features, input_type):
+        # strangely, np.int16 instances are not instances of Integral,
+        # while np.int64 instances are...
+        if not isinstance(n_features, numbers.Integral):
+            raise TypeError("n_features must be integral, got %r (%s)."
+                            % (n_features, type(n_features)))
+        elif n_features < 1 or n_features >= np.iinfo(np.int32).max + 1:
+            raise ValueError("Invalid number of features (%d)." % n_features)
+
+        if input_type not in ("dict", "pair", "string"):
+            raise ValueError("input_type must be 'dict', 'pair' or 'string',"
+                             " got %r." % input_type)
+
+    def fit(self, X=None, y=None):
+        """No-op.
+
+        This method doesn't do anything. It exists purely for compatibility
+        with the scikit-learn transformer API.
+
+        Parameters
+        ----------
+        X : ndarray
+
+        Returns
+        -------
+        self : FeatureHasher
+
+        """
+        # repeat input validation for grid search (which calls set_params)
+        self._validate_params(self.n_features, self.input_type)
+        return self
+
+    def transform(self, raw_X):
+        """Transform a sequence of instances to a scipy.sparse matrix.
+
+        Parameters
+        ----------
+        raw_X : iterable over iterable over raw features, length = n_samples
+            Samples. Each sample must be an iterable (e.g., a list or tuple)
+            containing/generating feature names (and optionally values, see
+            the input_type constructor argument) which will be hashed.
+            raw_X need not support the len function, so it can be the result
+            of a generator; n_samples is determined on the fly.
+
+        Returns
+        -------
+        X : sparse matrix of shape (n_samples, n_features)
+            Feature matrix, for use with estimators or further transformers.
+
+        """
+        raw_X = iter(raw_X)
+        if self.input_type == "dict":
+            raw_X = (_iteritems(d) for d in raw_X)
+        elif self.input_type == "string":
+            raw_X = (((f, 1) for f in x) for x in raw_X)
+        indices, indptr, values = \
+            _hashing_transform(raw_X, self.n_features, self.dtype,
+                               self.alternate_sign, seed=0)
+        n_samples = indptr.shape[0] - 1
+
+        if n_samples == 0:
+            raise ValueError("Cannot vectorize empty sequence.")
+
+        X = sp.csr_matrix((values, indices, indptr), dtype=self.dtype,
+                          shape=(n_samples, self.n_features))
+        X.sum_duplicates()  # also sorts the indices
+
+        return X
+
+    def _more_tags(self):
+        return {'X_types': [self.input_type]}
--- a/venv/Lib/site-packages/sklearn/feature_extraction/_hashing_fast.cp36-win32.pyd
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/_hashing_fast.cp36-win32.pyd
--- a/venv/Lib/site-packages/sklearn/feature_extraction/_stop_words.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/_stop_words.py
@@ -0,0 +1,45 @@
+# This list of English stop words is taken from the "Glasgow Information
+# Retrieval Group". The original list can be found at
+# http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words
+ENGLISH_STOP_WORDS = frozenset([
+    "a", "about", "above", "across", "after", "afterwards", "again", "against",
+    "all", "almost", "alone", "along", "already", "also", "although", "always",
+    "am", "among", "amongst", "amoungst", "amount", "an", "and", "another",
+    "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are",
+    "around", "as", "at", "back", "be", "became", "because", "become",
+    "becomes", "becoming", "been", "before", "beforehand", "behind", "being",
+    "below", "beside", "besides", "between", "beyond", "bill", "both",
+    "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "con",
+    "could", "couldnt", "cry", "de", "describe", "detail", "do", "done",
+    "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else",
+    "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone",
+    "everything", "everywhere", "except", "few", "fifteen", "fifty", "fill",
+    "find", "fire", "first", "five", "for", "former", "formerly", "forty",
+    "found", "four", "from", "front", "full", "further", "get", "give", "go",
+    "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter",
+    "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his",
+    "how", "however", "hundred", "i", "ie", "if", "in", "inc", "indeed",
+    "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter",
+    "latterly", "least", "less", "ltd", "made", "many", "may", "me",
+    "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly",
+    "move", "much", "must", "my", "myself", "name", "namely", "neither",
+    "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone",
+    "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on",
+    "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our",
+    "ours", "ourselves", "out", "over", "own", "part", "per", "perhaps",
+    "please", "put", "rather", "re", "same", "see", "seem", "seemed",
+    "seeming", "seems", "serious", "several", "she", "should", "show", "side",
+    "since", "sincere", "six", "sixty", "so", "some", "somehow", "someone",
+    "something", "sometime", "sometimes", "somewhere", "still", "such",
+    "system", "take", "ten", "than", "that", "the", "their", "them",
+    "themselves", "then", "thence", "there", "thereafter", "thereby",
+    "therefore", "therein", "thereupon", "these", "they", "thick", "thin",
+    "third", "this", "those", "though", "three", "through", "throughout",
+    "thru", "thus", "to", "together", "too", "top", "toward", "towards",
+    "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us",
+    "very", "via", "was", "we", "well", "were", "what", "whatever", "when",
+    "whence", "whenever", "where", "whereafter", "whereas", "whereby",
+    "wherein", "whereupon", "wherever", "whether", "which", "while", "whither",
+    "who", "whoever", "whole", "whom", "whose", "why", "will", "with",
+    "within", "without", "would", "yet", "you", "your", "yours", "yourself",
+    "yourselves"])
--- a/venv/Lib/site-packages/sklearn/feature_extraction/dict_vectorizer.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/dict_vectorizer.py
@@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _dict_vectorizer  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.feature_extraction.dict_vectorizer'
+correct_import_path = 'sklearn.feature_extraction'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    return getattr(_dict_vectorizer, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/feature_extraction/hashing.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/hashing.py
@@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _hash  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.feature_extraction.hashing'
+correct_import_path = 'sklearn.feature_extraction'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    return getattr(_hash, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/feature_extraction/image.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/image.py
@ -0,0 +1,591 @@
+"""
+The :mod:`sklearn.feature_extraction.image` submodule gathers utilities to
+extract features from images.
+"""
+
+# Authors: Emmanuelle Gouillart <emmanuelle.gouillart@normalesup.org>
+#          Gael Varoquaux <gael.varoquaux@normalesup.org>
+#          Olivier Grisel
+#          Vlad Niculae
+# License: BSD 3 clause
+
+from itertools import product
+import numbers
+import numpy as np
+from scipy import sparse
+from numpy.lib.stride_tricks import as_strided
+
+from ..utils import check_array, check_random_state, deprecated
+from ..utils.validation import _deprecate_positional_args
+from ..base import BaseEstimator
+
+# Names exported by ``from sklearn.feature_extraction.image import *``.
+__all__ = ['PatchExtractor',
+           'extract_patches_2d',
+           'grid_to_graph',
+           'img_to_graph',
+           'reconstruct_from_patches_2d']
+
+###############################################################################
+# From an image to a graph
+
+
+def _make_edges_3d(n_x, n_y, n_z=1):
+    """Enumerate the edges linking axis-neighbours of a 3D grid.
+
+    Vertices are numbered in C order over an ``(n_x, n_y, n_z)`` grid.
+
+    Parameters
+    ----------
+    n_x : int
+        The size of the grid in the x direction.
+    n_y : int
+        The size of the grid in the y direction.
+    n_z : int, default=1
+        The size of the grid in the z direction.
+
+    Returns
+    -------
+    edges : ndarray of shape (2, n_edges)
+        Each column holds the two vertex indices joined by one edge. Edges
+        along the last axis come first, then the middle axis, then the
+        first axis.
+    """
+    grid = np.arange(n_x * n_y * n_z).reshape((n_x, n_y, n_z))
+    # (start, end) vertex slabs for each axis: depth (z), right (y), down (x)
+    neighbour_slabs = (
+        (grid[:, :, :-1], grid[:, :, 1:]),
+        (grid[:, :-1], grid[:, 1:]),
+        (grid[:-1], grid[1:]),
+    )
+    per_axis = [np.vstack((start.ravel(), end.ravel()))
+                for start, end in neighbour_slabs]
+    return np.hstack(per_axis)
+
+
+def _compute_gradient_3d(edges, img):
+    """Absolute intensity difference across every edge of a 3D image.
+
+    Parameters
+    ----------
+    edges : ndarray of shape (2, n_edges)
+        Flat (C-order) voxel indices of the two end points of each edge.
+    img : ndarray with three dimensions
+        Image whose values are compared.
+
+    Returns
+    -------
+    gradient : ndarray of shape (n_edges,)
+        ``abs(img[start] - img[end])`` for each edge.
+    """
+    _, n_y, n_z = img.shape
+    plane_size = n_y * n_z
+
+    def _values_at(flat_index):
+        # split a flat index into its (x, y, z) coordinates
+        x, within_plane = np.divmod(flat_index, plane_size)
+        y, z = np.divmod(within_plane, n_z)
+        return img[x, y, z]
+
+    return np.abs(_values_at(edges[0]) - _values_at(edges[1]))
+
+
+# XXX: Why mask the image after computing the weights?
+
+def _mask_edges_weights(mask, edges, weights=None):
+    """Apply a mask to edges (weighted or not).
+
+    Only edges whose two end points are both selected by ``mask`` are kept.
+    The vertex ids that still appear in an edge are then renumbered to
+    consecutive integers, preserving their order.
+
+    Parameters
+    ----------
+    mask : ndarray
+        Vertex selector; it is flattened and used to index the array of
+        vertex ids.
+    edges : ndarray of shape (2, n_edges)
+        Vertex ids of the end points of each edge.
+    weights : ndarray of shape (n_edges,), default=None
+        Optional per-edge weights, filtered alongside ``edges``.
+
+    Returns
+    -------
+    edges : ndarray of shape (2, n_kept_edges)
+        The surviving edges, expressed with the renumbered vertex ids.
+    weights : ndarray of shape (n_kept_edges,)
+        Only returned when ``weights`` was given.
+    """
+    inds = np.arange(mask.size)
+    inds = inds[mask.ravel()]
+    # np.isin supersedes np.in1d, which is deprecated since NumPy 2.0
+    ind_mask = np.logical_and(np.isin(edges[0], inds),
+                              np.isin(edges[1], inds))
+    edges = edges[:, ind_mask]
+    if weights is not None:
+        weights = weights[ind_mask]
+    # ``max`` of an empty array raises, so fall back to 0 when no edge is left
+    maxval = edges.max() if edges.size else 0
+    # lookup table mapping each old vertex id to its rank among the ids
+    # that are still in use
+    order = np.searchsorted(np.unique(edges.ravel()), np.arange(maxval + 1))
+    edges = order[edges]
+    if weights is None:
+        return edges
+    return edges, weights
+
+
+def _to_graph(n_x, n_y, n_z, mask=None, img=None,
+              return_as=sparse.coo_matrix, dtype=None):
+    """Auxiliary function for img_to_graph and grid_to_graph.
+
+    Builds the symmetric adjacency matrix of the ``(n_x, n_y, n_z)`` grid.
+    Every edge is stored in both directions and each voxel also gets a
+    diagonal entry.
+
+    Parameters
+    ----------
+    n_x, n_y, n_z : int
+        Grid dimensions.
+    mask : ndarray, default=None
+        Optional selector restricting the graph to part of the voxels.
+    img : ndarray, default=None
+        If given, edges are weighted by the absolute difference of the
+        image values at their end points and the diagonal holds the image
+        values. Otherwise all weights and diagonal entries are one.
+    return_as : np.ndarray or a sparse matrix class, \
+            default=sparse.coo_matrix
+        The class used to build the returned adjacency matrix.
+    dtype : dtype, default=None
+        Data type of the result. Defaults to ``img.dtype`` when an image is
+        given and to ``int`` otherwise.
+
+    Returns
+    -------
+    graph : ndarray or sparse matrix of shape (n_voxels, n_voxels)
+    """
+    edges = _make_edges_3d(n_x, n_y, n_z)
+
+    if dtype is None:
+        # Builtin ``int`` replaces the ``np.int`` alias, which was removed
+        # in NumPy 1.24.
+        dtype = int if img is None else img.dtype
+
+    if img is not None:
+        img = np.atleast_3d(img)
+        weights = _compute_gradient_3d(edges, img)
+        if mask is not None:
+            edges, weights = _mask_edges_weights(mask, edges, weights)
+            diag = img.squeeze()[mask]
+        else:
+            diag = img.ravel()
+        n_voxels = diag.size
+    else:
+        if mask is not None:
+            # Builtin ``bool`` replaces the removed ``np.bool`` alias; an
+            # array that is already boolean is not copied.
+            mask = np.asarray(mask, dtype=bool)
+            edges = _mask_edges_weights(mask, edges)
+            n_voxels = np.sum(mask)
+        else:
+            n_voxels = n_x * n_y * n_z
+        weights = np.ones(edges.shape[1], dtype=dtype)
+        diag = np.ones(n_voxels, dtype=dtype)
+
+    diag_idx = np.arange(n_voxels)
+    # store every edge in both directions so that the matrix is symmetric
+    i_idx = np.hstack((edges[0], edges[1]))
+    j_idx = np.hstack((edges[1], edges[0]))
+    graph = sparse.coo_matrix((np.hstack((weights, weights, diag)),
+                              (np.hstack((i_idx, diag_idx)),
+                               np.hstack((j_idx, diag_idx)))),
+                              (n_voxels, n_voxels),
+                              dtype=dtype)
+    if return_as is np.ndarray:
+        return graph.toarray()
+    return return_as(graph)
+
+
+@_deprecate_positional_args
+def img_to_graph(img, *, mask=None, return_as=sparse.coo_matrix, dtype=None):
+    """Graph of the pixel-to-pixel gradient connections
+
+    Edges are weighted with the gradient values.
+
+    Read more in the :ref:`User Guide <image_feature_extraction>`.
+
+    Parameters
+    ----------
+    img : ndarray of shape (height, width) or (height, width, channel)
+        2D or 3D image.
+    mask : ndarray of shape (height, width) or \
+            (height, width, channel), dtype=bool, default=None
+        An optional mask of the image, to consider only part of the
+        pixels.
+    return_as : np.ndarray or a sparse matrix class, \
+            default=sparse.coo_matrix
+        The class to use to build the returned adjacency matrix.
+    dtype : dtype, default=None
+        The data type of the returned matrix. By default it is the
+        dtype of img.
+
+    Notes
+    -----
+    For scikit-learn versions 0.14.1 and prior, return_as=np.ndarray was
+    handled by returning a dense np.matrix instance.  Going forward, np.ndarray
+    returns an np.ndarray, as expected.
+
+    For compatibility, user code relying on this method should wrap its
+    calls in ``np.asarray`` to avoid type issues.
+    """
+    # promote 2D images to 3D so that a single code path handles both
+    volume = np.atleast_3d(img)
+    n_x, n_y, n_z = volume.shape
+    return _to_graph(n_x, n_y, n_z, mask=mask, img=volume,
+                     return_as=return_as, dtype=dtype)
+
+
+@_deprecate_positional_args
+def grid_to_graph(n_x, n_y, n_z=1, *, mask=None, return_as=sparse.coo_matrix,
+                  dtype=int):
+    """Graph of the pixel-to-pixel connections
+
+    Edges exist if 2 voxels are connected.
+
+    Parameters
+    ----------
+    n_x : int
+        Dimension in x axis
+    n_y : int
+        Dimension in y axis
+    n_z : int, default=1
+        Dimension in z axis
+    mask : ndarray of shape (n_x, n_y, n_z), dtype=bool, default=None
+        An optional mask of the image, to consider only part of the
+        pixels.
+    return_as : np.ndarray or a sparse matrix class, \
+            default=sparse.coo_matrix
+        The class to use to build the returned adjacency matrix.
+    dtype : dtype, default=int
+        The data type of the returned matrix. By default it is int.
+
+    Notes
+    -----
+    For scikit-learn versions 0.14.1 and prior, return_as=np.ndarray was
+    handled by returning a dense np.matrix instance.  Going forward, np.ndarray
+    returns an np.ndarray, as expected.
+
+    For compatibility, user code relying on this method should wrap its
+    calls in ``np.asarray`` to avoid type issues.
+    """
+    # The default is the builtin ``int``: the ``np.int`` alias it used to
+    # spell was removed in NumPy 1.24, and a default argument is evaluated
+    # when the function is defined, i.e. at import time.
+    return _to_graph(n_x, n_y, n_z, mask=mask, return_as=return_as,
+                     dtype=dtype)
+
+
+###############################################################################
+# From an image to a set of small image patches
+
+def _compute_n_patches(i_h, i_w, p_h, p_w, max_patches=None):
+    """Compute the number of patches that will be extracted in an image.
+
+    Read more in the :ref:`User Guide <image_feature_extraction>`.
+
+    Parameters
+    ----------
+    i_h : int
+        The image height
+    i_w : int
+        The image width
+    p_h : int
+        The height of a patch
+    p_w : int
+        The width of a patch
+    max_patches : int or float, default=None
+        The maximum number of patches to extract. If max_patches is a float
+        between 0 and 1, it is taken to be a proportion of the total number
+        of patches.
+
+    Returns
+    -------
+    n_patches : int
+        The total number of patch positions, capped by ``max_patches`` when
+        it is given.
+
+    Raises
+    ------
+    ValueError
+        If ``max_patches`` is truthy but is neither an integer nor a real
+        number strictly between 0 and 1.
+    """
+    # one patch per admissible top-left corner
+    total = (i_h - p_h + 1) * (i_w - p_w + 1)
+
+    if not max_patches:
+        return total
+    if isinstance(max_patches, numbers.Integral):
+        # an absolute count, capped by what the image can provide
+        return max_patches if max_patches < total else total
+    if isinstance(max_patches, numbers.Real) and 0 < max_patches < 1:
+        # a proportion of all the patches
+        return int(max_patches * total)
+    raise ValueError("Invalid value for max_patches: %r" % max_patches)
+
+
+def _extract_patches(arr, patch_shape=8, extraction_step=1):
+    """Extracts patches of any n-dimensional array in place using strides.
+
+    Given an n-dimensional array it will return a 2n-dimensional array with
+    the first n dimensions indexing patch position and the last n indexing
+    the patch content. This operation is immediate (O(1)). A reshape
+    performed on the first n dimensions will cause numpy to copy data, leading
+    to a list of extracted patches.
+
+    Read more in the :ref:`User Guide <image_feature_extraction>`.
+
+    Parameters
+    ----------
+    arr : ndarray
+        n-dimensional array of which patches are to be extracted
+
+    patch_shape : int or tuple of length arr.ndim, default=8
+        Indicates the shape of the patches to be extracted. If an
+        integer is given, the shape will be a hypercube of
+        sidelength given by its value.
+
+    extraction_step : int or tuple of length arr.ndim, default=1
+        Indicates step size at which extraction shall be performed.
+        If integer is given, then the step is uniform in all dimensions.
+
+
+    Returns
+    -------
+    patches : strided ndarray
+        2n-dimensional array indexing patches on first n dimensions and
+        containing patches on the last n dimensions. These dimensions
+        are fake, but this way no data is copied. A simple reshape invokes
+        a copying operation to obtain a list of patches:
+        result.reshape([-1] + list(patch_shape))
+    """
+    n_dims = arr.ndim
+
+    # broadcast scalar arguments to one value per dimension
+    if isinstance(patch_shape, numbers.Number):
+        patch_shape = (patch_shape,) * n_dims
+    if isinstance(extraction_step, numbers.Number):
+        extraction_step = (extraction_step,) * n_dims
+
+    # a view sampled every ``step`` elements has exactly the strides needed
+    # to move from one patch origin to the next
+    stepped_view = arr[tuple(slice(None, None, step)
+                             for step in extraction_step)]
+
+    n_positions = ((np.array(arr.shape) - np.array(patch_shape)) //
+                   np.array(extraction_step)) + 1
+
+    out_shape = tuple(n_positions) + tuple(patch_shape)
+    out_strides = tuple(stepped_view.strides) + tuple(arr.strides)
+
+    return as_strided(arr, shape=out_shape, strides=out_strides)
+
+
+@deprecated("The function feature_extraction.image.extract_patches has been "
+            "deprecated in 0.22 and will be removed in 0.24.")
+def extract_patches(arr, patch_shape=8, extraction_step=1):
+    """Extracts patches of any n-dimensional array in place using strides.
+
+    Deprecated public alias: all the work is delegated to
+    ``_extract_patches``.
+
+    Given an n-dimensional array it will return a 2n-dimensional array with
+    the first n dimensions indexing patch position and the last n indexing
+    the patch content. This operation is immediate (O(1)). A reshape
+    performed on the first n dimensions will cause numpy to copy data, leading
+    to a list of extracted patches.
+
+    Read more in the :ref:`User Guide <image_feature_extraction>`.
+
+    Parameters
+    ----------
+    arr : ndarray
+        n-dimensional array of which patches are to be extracted
+
+    patch_shape : int or tuple of length arr.ndim, default=8
+        Indicates the shape of the patches to be extracted. If an
+        integer is given, the shape will be a hypercube of
+        sidelength given by its value.
+
+    extraction_step : int or tuple of length arr.ndim, default=1
+        Indicates step size at which extraction shall be performed.
+        If integer is given, then the step is uniform in all dimensions.
+
+
+    Returns
+    -------
+    patches : strided ndarray
+        2n-dimensional array indexing patches on first n dimensions and
+        containing patches on the last n dimensions. These dimensions
+        are fake, but this way no data is copied. A simple reshape invokes
+        a copying operation to obtain a list of patches:
+        result.reshape([-1] + list(patch_shape))
+    """
+    return _extract_patches(arr, patch_shape, extraction_step)
+
+
+@_deprecate_positional_args
+def extract_patches_2d(image, patch_size, *, max_patches=None,
+                       random_state=None):
+    """Reshape a 2D image into a collection of patches
+
+    The resulting patches are allocated in a dedicated array.
+
+    Read more in the :ref:`User Guide <image_feature_extraction>`.
+
+    Parameters
+    ----------
+    image : ndarray of shape (image_height, image_width) or \
+        (image_height, image_width, n_channels)
+        The original image data. For color images, the last dimension specifies
+        the channel: a RGB image would have `n_channels=3`.
+
+    patch_size : tuple of int (patch_height, patch_width)
+        The dimensions of one patch.
+
+    max_patches : int or float, default=None
+        The maximum number of patches to extract. If `max_patches` is a float
+        between 0 and 1, it is taken to be a proportion of the total number
+        of patches.
+
+    random_state : int, RandomState instance, default=None
+        Determines the random number generator used for random sampling when
+        `max_patches` is not None. Use an int to make the randomness
+        deterministic.
+        See :term:`Glossary <random_state>`.
+
+    Returns
+    -------
+    patches : array of shape (n_patches, patch_height, patch_width) or \
+        (n_patches, patch_height, patch_width, n_channels)
+        The collection of patches extracted from the image, where `n_patches`
+        is either `max_patches` or the total number of patches that can be
+        extracted.
+
+    Raises
+    ------
+    ValueError
+        If the patch is taller or wider than the image.
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_sample_image
+    >>> from sklearn.feature_extraction import image
+    >>> # Use the array data from the first image in this dataset:
+    >>> one_image = load_sample_image("china.jpg")
+    >>> print('Image shape: {}'.format(one_image.shape))
+    Image shape: (427, 640, 3)
+    >>> patches = image.extract_patches_2d(one_image, (2, 2))
+    >>> print('Patches shape: {}'.format(patches.shape))
+    Patches shape: (272214, 2, 2, 3)
+    >>> # Here are just two of these patches:
+    >>> print(patches[1])
+    [[[174 201 231]
+      [174 201 231]]
+     [[173 200 230]
+      [173 200 230]]]
+    >>> print(patches[800])
+    [[[187 214 243]
+      [188 215 244]]
+     [[187 214 243]
+      [188 215 244]]]
+    """
+    img_h, img_w = image.shape[:2]
+    patch_h, patch_w = patch_size
+
+    if patch_h > img_h:
+        raise ValueError("Height of the patch should be less than the height"
+                         " of the image.")
+
+    if patch_w > img_w:
+        raise ValueError("Width of the patch should be less than the width"
+                         " of the image.")
+
+    image = check_array(image, allow_nd=True)
+    # give every image an explicit channel axis
+    image = image.reshape((img_h, img_w, -1))
+    n_channels = image.shape[-1]
+
+    all_patches = _extract_patches(
+        image, patch_shape=(patch_h, patch_w, n_channels), extraction_step=1)
+
+    n_patches = _compute_n_patches(img_h, img_w, patch_h, patch_w,
+                                   max_patches)
+    if not max_patches:
+        selected = all_patches
+    else:
+        # draw random top-left corners: all the rows first, then the columns
+        rng = check_random_state(random_state)
+        rows = rng.randint(img_h - patch_h + 1, size=n_patches)
+        cols = rng.randint(img_w - patch_w + 1, size=n_patches)
+        selected = all_patches[rows, cols, 0]
+
+    selected = selected.reshape(-1, patch_h, patch_w, n_channels)
+    # remove the color dimension if useless
+    if n_channels == 1:
+        return selected.reshape((n_patches, patch_h, patch_w))
+    return selected
+
+
+def reconstruct_from_patches_2d(patches, image_size):
+    """Reconstruct the image from all of its patches.
+
+    Patches are assumed to overlap and the image is constructed by filling in
+    the patches from left to right, top to bottom, averaging the overlapping
+    regions.
+
+    Read more in the :ref:`User Guide <image_feature_extraction>`.
+
+    Parameters
+    ----------
+    patches : ndarray of shape (n_patches, patch_height, patch_width) or \
+        (n_patches, patch_height, patch_width, n_channels)
+        The complete set of patches. If the patches contain colour information,
+        channels are indexed along the last dimension: RGB patches would
+        have `n_channels=3`.
+
+    image_size : tuple of int (image_height, image_width) or \
+        (image_height, image_width, n_channels)
+        The size of the image that will be reconstructed.
+
+    Returns
+    -------
+    image : ndarray of shape image_size
+        The reconstructed image.
+    """
+    img_h, img_w = image_size[:2]
+    patch_h, patch_w = patches.shape[1:3]
+    image = np.zeros(image_size)
+
+    # number of patch positions along each axis
+    n_rows = img_h - patch_h + 1
+    n_cols = img_w - patch_w + 1
+
+    # accumulate each patch at its top-left corner, in row-major order
+    corners = product(range(n_rows), range(n_cols))
+    for patch, (row, col) in zip(patches, corners):
+        image[row:row + patch_h, col:col + patch_w] += patch
+
+    # turn the sums into means: divide by the amount of overlap
+    for row in range(img_h):
+        row_overlap = min(row + 1, patch_h, img_h - row)
+        for col in range(img_w):
+            col_overlap = min(col + 1, patch_w, img_w - col)
+            image[row, col] /= float(row_overlap * col_overlap)
+    return image
+
+
+class PatchExtractor(BaseEstimator):
+    """Extracts patches from a collection of images
+
+    Read more in the :ref:`User Guide <image_feature_extraction>`.
+
+    .. versionadded:: 0.9
+
+    Parameters
+    ----------
+    patch_size : tuple of int (patch_height, patch_width)
+        The dimensions of one patch. When left to None, a tenth of the image
+        height and width is used.
+
+    max_patches : int or float, default=None
+        The maximum number of patches per image to extract. If max_patches is a
+        float in (0, 1), it is taken to mean a proportion of the total number
+        of patches.
+
+    random_state : int, RandomState instance, default=None
+        Determines the random number generator used for random sampling when
+        `max_patches` is not None. Use an int to make the randomness
+        deterministic.
+        See :term:`Glossary <random_state>`.
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_sample_images
+    >>> from sklearn.feature_extraction import image
+    >>> # Use the array data from the second image in this dataset:
+    >>> X = load_sample_images().images[1]
+    >>> print('Image shape: {}'.format(X.shape))
+    Image shape: (427, 640, 3)
+    >>> pe = image.PatchExtractor(patch_size=(2, 2))
+    >>> pe_fit = pe.fit(X)
+    >>> pe_trans = pe.transform(X)
+    >>> print('Patches shape: {}'.format(pe_trans.shape))
+    Patches shape: (545706, 2, 2)
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, patch_size=None, max_patches=None,
+                 random_state=None):
+        self.patch_size = patch_size
+        self.max_patches = max_patches
+        self.random_state = random_state
+
+    def fit(self, X, y=None):
+        """Do nothing and return the estimator unchanged.
+
+        This method is just there to implement the usual API and hence
+        work in pipelines.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data. Not used by this method.
+
+        y : object, default=None
+            Not used by this method.
+
+        Returns
+        -------
+        self : PatchExtractor
+            The estimator itself.
+        """
+        return self
+
+    def transform(self, X):
+        """Transforms the image samples in X into a matrix of patch data.
+
+        Parameters
+        ----------
+        X : ndarray of shape (n_samples, image_height, image_width) or \
+            (n_samples, image_height, image_width, n_channels)
+            Array of images from which to extract patches. For color images,
+            the last dimension specifies the channel: a RGB image would have
+            `n_channels=3`.
+
+        Returns
+        -------
+        patches : array of shape (n_patches, patch_height, patch_width) or \
+             (n_patches, patch_height, patch_width, n_channels)
+             The collection of patches extracted from the images, where
+             `n_patches` is either `n_samples * max_patches` or the total
+             number of patches that can be extracted.
+        """
+        # Resolve the generator into a local variable. Storing it back on
+        # ``self.random_state`` would overwrite a constructor parameter, so
+        # an int seed would no longer give the same patches on every call.
+        random_state = check_random_state(self.random_state)
+        n_images, i_h, i_w = X.shape[:3]
+        # give every image an explicit channel axis
+        X = np.reshape(X, (n_images, i_h, i_w, -1))
+        n_channels = X.shape[-1]
+        if self.patch_size is None:
+            patch_size = i_h // 10, i_w // 10
+        else:
+            # a tuple is needed for the shape concatenation below; this also
+            # lets a list be passed as ``patch_size``
+            patch_size = tuple(self.patch_size)
+
+        # compute the dimensions of the patches array
+        p_h, p_w = patch_size
+        n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, self.max_patches)
+        patches_shape = (n_images * n_patches,) + patch_size
+        if n_channels > 1:
+            patches_shape += (n_channels,)
+
+        # extract the patches; the same generator instance is shared by all
+        # the images so that each one gets different random positions
+        patches = np.empty(patches_shape)
+        for ii, image in enumerate(X):
+            patches[ii * n_patches:(ii + 1) * n_patches] = extract_patches_2d(
+                image, patch_size, max_patches=self.max_patches,
+                random_state=random_state)
+        return patches
+
+    def _more_tags(self):
+        return {'X_types': ['3darray']}
--- a/venv/Lib/site-packages/sklearn/feature_extraction/setup.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/setup.py
@ -0,0 +1,21 @@
+import os
+import platform
+
+
+def configuration(parent_package='', top_path=None):
+    """Build the numpy.distutils configuration of ``feature_extraction``.
+
+    Registers the ``_hashing_fast`` extension (skipped on PyPy) and the
+    ``tests`` subpackage.
+
+    Parameters
+    ----------
+    parent_package : str, default=''
+        Passed through to ``Configuration``.
+    top_path : str, default=None
+        Passed through to ``Configuration``.
+
+    Returns
+    -------
+    config : numpy.distutils.misc_util.Configuration
+    """
+    import numpy
+    from numpy.distutils.misc_util import Configuration
+
+    config = Configuration('feature_extraction', parent_package, top_path)
+
+    # the 'm' library is only requested on POSIX systems
+    libraries = ['m'] if os.name == 'posix' else []
+
+    running_on_pypy = platform.python_implementation() == 'PyPy'
+    if not running_on_pypy:
+        config.add_extension('_hashing_fast',
+                             sources=['_hashing_fast.pyx'],
+                             include_dirs=[numpy.get_include()],
+                             libraries=libraries)
+    config.add_subpackage("tests")
+
+    return config
--- a/venv/Lib/site-packages/sklearn/feature_extraction/stop_words.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/stop_words.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _stop_words  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+# Legacy import location of this shim and the module that replaces it.
+deprecated_path = 'sklearn.feature_extraction.stop_words'
+correct_import_path = 'sklearn.feature_extraction.text'
+
+# Called at import time with both paths (presumably emits the deprecation
+# warning unless running under pytest -- see sklearn.utils.deprecation).
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+
+def __getattr__(name):
+    """Forward module-level attribute lookups to ``_stop_words``."""
+    return getattr(_stop_words, name)
+
+
+# Module-level ``__getattr__`` (PEP 562) is only native from Python 3.7 on;
+# older interpreters are routed through the ``Pep562`` helper.
+if sys.version_info < (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/feature_extraction/tests/init.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/tests/init.py
--- a/venv/Lib/site-packages/sklearn/feature_extraction/tests/pycache/init.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/tests/pycache/init.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/tests/pycache/test_dict_vectorizer.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/tests/pycache/test_dict_vectorizer.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/tests/pycache/test_feature_hasher.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/tests/pycache/test_feature_hasher.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/tests/pycache/test_image.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/tests/pycache/test_image.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/tests/pycache/test_text.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/tests/pycache/test_text.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_extraction/tests/test_dict_vectorizer.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/tests/test_dict_vectorizer.py
@ -0,0 +1,121 @@
+# Authors: Lars Buitinck
+#          Dan Blanchard <dblanchard@ets.org>
+# License: BSD 3 clause
+
+from random import Random
+import numpy as np
+import scipy.sparse as sp
+from numpy.testing import assert_array_equal
+
+import pytest
+
+from sklearn.feature_extraction import DictVectorizer
+from sklearn.feature_selection import SelectKBest, chi2
+
+
+@pytest.mark.parametrize('sparse', (True, False))
+@pytest.mark.parametrize('dtype', (int, np.float32, np.int16))
+@pytest.mark.parametrize('sort', (True, False))
+@pytest.mark.parametrize('iterable', (True, False))
+def test_dictvectorizer(sparse, dtype, sort, iterable):
+    """Round-trip a small dataset through fit_transform and transform."""
+    D = [{"foo": 1, "bar": 3},
+         {"bar": 4, "baz": 2},
+         {"bar": 1, "quux": 1, "quuux": 2}]
+
+    def samples():
+        # a fresh one-shot iterator on each call when ``iterable`` is set
+        return iter(D) if iterable else D
+
+    vectorizer = DictVectorizer(sparse=sparse, dtype=dtype, sort=sort)
+    X = vectorizer.fit_transform(samples())
+
+    assert sp.issparse(X) == sparse
+    assert X.shape == (3, 5)
+    assert X.sum() == 14
+    assert vectorizer.inverse_transform(X) == D
+
+    X_again = vectorizer.transform(samples())
+    if sparse:
+        # CSR matrices can't be compared for equality
+        assert_array_equal(X.A, X_again.A)
+    else:
+        assert_array_equal(X, X_again)
+
+    if sort:
+        names = vectorizer.feature_names_
+        assert names == sorted(names)
+
+
+def test_feature_selection():
+    """``restrict`` keeps only the two features selected by chi2."""
+    # make two feature dicts with two useful features and a bunch of useless
+    # ones, in terms of chi2
+    useless = [("useless%d" % i, 10) for i in range(20)]
+    d1 = dict(useless, useful1=1, useful2=20)
+    d2 = dict(useless, useful1=20, useful2=1)
+    samples = [d1, d2]
+
+    for indices in (True, False):
+        vectorizer = DictVectorizer().fit(samples)
+        X = vectorizer.transform(samples)
+        selector = SelectKBest(chi2, k=2).fit(X, [0, 1])
+
+        support = selector.get_support(indices=indices)
+        vectorizer.restrict(support, indices=indices)
+        assert vectorizer.get_feature_names() == ["useful1", "useful2"]
+
+
+def test_one_of_k():
+    """String values are expanded into one ``name=value`` feature each."""
+    samples = [{"version": "1", "ham": 2},
+               {"version": "2", "spam": .3},
+               {"version=3": True, "spam": -1}]
+    vectorizer = DictVectorizer()
+    X = vectorizer.fit_transform(samples)
+    assert X.shape == (3, 5)
+
+    recovered = vectorizer.inverse_transform(X)
+    assert recovered[0] == {"version=1": 1, "ham": 2}
+
+    feature_names = vectorizer.get_feature_names()
+    assert "version=2" in feature_names
+    assert "version" not in feature_names
+
+
+def test_unseen_or_no_features():
+    """Unknown or missing features transform to an all-zero row."""
+    D = [{"camelot": 0, "spamalot": 1}]
+    all_zeros = np.zeros((1, 2))
+
+    for sparse in [True, False]:
+        v = DictVectorizer(sparse=sparse).fit(D)
+
+        # first a sample with an unseen feature, then one with no feature
+        for sample in ({"push the pram a lot": 2}, {}):
+            X = v.transform(sample)
+            if sparse:
+                X = X.toarray()
+            assert_array_equal(X, all_zeros)
+
+        # The message is only checked when a ValueError is actually raised;
+        # a call that does not raise is accepted as well.
+        try:
+            v.transform([])
+        except ValueError as e:
+            assert "empty" in str(e)
+
+
+def test_deterministic_vocabulary():
+    """The fitted vocabulary does not depend on dict insertion order."""
+    # Generate equal dictionaries with different memory layouts
+    pairs = [("%03d" % i, i) for i in range(1000)]
+    in_order = dict(pairs)
+    Random(42).shuffle(pairs)
+    shuffled = dict(pairs)
+
+    # check that the memory layout does not impact the resulting vocabulary
+    vocabulary_in_order = DictVectorizer().fit([in_order]).vocabulary_
+    vocabulary_shuffled = DictVectorizer().fit([shuffled]).vocabulary_
+
+    assert vocabulary_in_order == vocabulary_shuffled
+
+
+def test_n_features_in():
+    """``n_features_in_`` is absent both before and after fitting."""
+    # For vectorizers, n_features_in_ does not make sense and does not exist.
+    vectorizer = DictVectorizer()
+    assert not hasattr(vectorizer, 'n_features_in_')
+
+    samples = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
+    vectorizer.fit(samples)
+    assert not hasattr(vectorizer, 'n_features_in_')
--- a/venv/Lib/site-packages/sklearn/feature_extraction/tests/test_feature_hasher.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/tests/test_feature_hasher.py
@ -0,0 +1,171 @@
+
+import numpy as np
+from numpy.testing import assert_array_equal
+import pytest
+
+from sklearn.feature_extraction import FeatureHasher
+from sklearn.utils._testing import (ignore_warnings,
+                                   fails_if_pypy)
+
+# pytest applies a module-level ``pytestmark`` to every test in this module
+# (``fails_if_pypy`` presumably marks them as expected failures on PyPy --
+# see sklearn.utils._testing).
+pytestmark = fails_if_pypy
+
+
+def test_feature_hasher_dicts():
+    """Dict input and the equivalent (key, value) pairs hash identically."""
+    default_hasher = FeatureHasher(n_features=16)
+    assert "dict" == default_hasher.input_type
+
+    raw_X = [{"foo": "bar", "dada": 42, "tzara": 37},
+             {"foo": "baz", "gaga": "string1"}]
+    from_dicts = FeatureHasher(n_features=16).transform(raw_X)
+
+    pair_iterators = (iter(d.items()) for d in raw_X)
+    pair_hasher = FeatureHasher(n_features=16, input_type="pair")
+    from_pairs = pair_hasher.transform(pair_iterators)
+
+    assert_array_equal(from_dicts.toarray(), from_pairs.toarray())
+
+
+def test_feature_hasher_strings():
+    """String tokens, bytes or unicode, are counted for several sizes."""
+    # mix byte and Unicode strings; note that "foo" is a duplicate in row 0
+    raw_X = [["foo", "bar", "baz", b"foo"],
+             [b"bar", "baz", "quux"]]
+
+    for exponent in (7, 9, 11, 16, 22):
+        n_features = 2 ** exponent
+
+        hasher = FeatureHasher(n_features=n_features, input_type="string",
+                               alternate_sign=False)
+        # feed a one-shot iterable rather than the list itself
+        X = hasher.transform(row for row in raw_X)
+
+        assert X.shape == (len(raw_X), n_features)
+
+        assert X[0].sum() == 4
+        assert X[1].sum() == 3
+
+        assert X.nnz == 6
+
+
+def test_hashing_transform_seed():
+    """The default seed behaves like ``seed=0``; ``seed=1`` differs."""
+    # check the influence of the seed when computing the hashes
+    # import is here to avoid importing on pypy
+    from sklearn.feature_extraction._hashing_fast import (
+            transform as _hashing_transform)
+    raw_X = [["foo", "bar", "baz", b"foo"],
+             [b"bar", "baz", "quux"]]
+
+    def weighted_rows():
+        # generators are consumed by each call, so rebuild them every time
+        return (((token, 1) for token in row) for row in raw_X)
+
+    indices, indptr, _ = _hashing_transform(weighted_rows(), 2 ** 7, str,
+                                            False)
+
+    indices_0, indptr_0, _ = _hashing_transform(weighted_rows(), 2 ** 7, str,
+                                                False, seed=0)
+    assert_array_equal(indices, indices_0)
+    assert_array_equal(indptr, indptr_0)
+
+    indices_1, _, _ = _hashing_transform(weighted_rows(), 2 ** 7, str,
+                                         False, seed=1)
+    with pytest.raises(AssertionError):
+        assert_array_equal(indices, indices_1)
+
+
+def test_feature_hasher_pairs():
+    """Numeric pair values survive hashing, up to their sign."""
+    samples = [{"foo": 1, "bar": 2},
+               {"baz": 3, "quux": 4, "foo": -1}]
+    raw_X = (iter(d.items()) for d in samples)
+    hasher = FeatureHasher(n_features=16, input_type="pair")
+    row_1, row_2 = hasher.transform(raw_X).toarray()
+
+    magnitudes_1 = sorted(np.abs(row_1[row_1 != 0]))
+    magnitudes_2 = sorted(np.abs(row_2[row_2 != 0]))
+    assert [1, 2] == magnitudes_1
+    assert [1, 3, 4] == magnitudes_2
+
+
+def test_feature_hasher_pairs_with_string_values():
+    """A string value in a pair contributes a magnitude of one."""
+    hasher = FeatureHasher(n_features=16, input_type="pair")
+
+    mixed = [{"foo": 1, "bar": "a"},
+             {"baz": "abc", "quux": 4, "foo": -1}]
+    row_1, row_2 = hasher.transform(
+        iter(d.items()) for d in mixed).toarray()
+    magnitudes_1 = sorted(np.abs(row_1[row_1 != 0]))
+    magnitudes_2 = sorted(np.abs(row_2[row_2 != 0]))
+    assert [1, 1] == magnitudes_1
+    assert [1, 1, 4] == magnitudes_2
+
+    # two identical samples must hash to identical rows
+    duplicated = [{"bax": "abc"},
+                  {"bax": "abc"}]
+    row_1, row_2 = hasher.transform(
+        iter(d.items()) for d in duplicated).toarray()
+    magnitudes_1 = np.abs(row_1[row_1 != 0])
+    magnitudes_2 = np.abs(row_2[row_2 != 0])
+    assert [1] == magnitudes_1
+    assert [1] == magnitudes_2
+    assert_array_equal(row_1, row_2)
+
+
+def test_hash_empty_input():
+    """Empty samples, whatever their container, give all-zero rows."""
+    n_features = 16
+    empty_samples = [[], (), iter(range(0))]
+
+    hasher = FeatureHasher(n_features=n_features, input_type="string")
+    X = hasher.transform(empty_samples)
+
+    expected = np.zeros((len(empty_samples), n_features))
+    assert_array_equal(X.A, expected)
+
+
+def test_hasher_invalid_input():
+    """Invalid constructor arguments and invalid samples are rejected."""
+    bad_constructor_args = [
+        (ValueError, dict(input_type="gobbledygook")),
+        (ValueError, dict(n_features=-1)),
+        (ValueError, dict(n_features=0)),
+        (TypeError, dict(n_features='ham')),
+    ]
+    for expected_error, kwargs in bad_constructor_args:
+        with pytest.raises(expected_error):
+            FeatureHasher(**kwargs)
+
+    hasher = FeatureHasher(n_features=np.uint16(2 ** 6))
+    bad_samples = [
+        (ValueError, []),
+        (Exception, [[5.5]]),
+        (Exception, [[None]]),
+    ]
+    for expected_error, raw_X in bad_samples:
+        with pytest.raises(expected_error):
+            hasher.transform(raw_X)
+
+
+def test_hasher_set_params():
+    """An invalid ``n_features`` set afterwards is only rejected by fit."""
+    # Test delayed input validation in fit (useful for grid search).
+    hasher = FeatureHasher()
+    hasher.set_params(n_features=np.inf)
+    with pytest.raises(TypeError):
+        hasher.fit()
+
+
+def test_hasher_zeros():
+    """A zero-valued feature stores nothing in the sparse output."""
+    # Assert that no zeros are materialized in the output.
+    hashed = FeatureHasher().transform([{'foo': 0}])
+    assert hashed.data.shape == (0,)
+
+
+@ignore_warnings(category=FutureWarning)
+def test_hasher_alternate_sign():
+    """``alternate_sign`` decides whether negative values can appear."""
+    X = [list("Thequickbrownfoxjumped")]
+
+    signed = FeatureHasher(alternate_sign=True,
+                           input_type='string').fit_transform(X)
+    assert signed.data.min() < 0 and signed.data.max() > 0
+
+    unsigned = FeatureHasher(alternate_sign=False,
+                             input_type='string').fit_transform(X)
+    assert unsigned.data.min() > 0
+
+
+def test_hash_collisions():
+    """With a single bucket, all the tokens collide into one entry."""
+    X = [list("Thequickbrownfoxjumped")]
+    n_tokens = len(X[0])
+
+    signed = FeatureHasher(alternate_sign=True, n_features=1,
+                           input_type='string').fit_transform(X)
+    # check that some of the hashed tokens are added
+    # with an opposite sign and cancel out
+    assert abs(signed.data[0]) < n_tokens
+
+    unsigned = FeatureHasher(alternate_sign=False, n_features=1,
+                             input_type='string').fit_transform(X)
+    assert unsigned.data[0] == n_tokens
--- a/venv/Lib/site-packages/sklearn/feature_extraction/tests/test_image.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/tests/test_image.py
@ -0,0 +1,344 @@
+# Authors: Emmanuelle Gouillart <emmanuelle.gouillart@normalesup.org>
+#          Gael Varoquaux <gael.varoquaux@normalesup.org>
+# License: BSD 3 clause
+
+import numpy as np
+import scipy as sp
+from scipy import ndimage
+from scipy.sparse.csgraph import connected_components
+import pytest
+
+from sklearn.feature_extraction.image import (
+    img_to_graph, grid_to_graph, extract_patches_2d,
+    reconstruct_from_patches_2d, PatchExtractor, _extract_patches,
+    extract_patches)
+from sklearn.utils._testing import ignore_warnings
+
+
+def test_img_to_graph():
+    """Graphs of the two ``mgrid`` ramps share the same positive entries."""
+    ramp_x, ramp_y = np.mgrid[:4, :4] - 10
+    graph_x = img_to_graph(ramp_x)
+    graph_y = img_to_graph(ramp_y)
+    assert graph_x.nnz == graph_y.nnz
+    # Negative elements are the diagonal: the elements of the original
+    # image. Positive elements are the values of the gradient, they
+    # should all be equal on both graphs.
+    positive_x = graph_x.data[graph_x.data > 0]
+    positive_y = graph_y.data[graph_y.data > 0]
+    np.testing.assert_array_equal(positive_x, positive_y)
+
+
+def test_grid_to_graph():
+    """Check ``grid_to_graph`` with an edgeless mask, mask types and dtypes.
+
+    The builtin ``bool`` and ``int`` are used as dtypes instead of the
+    ``np.bool`` / ``np.int`` aliases: those aliases were deprecated in
+    NumPy 1.20 and removed in NumPy 1.24, where accessing them raises
+    ``AttributeError``. They were plain aliases of the builtins, so the
+    requested dtypes are unchanged.
+    """
+    # Checking that the function works with graphs containing no edges
+    size = 2
+    roi_size = 1
+    # Generating two convex parts with one vertex
+    # Thus, edges will be empty in _to_graph
+    mask = np.zeros((size, size), dtype=bool)
+    mask[0:roi_size, 0:roi_size] = True
+    mask[-roi_size:, -roi_size:] = True
+    mask = mask.reshape(size ** 2)
+    A = grid_to_graph(n_x=size, n_y=size, mask=mask, return_as=np.ndarray)
+    assert connected_components(A)[0] == 2
+
+    # Checking that the function works whatever the type of mask is
+    mask = np.ones((size, size), dtype=np.int16)
+    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask)
+    assert connected_components(A)[0] == 1
+
+    # Checking dtype of the graph
+    mask = np.ones((size, size))
+    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=bool)
+    assert A.dtype == bool
+    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=int)
+    assert A.dtype == int
+    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask,
+                      dtype=np.float64)
+    assert A.dtype == np.float64
+
+
+def _load_face():
+    """Return SciPy's grayscale sample "face" image.
+
+    The loader has lived in different places across SciPy releases, so the
+    known locations are tried in turn: ``scipy.face``, ``scipy.misc.face``
+    and finally ``scipy.datasets.face``.
+    """
+    try:
+        return sp.face(gray=True)
+    except AttributeError:
+        pass
+    try:
+        # Newer versions of scipy have face in misc
+        from scipy import misc
+        return misc.face(gray=True)
+    except (ImportError, AttributeError):
+        # ``scipy.misc.face`` is no longer available in recent SciPy
+        # releases; ``scipy.datasets.face`` is its replacement.
+        # NOTE(review): scipy.datasets may need the optional ``pooch``
+        # package and a first-time download -- confirm for the target env.
+        from scipy import datasets
+        return datasets.face(gray=True)
+
+
+@ignore_warnings(category=DeprecationWarning)  # scipy deprecation inside face
+def test_connect_regions():
+    """``img_to_graph`` components match ``ndimage.label`` on a masked image.
+
+    For each threshold, the number of connected components of the masked
+    image graph must equal the number of labelled regions of the mask.
+    """
+    # subsample by 4 to reduce run time
+    face = _load_face()[::4, ::4]
+    for thr in (50, 150):
+        mask = face > thr
+        graph = img_to_graph(face, mask=mask)
+        assert ndimage.label(mask)[1] == connected_components(graph)[0]
+
+
+@ignore_warnings(category=DeprecationWarning)  # scipy deprecation inside face
+def test_connect_regions_with_grid():
+    """``grid_to_graph`` components match ``ndimage.label`` for a mask.
+
+    Checked once with the default ``dtype`` and once with ``dtype=None``.
+    """
+    # subsample by 4 to reduce run time
+    face = _load_face()[::4, ::4]
+
+    mask = face > 50
+    graph = grid_to_graph(*face.shape, mask=mask)
+    assert ndimage.label(mask)[1] == connected_components(graph)[0]
+
+    mask = face > 150
+    graph = grid_to_graph(*face.shape, mask=mask, dtype=None)
+    assert ndimage.label(mask)[1] == connected_components(graph)[0]
+
+
+def _downsampled_face():
+    """Return the sample face image as ``float32``, reduced by 4 per axis.
+
+    Two successive passes each sum the four pixels of every 2x2 block; the
+    final division by 16 turns the accumulated sum into a 4x4 block mean.
+    """
+    face = _load_face()
+    face = face.astype(np.float32)
+    # First 2x2 block sum (halves each axis).
+    face = (face[::2, ::2] + face[1::2, ::2] + face[::2, 1::2]
+            + face[1::2, 1::2])
+    # Second 2x2 block sum (halves each axis again).
+    face = (face[::2, ::2] + face[1::2, ::2] + face[::2, 1::2]
+            + face[1::2, 1::2])
+    face = face.astype(np.float32)
+    # 16 original pixels were summed into each output pixel.
+    face /= 16.0
+    return face
+
+
+def _orange_face(face=None):
+    """Return a 3-channel image derived from ``face``.
+
+    ``face`` defaults to ``_downsampled_face()``. The channels hold
+    ``256 - face``, ``256 - face / 2`` and ``256 - face / 4`` respectively.
+    """
+    if face is None:
+        face = _downsampled_face()
+    channels = np.zeros(face.shape + (3,))
+    channels[:, :, 0] = 256 - face
+    channels[:, :, 1] = 256 - face / 2
+    channels[:, :, 2] = 256 - face / 4
+    return channels
+
+
+def _make_images(face=None):
+    """Return a stack of three images: ``face``, ``face + 1``, ``face + 2``.
+
+    ``face`` defaults to ``_downsampled_face()``; the images are stacked
+    along a new leading axis.
+    """
+    if face is None:
+        face = _downsampled_face()
+    # make a collection of faces
+    stack = np.zeros((3,) + face.shape)
+    stack[0] = face
+    for shift in (1, 2):
+        stack[shift] = face + shift
+    return stack
+
+# Module-level fixtures, built once when the module is imported and reused
+# by the tests below. The colour image and the image collection are both
+# derived from the same downsampled image.
+downsampled_face = _downsampled_face()
+orange_face = _orange_face(downsampled_face)
+face_collection = _make_images(downsampled_face)
+
+
+def test_extract_patches_all():
+    """Without ``max_patches`` every 16x16 patch position is returned."""
+    image = downsampled_face
+    height, width = image.shape
+    patch_h, patch_w = 16, 16
+    n_rows = height - patch_h + 1
+    n_cols = width - patch_w + 1
+    patches = extract_patches_2d(image, (patch_h, patch_w))
+    assert patches.shape == (n_rows * n_cols, patch_h, patch_w)
+
+
+def test_extract_patches_all_color():
+    """Patches of a 3-channel image keep the trailing channel axis."""
+    image = orange_face
+    height, width = image.shape[:2]
+    patch_h, patch_w = 16, 16
+    n_rows = height - patch_h + 1
+    n_cols = width - patch_w + 1
+    patches = extract_patches_2d(image, (patch_h, patch_w))
+    assert patches.shape == (n_rows * n_cols, patch_h, patch_w, 3)
+
+
+def test_extract_patches_all_rect():
+    """All 16x12 patches of a column-cropped image are returned."""
+    cropped = downsampled_face[:, 32:97]
+    height, width = cropped.shape
+    patch_h, patch_w = 16, 12
+    n_rows = height - patch_h + 1
+    n_cols = width - patch_w + 1
+
+    patches = extract_patches_2d(cropped, (patch_h, patch_w))
+    assert patches.shape == (n_rows * n_cols, patch_h, patch_w)
+
+
+def test_extract_patches_max_patches():
+    """``max_patches`` works as a count, as a fraction, and is validated."""
+    image = downsampled_face
+    height, width = image.shape
+    patch_shape = (16, 16)
+    patch_h, patch_w = patch_shape
+
+    # An integer is an absolute number of patches.
+    by_count = extract_patches_2d(image, patch_shape, max_patches=100)
+    assert by_count.shape == (100, patch_h, patch_w)
+
+    # A float is a fraction of all available patch positions.
+    n_half = int(0.5 * (height - patch_h + 1) * (width - patch_w + 1))
+    by_fraction = extract_patches_2d(image, patch_shape, max_patches=0.5)
+    assert by_fraction.shape == (n_half, patch_h, patch_w)
+
+    # Float values of 2.0 and -1.0 are rejected.
+    for invalid in (2.0, -1.0):
+        with pytest.raises(ValueError):
+            extract_patches_2d(image, patch_shape, max_patches=invalid)
+
+
+def test_extract_patch_same_size_image():
+    """A patch as large as the image yields exactly one patch.
+
+    Even with ``max_patches=2`` only the single patch, a.k.a. the image
+    itself, should be returned.
+    """
+    image = downsampled_face
+    whole_image_patches = extract_patches_2d(image, image.shape,
+                                             max_patches=2)
+    n_returned = whole_image_patches.shape[0]
+    assert n_returned == 1
+
+
+def test_extract_patches_less_than_max_patches():
+    """``max_patches`` above the available count returns all patches."""
+    image = downsampled_face
+    height, width = image.shape
+    # Patches cover three quarters of each image dimension.
+    patch_h = 3 * height // 4
+    patch_w = 3 * width // 4
+    # this is 3185
+    n_available = (height - patch_h + 1) * (width - patch_w + 1)
+
+    patches = extract_patches_2d(image, (patch_h, patch_w),
+                                 max_patches=4000)
+    assert patches.shape == (n_available, patch_h, patch_w)
+
+
+def test_reconstruct_patches_perfect():
+    """Reconstructing from all 16x16 patches recovers the image."""
+    image = downsampled_face
+    patch_shape = (16, 16)
+
+    all_patches = extract_patches_2d(image, patch_shape)
+    rebuilt = reconstruct_from_patches_2d(all_patches, image.shape)
+    np.testing.assert_array_almost_equal(image, rebuilt)
+
+
+def test_reconstruct_patches_perfect_color():
+    """Reconstruction from all 16x16 patches also works with 3 channels."""
+    image = orange_face
+    patch_shape = (16, 16)
+
+    all_patches = extract_patches_2d(image, patch_shape)
+    rebuilt = reconstruct_from_patches_2d(all_patches, image.shape)
+    np.testing.assert_array_almost_equal(image, rebuilt)
+
+
+def test_patch_extractor_fit():
+    """``fit`` returns an object equal to the extractor it was called on."""
+    extractor = PatchExtractor(patch_size=(8, 8), max_patches=100,
+                               random_state=0)
+    fitted = extractor.fit(face_collection)
+    assert extractor == fitted
+
+
+def test_patch_extractor_max_patches():
+    """``max_patches`` applies per image, as a count or as a fraction."""
+    images = face_collection
+    height, width = images.shape[1:3]
+    patch_h, patch_w = 8, 8
+
+    def extracted_shape(max_patches):
+        # Build a fresh extractor for each setting, with a fixed seed.
+        extractor = PatchExtractor(patch_size=(patch_h, patch_w),
+                                   max_patches=max_patches,
+                                   random_state=0)
+        return extractor.transform(images).shape
+
+    # Integer: that many patches from every image.
+    max_patches = 100
+    n_expected = len(images) * max_patches
+    assert extracted_shape(max_patches) == (n_expected, patch_h, patch_w)
+
+    # Float: that fraction of all patch positions, from every image.
+    max_patches = 0.5
+    n_expected = len(images) * int((height - patch_h + 1)
+                                   * (width - patch_w + 1) * max_patches)
+    assert extracted_shape(max_patches) == (n_expected, patch_h, patch_w)
+
+
+def test_patch_extractor_max_patches_default():
+    """With no ``patch_size`` given, 19x25 patches are expected here."""
+    images = face_collection
+    extractor = PatchExtractor(max_patches=100, random_state=0)
+    result = extractor.transform(images)
+    n_expected = len(images) * 100
+    assert result.shape == (n_expected, 19, 25)
+
+
+def test_patch_extractor_all_patches():
+    """Without ``max_patches`` every 8x8 patch of every image is returned."""
+    images = face_collection
+    height, width = images.shape[1:3]
+    patch_h, patch_w = 8, 8
+    per_image = (height - patch_h + 1) * (width - patch_w + 1)
+    extractor = PatchExtractor(patch_size=(patch_h, patch_w),
+                               random_state=0)
+    result = extractor.transform(images)
+    assert result.shape == (len(images) * per_image, patch_h, patch_w)
+
+
+def test_patch_extractor_color():
+    """Extracting from 3-channel images keeps the trailing channel axis."""
+    images = _make_images(orange_face)
+    height, width = images.shape[1:3]
+    patch_h, patch_w = 8, 8
+    per_image = (height - patch_h + 1) * (width - patch_w + 1)
+    extractor = PatchExtractor(patch_size=(patch_h, patch_w),
+                               random_state=0)
+    result = extractor.transform(images)
+    assert result.shape == (len(images) * per_image, patch_h, patch_w, 3)
+
+
+def test_extract_patches_strided():
+    """Check ``_extract_patches`` view shapes and last patch in 1-D/2-D/3-D.
+
+    The lists below are parallel: entry ``k`` of each list describes case
+    ``k``. ``expected_views_*`` is the number of patches along each image
+    axis, and ``last_patch_*`` is the start index (per axis) of the last
+    patch inside the image.
+    """
+
+    image_shapes_1D = [(10,), (10,), (11,), (10,)]
+    patch_sizes_1D = [(1,), (2,), (3,), (8,)]
+    patch_steps_1D = [(1,), (1,), (4,), (2,)]
+
+    expected_views_1D = [(10,), (9,), (3,), (2,)]
+    # The first entry is 9, not 10: a size-1 patch with step 1 on a
+    # length-10 image starts last at index 9. A start of 10 selects an empty
+    # slice, which made the comparison below pass vacuously.
+    last_patch_1D = [(9,), (8,), (8,), (2,)]
+
+    image_shapes_2D = [(10, 20), (10, 20), (10, 20), (11, 20)]
+    patch_sizes_2D = [(2, 2), (10, 10), (10, 11), (6, 6)]
+    patch_steps_2D = [(5, 5), (3, 10), (3, 4), (4, 2)]
+
+    expected_views_2D = [(2, 4), (1, 2), (1, 3), (2, 8)]
+    last_patch_2D = [(5, 15), (0, 10), (0, 8), (4, 14)]
+
+    image_shapes_3D = [(5, 4, 3), (3, 3, 3), (7, 8, 9), (7, 8, 9)]
+    patch_sizes_3D = [(2, 2, 3), (2, 2, 2), (1, 7, 3), (1, 3, 3)]
+    patch_steps_3D = [(1, 2, 10), (1, 1, 1), (2, 1, 3), (3, 3, 4)]
+
+    expected_views_3D = [(4, 2, 1), (2, 2, 2), (4, 2, 3), (3, 2, 2)]
+    last_patch_3D = [(3, 2, 0), (1, 1, 1), (6, 1, 6), (6, 3, 4)]
+
+    image_shapes = image_shapes_1D + image_shapes_2D + image_shapes_3D
+    patch_sizes = patch_sizes_1D + patch_sizes_2D + patch_sizes_3D
+    patch_steps = patch_steps_1D + patch_steps_2D + patch_steps_3D
+    expected_views = expected_views_1D + expected_views_2D + expected_views_3D
+    last_patches = last_patch_1D + last_patch_2D + last_patch_3D
+
+    for (image_shape, patch_size, patch_step, expected_view,
+         last_patch) in zip(image_shapes, patch_sizes, patch_steps,
+                            expected_views, last_patches):
+        # Distinct values everywhere, so equal patches imply equal position.
+        image = np.arange(np.prod(image_shape)).reshape(image_shape)
+        patches = _extract_patches(image, patch_shape=patch_size,
+                                   extraction_step=patch_step)
+
+        ndim = len(image_shape)
+
+        assert patches.shape[:ndim] == expected_view
+        last_patch_slices = tuple(slice(i, i + j, None) for i, j in
+                                  zip(last_patch, patch_size))
+        reference = image[last_patch_slices]
+        # Guard against a reference slice that runs off the image: it would
+        # be truncated or empty and could let the comparison pass vacuously.
+        assert reference.shape == patch_size
+        # ``-1`` picks the last patch along each view axis; the ``None``
+        # entries only insert length-1 axes, which broadcast away.
+        assert (patches[(-1, None, None) * ndim] ==
+                reference.squeeze()).all()
+
+
+def test_extract_patches_square():
+    """An integer ``patch_shape`` is used for every image dimension."""
+    # test same patch size for all dimensions
+    image = downsampled_face
+    height, width = image.shape
+    side = 8
+    n_rows = height - side + 1
+    n_cols = width - side + 1
+    patches = _extract_patches(image, patch_shape=side)
+    assert patches.shape == (n_rows, n_cols, side, side)
+
+
+def test_width_patch():
+    """A patch taller or wider than the image raises ``ValueError``."""
+    # width and height of the patch should be less than the image
+    image = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    for oversized_patch in ((4, 1), (1, 4)):
+        with pytest.raises(ValueError):
+            extract_patches_2d(image, oversized_patch)
+
+
+# TODO: Remove in 0.24
+def test_extract_patches_deprecated():
+    """Calling the public ``extract_patches`` emits a ``FutureWarning``."""
+    expected_warning = (
+        "The function feature_extraction.image.extract_patches has been "
+        "deprecated in 0.22 and will be removed in 0.24.")
+    with pytest.warns(FutureWarning, match=expected_warning):
+        extract_patches(downsampled_face)
--- a/venv/Lib/site-packages/sklearn/feature_extraction/tests/test_text.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/tests/test_text.py
--- a/venv/Lib/site-packages/sklearn/feature_extraction/text.py
+++ b/venv/Lib/site-packages/sklearn/feature_extraction/text.py