Vehicle-Anti-Theft-Face-Rec.../venv/Lib/site-packages/bleach/sanitizer.py

from __future__ import unicode_literals

from itertools import chain
import re
import warnings

import six
from six.moves.urllib.parse import urlparse
from xml.sax.saxutils import unescape

from bleach import html5lib_shim
from bleach.utils import alphabetize_attributes, force_unicode


#: List of allowed tags
ALLOWED_TAGS = [
    "a", "abbr", "acronym", "b", "blockquote", "code",
    "em", "i", "li", "ol", "strong", "ul",
]


#: Map of allowed attributes by tag
ALLOWED_ATTRIBUTES = {"a": ["href", "title"], "abbr": ["title"], "acronym": ["title"]}

#: List of allowed styles
ALLOWED_STYLES = []

#: List of allowed protocols
ALLOWED_PROTOCOLS = ["http", "https", "mailto"]

#: Invisible characters--0 to and including 31 except 9 (tab), 10 (lf), and 13 (cr)
INVISIBLE_CHARACTERS = "".join(
    chr(code_point) for code_point in range(32) if code_point not in (9, 10, 13)
)

#: Regexp for characters that are invisible
INVISIBLE_CHARACTERS_RE = re.compile("[{0}]".format(INVISIBLE_CHARACTERS), re.UNICODE)

#: String to replace invisible characters with. This can be a character, a
#: string, or even a function that takes a Python re matchobj
INVISIBLE_REPLACEMENT_CHAR = "?"


class Cleaner(object):
    """Sanitizer for HTML fragments that will be displayed in a web page

    A ``Cleaner`` holds one parser, one tree walker and one serializer and
    reuses them for every call to :meth:`clean`. Its only job is to take a
    string that may contain malicious markup and return a string that is safe
    to show as content in a web page.

    Example::

        from bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    .. Note::

       The output is only meant for web-page contexts; this class is not
       designed to transform content for any other use.

    .. Warning::

       Instances are not thread-safe because the html parser keeps internal
       state. Use one cleaner per thread.

    """

    def __init__(
        self,
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        styles=ALLOWED_STYLES,
        protocols=ALLOWED_PROTOCOLS,
        strip=False,
        strip_comments=True,
        filters=None,
    ):
        """Sets up the parser, tree walker and serializer used by ``clean``

        :arg list tags: tags that are allowed through; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: attributes that are allowed through; may be a
            callable, a list or a dict; defaults to
            ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list styles: css properties that are allowed through; defaults
            to ``bleach.sanitizer.ALLOWED_STYLES``

        :arg list protocols: link protocols that are allowed through;
            defaults to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: if true, disallowed elements are stripped

        :arg bool strip_comments: if true, HTML comments are stripped

        :arg list filters: html5lib Filter classes; each one is instantiated
            with ``source=<previous stream>`` on every ``clean`` call, after
            the ``BleachSanitizerFilter``

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Extra filters alter what ``bleach.Cleaner.clean`` returns, so
               make sure whatever they change is still secure.

        """
        # The arguments are stored as given (no copies are made).
        self.tags = tags
        self.attributes = attributes
        self.styles = styles
        self.protocols = protocols
        self.strip = strip
        self.strip_comments = strip_comments
        self.filters = filters if filters else []

        self.parser = html5lib_shim.BleachHTMLParser(
            tags=self.tags,
            strip=self.strip,
            consume_entities=False,
            namespaceHTMLElements=False,
        )
        self.walker = html5lib_shim.getTreeWalker("etree")

        serializer_options = {
            "quote_attr_values": "always",
            "omit_optional_tags": False,
            "escape_lt_in_attrs": True,
            # Entities are left exactly as they are: no escaping, resolving
            # or expanding
            "resolve_entities": False,
            # Sanitizing is done by Bleach's own filter, not html5lib's
            "sanitize": False,
            # Attributes are already alphabetized by the Bleach sanitizer
            "alphabetical_attributes": False,
        }
        self.serializer = html5lib_shim.BleachHTMLSerializer(**serializer_options)

    def clean(self, text):
        """Sanitizes ``text`` and returns the result as unicode

        :arg str text: the text to sanitize

        :returns: the sanitized text as unicode; ``""`` when ``text`` is empty

        :raises TypeError: if ``text`` is not a text type

        """
        if not isinstance(text, six.string_types):
            raise TypeError(
                "argument cannot be of '{name}' type, must be of text type".format(
                    name=text.__class__.__name__
                )
            )

        # Nothing to parse for an empty string
        if not text:
            return ""

        fragment = self.parser.parseFragment(force_unicode(text))

        stream = BleachSanitizerFilter(
            source=self.walker(fragment),
            # Options specific to the Bleach sanitizer
            attributes=self.attributes,
            strip_disallowed_elements=self.strip,
            strip_html_comments=self.strip_comments,
            # Options consumed by the html5lib sanitizer
            allowed_elements=self.tags,
            allowed_css_properties=self.styles,
            allowed_protocols=self.protocols,
            allowed_svg_properties=[],
        )

        # User-supplied filters wrap the sanitized stream, in the order given
        for extra_filter in self.filters:
            stream = extra_filter(source=stream)

        return self.serializer.render(stream)


def attribute_filter_factory(attributes):
    """Builds the attribute filter function matching the shape of ``attributes``

    Resolving the shape once here keeps the per-attribute check in
    ``allow_token`` down to a single call.

    :arg attributes: one of

        * a callable -- returned unchanged; it is called as
          ``attributes(tag, attr, value)``
        * a dict mapping a tag name (or ``"*"`` for any tag) to either a
          callable with the same signature or a container of attribute names
        * a list of attribute names allowed on every tag

    :returns: a function ``(tag, attr, value)`` whose result says whether the
        attribute is kept

    :raises ValueError: if ``attributes`` is none of the shapes above

    """
    if callable(attributes):
        return attributes

    if isinstance(attributes, dict):

        def _attr_filter(tag, attr, value):
            # The tag's own rule is consulted first, then the "*" wildcard.
            for key in (tag, "*"):
                if key not in attributes:
                    continue

                rule = attributes[key]
                if callable(rule):
                    # A callable rule has the final say; there is no fall
                    # through to the wildcard.
                    return rule(tag, attr, value)

                if key == "*":
                    return attr in rule

                if attr in rule:
                    return True

            return False

        return _attr_filter

    if isinstance(attributes, list):

        def _attr_filter(tag, attr, value):
            return attr in attributes

        return _attr_filter

    raise ValueError("attributes needs to be a callable, a list or a dict")


class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
    """html5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    """

    def __init__(
        self,
        source,
        attributes=ALLOWED_ATTRIBUTES,
        strip_disallowed_elements=False,
        strip_html_comments=True,
        **kwargs
    ):
        """Creates a BleachSanitizerFilter instance

        :arg Treewalker source: stream

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg bool strip_disallowed_elements: whether or not to strip disallowed
            elements

        :arg bool strip_html_comments: whether or not to strip HTML comments

        :arg kwargs: passed unchanged to ``html5lib_shim.SanitizerFilter``.
            Allowed tags, styles and protocols are NOT parameters of this
            class; ``Cleaner.clean`` supplies them through these keyword
            arguments as ``allowed_elements``, ``allowed_css_properties`` and
            ``allowed_protocols`` (plus ``allowed_svg_properties``).

        :raises ValueError: if ``attributes`` is not a callable, a list or a
            dict

        """
        self.attr_filter = attribute_filter_factory(attributes)
        self.strip_disallowed_elements = strip_disallowed_elements
        self.strip_html_comments = strip_html_comments

        # filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init
        #
        # NOTE: warnings.filterwarnings() edits the interpreter-wide warning
        # filter list, so this is not scoped to this instance.
        warnings.filterwarnings(
            "ignore",
            message="html5lib's sanitizer is deprecated",
            category=DeprecationWarning,
            module="bleach._vendor.html5lib",
        )
        # __init__ must not return a value, so the parent initializer is
        # called as a plain statement.
        super(BleachSanitizerFilter, self).__init__(source, **kwargs)

    def sanitize_stream(self, token_iterator):
        """Runs every token through ``sanitize_token`` and yields what is left

        :arg token_iterator: iterable of tokens

        :returns: generator of tokens; a list result is flattened into its
            items and any falsy result (``None``, empty list) is dropped

        """
        for token in token_iterator:
            outcome = self.sanitize_token(token)

            if isinstance(outcome, list):
                # One token may expand into several; an empty list yields
                # nothing at all.
                for piece in outcome:
                    yield piece
            elif outcome:
                yield outcome

    def merge_characters(self, token_iterator):
        """Collapses each run of adjacent Characters tokens into one token

        :arg token_iterator: iterable of tokens

        :returns: generator of tokens. A merged token carries only the
            ``data`` and ``type`` keys. One final Characters token is always
            yielded after the input is exhausted; its data is ``""`` when no
            Characters tokens were pending.

        """
        pending = []

        for token in token_iterator:
            if token["type"] == "Characters":
                # Hold on to it until the run ends
                pending.append(token)
                continue

            if pending:
                # A non-Characters token ends the run: emit the merged text
                # ahead of it.
                merged = {
                    "data": "".join([held["data"] for held in pending]),
                    "type": "Characters",
                }
                pending = []
                yield merged

            yield token

        # Flush whatever is still buffered (possibly nothing)
        yield {
            "data": "".join([held["data"] for held in pending]),
            "type": "Characters",
        }

    def __iter__(self):
        """Iterates the source tokens, sanitized and with Characters merged"""
        # Use the plain html5lib Filter iteration as the raw token source
        raw_tokens = html5lib_shim.Filter.__iter__(self)
        sanitized_tokens = self.sanitize_stream(raw_tokens)
        return self.merge_characters(sanitized_tokens)

    def sanitize_token(self, token):
        """Sanitizes one token by keeping, rewriting, escaping or dropping it

        Tag tokens (``StartTag``, ``EndTag``, ``EmptyTag``) go through
        ``allow_token`` when the tag name is allowed. Otherwise they are
        dropped when stripping is enabled, or converted into text by
        ``disallowed_token``.

        Which attributes survive on an allowed tag is decided by the
        attribute filter built in ``__init__``. It may come from a dict of
        ``{'tag': ['attribute', 'names'], 'tag': callable}``, where the
        callable takes the tag name, the attribute name and the attribute
        value and returns true or false.

        :arg dict token: token to sanitize

        :returns: a token, a list of tokens, or ``None`` when the token is
            dropped

        """
        kind = token["type"]

        if kind == "Characters":
            return self.sanitize_characters(token)

        if kind == "Comment":
            return None if self.strip_html_comments else token

        if kind not in ("StartTag", "EndTag", "EmptyTag"):
            # Every other token type passes through untouched
            return token

        if token["name"] in self.allowed_elements:
            return self.allow_token(token)

        if self.strip_disallowed_elements:
            return None

        if "data" in token:
            # Sort the attributes first so the text produced by
            # .disallowed_token() is stable
            token["data"] = alphabetize_attributes(token["data"])
        return self.disallowed_token(token)

    def sanitize_characters(self, token):
        """Cleans a Characters token and splits out the entities it contains

        The overridden tokenizer leaves entities alone, which means the
        serializer would turn every ``&`` in a Characters token into
        ``&amp;``. To avoid that, recognized entities are pulled out into
        Entity tokens, which the serializer leaves as they are.

        Invisible characters are replaced with ``INVISIBLE_REPLACEMENT_CHAR``
        before anything else happens.

        :arg token: the Characters token to work on

        :returns: the token itself (updated in place) when its data is empty
            or has no ``&``; otherwise a list of Characters and Entity tokens

        """
        text = token.get("data", "")
        if not text:
            return token

        text = INVISIBLE_CHARACTERS_RE.sub(INVISIBLE_REPLACEMENT_CHAR, text)
        token["data"] = text

        # Without a & there cannot be an entity, so there is nothing to split
        if "&" not in text:
            return token

        pieces = []

        # Every chunk starting with "&" is a candidate entity; re-tokenize
        # around the ones that really are entities
        for chunk in html5lib_shim.next_possible_entity(text):
            if not chunk:
                continue

            entity = None
            if chunk.startswith("&"):
                entity = html5lib_shim.match_entity(chunk)

            if entity is None:
                # Plain text, or an "&" that does not start a known entity
                pieces.append({"type": "Characters", "data": chunk})
                continue

            if entity == "amp":
                # LinkifyFilter can't match urls across token boundaries,
                # which is a problem for &amp; because it appears in
                # querystrings all the time. So &amp; becomes a bare & in a
                # Characters token; it is merged with its neighbours in
                # BleachSanitizerfilter.__iter__ and escaped again by the
                # serializer.
                pieces.append({"type": "Characters", "data": "&"})
            else:
                pieces.append({"type": "Entity", "name": entity})

            # Skip the entity name plus 2 characters: the leading & and the
            # trailing ;
            rest = chunk[len(entity) + 2 :]
            if rest:
                pieces.append({"type": "Characters", "data": rest})

        return pieces

    def sanitize_uri_value(self, value, allowed_protocols):
        """Decides whether a uri value may be kept

        :arg value: the uri value to check
        :arg allowed_protocols: list of allowed protocols

        :returns: ``value`` exactly as passed in when it is allowed,
            otherwise ``None``

        """
        # NOTE(willkg): The normalized copy below exists only to make
        # matching and verifying easier. It is vastly different from the
        # original value and must never be returned.

        # Resolve all character entities
        normalized = html5lib_shim.convert_entities(value)

        # Remove backticks, space characters and control characters
        normalized = re.sub(r"[`\000-\040\177-\240\s]+", "", normalized)

        # Remove REPLACEMENT characters, then lowercase so protocol
        # comparisons are simple
        normalized = normalized.replace("\ufffd", "").lower()

        try:
            parsed = urlparse(normalized)
        except ValueError:
            # A uri that cannot be parsed is never allowed
            return None

        scheme = parsed.scheme
        if scheme:
            # urlparse found a scheme, so that alone decides the outcome
            return value if scheme in allowed_protocols else None

        # A bare anchor is always fine
        if normalized.startswith("#"):
            return value

        # Protocols urlparse doesn't recognize, like "myprotocol"
        prefix, colon, _ = normalized.partition(":")
        if colon and prefix in allowed_protocols:
            return value

        # No protocol/scheme at all: treat it as "http" and allow it only if
        # "http" is allowed
        if "http" in allowed_protocols:
            return value

        return None

    def allow_token(self, token):
        """Filters and fixes the attributes of a token whose tag is allowed

        :arg dict token: tag token; when it has a ``data`` mapping of
            ``(namespace, name)`` to value, that mapping is replaced with the
            alphabetized attributes that survive

        :returns: the same token object

        """
        if "data" not in token:
            return token

        # Walk every attribute, dropping the ones that are not allowed, are
        # unsafe or break other rules, and fixing values that need fixing.
        # Whatever ends up in ``kept`` is the final attribute set.
        kept = {}
        for attr_key, attr_value in token["data"].items():
            _namespace, attr_name = attr_key

            # Attributes that are not explicitly allowed go first
            #
            # NOTE(willkg): The filter receives the plain attribute name,
            # not the namespaced one.
            if not self.attr_filter(token["name"], attr_name, attr_value):
                continue

            # Attributes holding a uri: drop them when the protocol is not
            # allowed
            if attr_key in self.attr_val_is_uri:
                checked = self.sanitize_uri_value(attr_value, self.allowed_protocols)
                if checked is None:
                    continue
                attr_value = checked

            # svg attributes: remove references to non-local IRIs and drop
            # the attribute if nothing is left
            if attr_key in self.svg_attr_val_allows_ref:
                without_refs = re.sub(
                    r"url\s*\(\s*[^#\s][^)]+?\)", " ", unescape(attr_value)
                ).strip()
                if not without_refs:
                    continue
                # Keep the unescaped form because the value is an iri
                attr_value = without_refs

            # svg elements: drop href and xlink:href pointing at a non-local
            # IRI
            if (
                (None, token["name"]) in self.svg_allow_local_href
                and attr_key
                in [(None, "href"), (html5lib_shim.namespaces["xlink"], "href")]
                and re.search(r"^\s*[^#\s]", attr_value)
            ):
                continue

            # style attributes get their css sanitized
            if attr_key == (None, "style"):
                attr_value = self.sanitize_css(attr_value)

            # The attribute survived every check
            kept[attr_key] = attr_value

        token["data"] = alphabetize_attributes(kept)
        return token

    def disallowed_token(self, token):
        """Rewrites a disallowed tag token, in place, as a Characters token

        The token's ``data`` becomes the tag's markup as text, its ``type``
        becomes ``"Characters"`` and its ``name`` key is removed.

        :arg dict token: a StartTag, EndTag or EmptyTag token

        :returns: the same token object

        """
        kind = token["type"]

        if kind == "EndTag":
            markup = "</%s>" % token["name"]

        elif token["data"]:
            assert kind in ("StartTag", "EmptyTag")
            rendered = []
            for (ns, name), attr_value in token["data"].items():
                # A namespace without a name: swap them so there is a valid
                # name to write out.
                if ns and not name:
                    ns, name = name, ns

                # Prefix the name only when the namespace has a known
                # prefix; an unknown namespace is dropped.
                if ns is None or ns not in html5lib_shim.prefixes:
                    qualified = name
                else:
                    qualified = "%s:%s" % (html5lib_shim.prefixes[ns], name)

                # NOTE(willkg): The value is deliberately not escaped here.
                # HTMLSerializer already escapes attribute values, so
                # escaping now (like HTMLSerializer does) would double-escape.
                rendered.append(' %s="%s"' % (qualified, attr_value))

            markup = "<%s%s>" % (token["name"], "".join(rendered))

        else:
            markup = "<%s>" % token["name"]

        if token.get("selfClosing"):
            # Turn the closing ">" into "/>"
            markup = markup[:-1] + "/>"

        token["data"] = markup
        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Sanitizes css in style tags"""
        # Convert entities in the style so that it can be parsed as CSS
        style = html5lib_shim.convert_entities(style)

        # Drop any url values before we do anything else
        style = re.compile(r"url\s*\(\s*[^\s)]+?\s*\)\s*").sub(" ", style)

        # The gauntlet of sanitization

        # Validate the css in the style tag and if it's not valid, then drop
        # the whole thing.
        parts = style.split(";")
        gauntlet = re.compile(
            r"""^(  # consider a style attribute value as composed of:
[/:,#%!.\s\w]    # a non-newline character
|\w-\w           # 3 characters in the form \w-\w
|'[\s\w]+'\s*    # a single quoted string of [\s\w]+ with trailing space
|"[\s\w]+"       # a double quoted string of [\s\w]+
|\([\d,%\.\s]+\) # a parenthesized string of one or more digits, commas, periods, ...
)*$""",  # ... percent signs, or whitespace e.g. from 'color: hsl(30,100%,50%)'
            flags=re.U | re.VERBOSE,
        )

        for part in parts:
            if not gauntlet.match(part):
                return ""

        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ""

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue

            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ": " + value + ";")

            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ": " + value + ";")

        return " ".join(clean)
Uploaded Test files 2020-11-12 16:05:57 +00:00			`from __future__ import unicode_literals`

			`from itertools import chain`
			`import re`
			`import warnings`

			`import six`
			`from six.moves.urllib.parse import urlparse`
			`from xml.sax.saxutils import unescape`

			`from bleach import html5lib_shim`
			`from bleach.utils import alphabetize_attributes, force_unicode`


			`#: List of allowed tags`
			`ALLOWED_TAGS = [`
			`"a",`
			`"abbr",`
			`"acronym",`
			`"b",`
			`"blockquote",`
			`"code",`
			`"em",`
			`"i",`
			`"li",`
			`"ol",`
			`"strong",`
			`"ul",`
			`]`


			`#: Map of allowed attributes by tag`
			`ALLOWED_ATTRIBUTES = {`
			`"a": ["href", "title"],`
			`"abbr": ["title"],`
			`"acronym": ["title"],`
			`}`

			`#: List of allowed styles`
			`ALLOWED_STYLES = []`

			`#: List of allowed protocols`
			`ALLOWED_PROTOCOLS = ["http", "https", "mailto"]`

			`#: Invisible characters--0 to and including 31 except 9 (tab), 10 (lf), and 13 (cr)`
			`INVISIBLE_CHARACTERS = "".join(`
			`[chr(c) for c in chain(range(0, 9), range(11, 13), range(14, 32))]`
			`)`

			`#: Regexp for characters that are invisible`
			`INVISIBLE_CHARACTERS_RE = re.compile("[" + INVISIBLE_CHARACTERS + "]", re.UNICODE)`

			`#: String to replace invisible characters with. This can be a character, a`
			`#: string, or even a function that takes a Python re matchobj`
			`INVISIBLE_REPLACEMENT_CHAR = "?"`


			`class Cleaner(object):`
			`"""Cleaner for cleaning HTML fragments of malicious content`

			`This cleaner is a security-focused function whose sole purpose is to remove`
			`malicious content from a string such that it can be displayed as content in`
			`a web page.`

			`To use::`

			`from bleach.sanitizer import Cleaner`

			`cleaner = Cleaner()`

			`for text in all_the_yucky_things:`
			`sanitized = cleaner.clean(text)`

			`.. Note::`

			`This cleaner is not designed to use to transform content to be used in`
			`non-web-page contexts.`

			`.. Warning::`

			`This cleaner is not thread-safe--the html parser has internal state.`
			`Create a separate cleaner per thread!`


			`"""`

			`def __init__(`
			`self,`
			`tags=ALLOWED_TAGS,`
			`attributes=ALLOWED_ATTRIBUTES,`
			`styles=ALLOWED_STYLES,`
			`protocols=ALLOWED_PROTOCOLS,`
			`strip=False,`
			`strip_comments=True,`
			`filters=None,`
			`):`
			`"""Initializes a Cleaner`

			`:arg list tags: allowed list of tags; defaults to`
			``bleach.sanitizer.ALLOWED_TAGS``

			`:arg dict attributes: allowed attributes; can be a callable, list or dict;`
			defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

			`:arg list styles: allowed list of css styles; defaults to`
			``bleach.sanitizer.ALLOWED_STYLES``

			`:arg list protocols: allowed list of protocols for links; defaults`
			to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

			`:arg bool strip: whether or not to strip disallowed elements`

			`:arg bool strip_comments: whether or not to strip HTML comments`

			`:arg list filters: list of html5lib Filter classes to pass streamed content through`

			`.. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters`

			`.. Warning::`

			Using filters changes the output of ``bleach.Cleaner.clean``.
			`Make sure the way the filters change the output are secure.`

			`"""`
			`self.tags = tags`
			`self.attributes = attributes`
			`self.styles = styles`
			`self.protocols = protocols`
			`self.strip = strip`
			`self.strip_comments = strip_comments`
			`self.filters = filters or []`

			`self.parser = html5lib_shim.BleachHTMLParser(`
			`tags=self.tags,`
			`strip=self.strip,`
			`consume_entities=False,`
			`namespaceHTMLElements=False,`
			`)`
			`self.walker = html5lib_shim.getTreeWalker("etree")`
			`self.serializer = html5lib_shim.BleachHTMLSerializer(`
			`quote_attr_values="always",`
			`omit_optional_tags=False,`
			`escape_lt_in_attrs=True,`
			`# We want to leave entities as they are without escaping or`
			`# resolving or expanding`
			`resolve_entities=False,`
			`# Bleach has its own sanitizer, so don't use the html5lib one`
			`sanitize=False,`
			`# Bleach sanitizer alphabetizes already, so don't use the html5lib one`
			`alphabetical_attributes=False,`
			`)`

			`def clean(self, text):`
			`"""Cleans text and returns sanitized result as unicode`

			`:arg str text: text to be cleaned`

			`:returns: sanitized text as unicode`

			:raises TypeError: if ``text`` is not a text type

			`"""`
			`if not isinstance(text, six.string_types):`
			`message = (`
			`"argument cannot be of '{name}' type, must be of text type".format(`
			`name=text.__class__.__name__`
			`)`
			`)`
			`raise TypeError(message)`

			`if not text:`
			`return ""`

			`text = force_unicode(text)`

			`dom = self.parser.parseFragment(text)`
			`filtered = BleachSanitizerFilter(`
			`source=self.walker(dom),`
			`# Bleach-sanitizer-specific things`
			`attributes=self.attributes,`
			`strip_disallowed_elements=self.strip,`
			`strip_html_comments=self.strip_comments,`
			`# html5lib-sanitizer things`
			`allowed_elements=self.tags,`
			`allowed_css_properties=self.styles,`
			`allowed_protocols=self.protocols,`
			`allowed_svg_properties=[],`
			`)`

			`# Apply any filters after the BleachSanitizerFilter`
			`for filter_class in self.filters:`
			`filtered = filter_class(source=filtered)`

			`return self.serializer.render(filtered)`


			`def attribute_filter_factory(attributes):`
			`"""Generates attribute filter function for the given attributes value`

			`The attributes value can take one of several shapes. This returns a filter`
			`function appropriate to the attributes value. One nice thing about this is`
			that there's less if/then shenanigans in the ``allow_token`` method.

			`"""`
			`if callable(attributes):`
			`return attributes`

			`if isinstance(attributes, dict):`

			`def _attr_filter(tag, attr, value):`
			`if tag in attributes:`
			`attr_val = attributes[tag]`
			`if callable(attr_val):`
			`return attr_val(tag, attr, value)`

			`if attr in attr_val:`
			`return True`

			`if "*" in attributes:`
			`attr_val = attributes["*"]`
			`if callable(attr_val):`
			`return attr_val(tag, attr, value)`

			`return attr in attr_val`

			`return False`

			`return _attr_filter`

			`if isinstance(attributes, list):`

			`def _attr_filter(tag, attr, value):`
			`return attr in attributes`

			`return _attr_filter`

			`raise ValueError("attributes needs to be a callable, a list or a dict")`


			`class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):`
			`"""html5lib Filter that sanitizes text`

			`This filter can be used anywhere html5lib filters can be used.`

			`"""`

			`def __init__(`
			`self,`
			`source,`
			`attributes=ALLOWED_ATTRIBUTES,`
			`strip_disallowed_elements=False,`
			`strip_html_comments=True,`
			`**kwargs`
			`):`
			`"""Creates a BleachSanitizerFilter instance`

			`:arg Treewalker source: stream`

			`:arg list tags: allowed list of tags; defaults to`
			``bleach.sanitizer.ALLOWED_TAGS``

			`:arg dict attributes: allowed attributes; can be a callable, list or dict;`
			defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

			`:arg list styles: allowed list of css styles; defaults to`
			``bleach.sanitizer.ALLOWED_STYLES``

			`:arg list protocols: allowed list of protocols for links; defaults`
			to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

			`:arg bool strip_disallowed_elements: whether or not to strip disallowed`
			`elements`

			`:arg bool strip_html_comments: whether or not to strip HTML comments`

			`"""`
			`self.attr_filter = attribute_filter_factory(attributes)`
			`self.strip_disallowed_elements = strip_disallowed_elements`
			`self.strip_html_comments = strip_html_comments`

			`# filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init`
			`warnings.filterwarnings(`
			`"ignore",`
			`message="html5lib's sanitizer is deprecated",`
			`category=DeprecationWarning,`
			`module="bleach._vendor.html5lib",`
			`)`
			`return super(BleachSanitizerFilter, self).__init__(source, **kwargs)`

			`def sanitize_stream(self, token_iterator):`
			`for token in token_iterator:`
			`ret = self.sanitize_token(token)`

			`if not ret:`
			`continue`

			`if isinstance(ret, list):`
			`for subtoken in ret:`
			`yield subtoken`
			`else:`
			`yield ret`

			`def merge_characters(self, token_iterator):`
			`"""Merge consecutive Characters tokens in a stream"""`
			`characters_buffer = []`

			`for token in token_iterator:`
			`if characters_buffer:`
			`if token["type"] == "Characters":`
			`characters_buffer.append(token)`
			`continue`
			`else:`
			`# Merge all the characters tokens together into one and then`
			`# operate on it.`
			`new_token = {`
			`"data": "".join(`
			`[char_token["data"] for char_token in characters_buffer]`
			`),`
			`"type": "Characters",`
			`}`
			`characters_buffer = []`
			`yield new_token`

			`elif token["type"] == "Characters":`
			`characters_buffer.append(token)`
			`continue`

			`yield token`

			`new_token = {`
			`"data": "".join([char_token["data"] for char_token in characters_buffer]),`
			`"type": "Characters",`
			`}`
			`yield new_token`

			`def __iter__(self):`
			`return self.merge_characters(`
			`self.sanitize_stream(html5lib_shim.Filter.__iter__(self))`
			`)`

			def sanitize_token(self, token):
			    """Sanitize a token either by HTML-encoding or dropping.

			    Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
			    ['attribute', 'pairs'], 'tag': callable}.

			    Here callable is a function with two arguments of attribute name and
			    value. It should return true or false.

			    Also gives the option to strip tags instead of encoding.

			    :arg dict token: token to sanitize

			    :returns: token, list of tokens, or ``None`` when the token is dropped

			    """
			    kind = token["type"]

			    if kind == "Characters":
			        return self.sanitize_characters(token)

			    if kind == "Comment":
			        return None if self.strip_html_comments else token

			    if kind not in ["StartTag", "EndTag", "EmptyTag"]:
			        # Anything that isn't a tag, comment or text passes through as is
			        return token

			    if token["name"] in self.allowed_elements:
			        return self.allow_token(token)

			    if self.strip_disallowed_elements:
			        return None

			    if "data" in token:
			        # Alphabetize the attributes before calling .disallowed_token()
			        # so that the resulting string is stable
			        token["data"] = alphabetize_attributes(token["data"])
			    return self.disallowed_token(token)

			def sanitize_characters(self, token):
			    """Handles Characters tokens

			    Our overridden tokenizer doesn't do anything with entities. However,
			    that means that the serializer will convert all ``&`` in Characters
			    tokens to ``&amp;``.

			    Since we don't want that, we extract entities here and convert them to
			    Entity tokens so the serializer will let them be.

			    Invisible characters in the data are replaced first.

			    :arg token: the Characters token to work on

			    :returns: the token itself when its data is empty or has no ``&``,
			        otherwise a list of Characters and Entity tokens

			    """
			    text = token.get("data", "")
			    if not text:
			        return token

			    text = INVISIBLE_CHARACTERS_RE.sub(INVISIBLE_REPLACEMENT_CHAR, text)
			    token["data"] = text

			    # Without a & there can't be any entities, so there's nothing to split
			    if "&" not in text:
			        return token

			    pieces = []

			    # Walk over the chunks that start at each "&" and re-tokenize the ones
			    # that begin with an actual entity
			    for chunk in html5lib_shim.next_possible_entity(text):
			        if not chunk:
			            continue

			        entity = None
			        if chunk.startswith("&"):
			            entity = html5lib_shim.match_entity(chunk)

			        if entity is None:
			            # Plain text (or a bare &); keep it as characters
			            pieces.append({"type": "Characters", "data": chunk})
			            continue

			        if entity == "amp":
			            # LinkifyFilter can't match urls across token boundaries
			            # which is problematic with &amp; since that shows up in
			            # querystrings all the time. This special-cases &amp;
			            # and converts it to a & and sticks it in as a
			            # Characters token. It'll get merged with surrounding
			            # tokens in __iter__ and escaped in the serializer.
			            pieces.append({"type": "Characters", "data": "&"})
			        else:
			            pieces.append({"type": "Entity", "name": entity})

			        # Skip the entity name plus 2 characters--the & at the beginning
			        # and the ; at the end
			        tail = chunk[len(entity) + 2 :]
			        if tail:
			            pieces.append({"type": "Characters", "data": tail})

			    return pieces

			def sanitize_uri_value(self, value, allowed_protocols):
			    """Checks a uri value to see if it's allowed

			    The value is normalized only for the purpose of the check; what gets
			    returned is always the original ``value``.

			    :arg value: the uri value to sanitize
			    :arg allowed_protocols: list of allowed protocols

			    :returns: allowed value or None

			    """
			    # NOTE(willkg): The normalized form is easier to match and verify,
			    # but it's vastly different than the original value, so it must never
			    # be returned.

			    # Convert all character entities in the value
			    normalized = html5lib_shim.convert_entities(value)

			    # Nix backtick, space characters, and control characters
			    normalized = re.sub(r"[`\000-\040\177-\240\s]+", "", normalized)

			    # Remove REPLACEMENT characters and lowercase the result so it's
			    # easier to match against
			    normalized = normalized.replace("\ufffd", "").lower()

			    try:
			        scheme = urlparse(normalized).scheme
			    except ValueError:
			        # URI is impossible to parse, therefore it's not allowed
			        return None

			    if scheme:
			        # urlparse found a scheme, so that alone decides the outcome
			        return value if scheme in allowed_protocols else None

			    # Allow uris that are just an anchor
			    if normalized.startswith("#"):
			        return value

			    # Handle protocols that urlparse doesn't recognize like "myprotocol"
			    prefix, colon, _rest = normalized.partition(":")
			    if colon and prefix in allowed_protocols:
			        return value

			    # If there's no protocol/scheme specified, then assume it's "http"
			    # and see if that's allowed
			    return value if "http" in allowed_protocols else None

			def allow_token(self, token):
			    """Handles the case where we're allowing the tag

			    If the token carries attributes (a ``"data"`` mapping keyed by
			    ``(namespace, name)`` tuples), they are filtered and cleaned and the
			    mapping is replaced by the alphabetized result. Tokens without
			    ``"data"`` are returned untouched.

			    :arg dict token: the tag token to work on; modified in place

			    :returns: the same token

			    """
			    if "data" in token:
			        # Loop through all the attributes and drop the ones that are not
			        # allowed, are unsafe or break other rules. Additionally, fix
			        # attribute values that need fixing.
			        #
			        # At the end of this loop, we have the final set of attributes
			        # we're keeping.
			        attrs = {}
			        for namespaced_name, val in token["data"].items():
			            _namespace, name = namespaced_name

			            # Drop attributes that are not explicitly allowed
			            #
			            # NOTE(willkg): We pass in the attribute name--not a namespaced
			            # name.
			            if not self.attr_filter(token["name"], name, val):
			                continue

			            # Drop attributes with uri values that use a disallowed protocol
			            # Sanitize attributes with uri values
			            if namespaced_name in self.attr_val_is_uri:
			                new_value = self.sanitize_uri_value(val, self.allowed_protocols)
			                if new_value is None:
			                    continue
			                val = new_value

			            # Drop values in svg attrs with non-local IRIs
			            if namespaced_name in self.svg_attr_val_allows_ref:
			                # The whitespace around "(" is optional, so that the
			                # common "url(http://...)" form is matched as well
			                new_val = re.sub(
			                    r"url\s*\(\s*[^#\s][^)]+?\)", " ", unescape(val)
			                )
			                new_val = new_val.strip()
			                if not new_val:
			                    continue

			                # Replace the val with the unescaped version because
			                # it's a iri
			                val = new_val

			            # Drop href and xlink:href attr for svg elements with non-local IRIs
			            if (None, token["name"]) in self.svg_allow_local_href:
			                if namespaced_name in [
			                    (None, "href"),
			                    (html5lib_shim.namespaces["xlink"], "href"),
			                ]:
			                    if re.search(r"^\s*[^#\s]", val):
			                        continue

			            # If it's a style attribute, sanitize it
			            if namespaced_name == (None, "style"):
			                val = self.sanitize_css(val)

			            # At this point, we want to keep the attribute, so add it in
			            attrs[namespaced_name] = val

			        token["data"] = alphabetize_attributes(attrs)

			    return token

			def disallowed_token(self, token):
			    """Turn a disallowed tag token into a Characters token

			    The token's ``data`` is replaced with the tag rendered as text
			    (``<name attr="value">``, ``</name>`` or ``<name/>``), its ``type``
			    becomes ``"Characters"`` and its ``name`` key is removed.

			    :arg dict token: the StartTag, EndTag or EmptyTag token to convert;
			        modified in place

			    :returns: the same token, now a Characters token

			    """
			    token_type = token["type"]
			    if token_type == "EndTag":
			        token["data"] = "</%s>" % token["name"]

			    # NOTE: .get() because a tag token isn't guaranteed to carry a "data"
			    # key; a missing key is treated like an empty set of attributes.
			    elif token.get("data"):
			        assert token_type in ("StartTag", "EmptyTag")
			        attrs = []
			        for (ns, name), v in token["data"].items():
			            # If we end up with a namespace, but no name, switch them so we
			            # have a valid name to use.
			            if ns and not name:
			                ns, name = name, ns

			            # Figure out namespaced name if the namespace is appropriate
			            # and exists; if the ns isn't in prefixes, then drop it.
			            if ns is None or ns not in html5lib_shim.prefixes:
			                namespaced_name = name
			            else:
			                namespaced_name = "%s:%s" % (html5lib_shim.prefixes[ns], name)

			            # NOTE(willkg): HTMLSerializer escapes attribute values
			            # already, so if we do it here (like HTMLSerializer does),
			            # then we end up double-escaping.
			            attrs.append(' %s="%s"' % (namespaced_name, v))
			        token["data"] = "<%s%s>" % (token["name"], "".join(attrs))

			    else:
			        token["data"] = "<%s>" % token["name"]

			    if token.get("selfClosing"):
			        # Swap the closing ">" for "/>"
			        token["data"] = token["data"][:-1] + "/>"

			    token["type"] = "Characters"

			    del token["name"]
			    return token

			def sanitize_css(self, style):
			    """Sanitizes css in style tags

			    Entities are converted, ``url(...)`` values are removed, and the
			    result is validated; if validation fails the whole value is dropped.
			    Of the remaining declarations only those whose property is in
			    ``self.allowed_css_properties`` or ``self.allowed_svg_properties``
			    are kept.

			    :arg style: the value of a style attribute

			    :returns: the sanitized css as ``"prop: value;"`` declarations joined
			        by spaces, or ``""`` if nothing is left or the css is rejected

			    """
			    # Convert entities in the style so that it can be parsed as CSS
			    style = html5lib_shim.convert_entities(style)

			    # Drop any url values before we do anything else. All whitespace in
			    # and around the parentheses is optional.
			    style = re.compile(r"url\s*\(\s*[^\s)]+?\s*\)\s*").sub(" ", style)

			    # The gauntlet of sanitization

			    # Validate the css in the style tag and if it's not valid, then drop
			    # the whole thing.
			    parts = style.split(";")
			    gauntlet = re.compile(
			        r"""^(  # consider a style attribute value as composed of:
			        [/:,#%!.\s\w]    # a non-newline character
			        |\w-\w           # 3 characters in the form \w-\w
			        |'[\s\w]+'\s*    # a single quoted string of [\s\w]+ with trailing space
			        |"[\s\w]+"       # a double quoted string of [\s\w]+
			        |\([\d,%\.\s]+\) # a parenthesized string of one or more digits, commas, periods, ...
			        )*$""",  # ... percent signs, or whitespace e.g. from 'color: hsl(30,100%,50%)'
			        flags=re.U | re.VERBOSE,
			    )

			    if not all(gauntlet.match(part) for part in parts):
			        return ""

			    # The whole value has to be a sequence of "prop: value" declarations
			    if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
			        return ""

			    clean = []
			    for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
			        if not value:
			            continue

			        prop_name = prop.lower()
			        if (
			            prop_name in self.allowed_css_properties
			            or prop_name in self.allowed_svg_properties
			        ):
			            clean.append(prop + ": " + value + ";")

			    return " ".join(clean)