Uploaded Test files
This commit is contained in:
		
							parent
							
								
									f584ad9d97
								
							
						
					
					
						commit
						2e81cb7d99
					
				
					 16627 changed files with 2065359 additions and 102444 deletions
				
			
		| 
						 | 
				
			
			@ -0,0 +1,66 @@
 | 
			
		|||
Credits
 | 
			
		||||
=======
 | 
			
		||||
 | 
			
		||||
``html5lib`` is written and maintained by:
 | 
			
		||||
 | 
			
		||||
- James Graham
 | 
			
		||||
- Sam Sneddon
 | 
			
		||||
- Łukasz Langa
 | 
			
		||||
- Will Kahn-Greene
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Patches and suggestions
 | 
			
		||||
-----------------------
 | 
			
		||||
(In chronological order, by first commit:)
 | 
			
		||||
 | 
			
		||||
- Anne van Kesteren
 | 
			
		||||
- Lachlan Hunt
 | 
			
		||||
- lantis63
 | 
			
		||||
- Sam Ruby
 | 
			
		||||
- Thomas Broyer
 | 
			
		||||
- Tim Fletcher
 | 
			
		||||
- Mark Pilgrim
 | 
			
		||||
- Ryan King
 | 
			
		||||
- Philip Taylor
 | 
			
		||||
- Edward Z. Yang
 | 
			
		||||
- fantasai
 | 
			
		||||
- Philip Jägenstedt
 | 
			
		||||
- Ms2ger
 | 
			
		||||
- Mohammad Taha Jahangir
 | 
			
		||||
- Andy Wingo
 | 
			
		||||
- Andreas Madsack
 | 
			
		||||
- Karim Valiev
 | 
			
		||||
- Juan Carlos Garcia Segovia
 | 
			
		||||
- Mike West
 | 
			
		||||
- Marc DM
 | 
			
		||||
- Simon Sapin
 | 
			
		||||
- Michael[tm] Smith
 | 
			
		||||
- Ritwik Gupta
 | 
			
		||||
- Marc Abramowitz
 | 
			
		||||
- Tony Lopes
 | 
			
		||||
- lilbludevil
 | 
			
		||||
- Kevin
 | 
			
		||||
- Drew Hubl
 | 
			
		||||
- Austin Kumbera
 | 
			
		||||
- Jim Baker
 | 
			
		||||
- Jon Dufresne
 | 
			
		||||
- Donald Stufft
 | 
			
		||||
- Alex Gaynor
 | 
			
		||||
- Nik Nyby
 | 
			
		||||
- Jakub Wilk
 | 
			
		||||
- Sigmund Cherem
 | 
			
		||||
- Gabi Davar
 | 
			
		||||
- Florian Mounier
 | 
			
		||||
- neumond
 | 
			
		||||
- Vitalik Verhovodov
 | 
			
		||||
- Kovid Goyal
 | 
			
		||||
- Adam Chainz
 | 
			
		||||
- John Vandenberg
 | 
			
		||||
- Eric Amorde
 | 
			
		||||
- Benedikt Morbach
 | 
			
		||||
- Jonathan Vanasco
 | 
			
		||||
- Tom Most
 | 
			
		||||
- Ville Skyttä
 | 
			
		||||
- Hugo van Kemenade
 | 
			
		||||
- Mark Vasilkov
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
pip
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,552 @@
 | 
			
		|||
Metadata-Version: 2.1
 | 
			
		||||
Name: html5lib
 | 
			
		||||
Version: 1.1
 | 
			
		||||
Summary: HTML parser based on the WHATWG HTML specification
 | 
			
		||||
Home-page: https://github.com/html5lib/html5lib-python
 | 
			
		||||
Maintainer: James Graham
 | 
			
		||||
Maintainer-email: james@hoppipolla.co.uk
 | 
			
		||||
License: MIT License
 | 
			
		||||
Platform: UNKNOWN
 | 
			
		||||
Classifier: Development Status :: 5 - Production/Stable
 | 
			
		||||
Classifier: Intended Audience :: Developers
 | 
			
		||||
Classifier: License :: OSI Approved :: MIT License
 | 
			
		||||
Classifier: Operating System :: OS Independent
 | 
			
		||||
Classifier: Programming Language :: Python
 | 
			
		||||
Classifier: Programming Language :: Python :: 2
 | 
			
		||||
Classifier: Programming Language :: Python :: 2.7
 | 
			
		||||
Classifier: Programming Language :: Python :: 3
 | 
			
		||||
Classifier: Programming Language :: Python :: 3.5
 | 
			
		||||
Classifier: Programming Language :: Python :: 3.6
 | 
			
		||||
Classifier: Programming Language :: Python :: 3.7
 | 
			
		||||
Classifier: Programming Language :: Python :: 3.8
 | 
			
		||||
Classifier: Programming Language :: Python :: Implementation :: CPython
 | 
			
		||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
 | 
			
		||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
 | 
			
		||||
Classifier: Topic :: Text Processing :: Markup :: HTML
 | 
			
		||||
Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*
 | 
			
		||||
Requires-Dist: six (>=1.9)
 | 
			
		||||
Requires-Dist: webencodings
 | 
			
		||||
Provides-Extra: all
 | 
			
		||||
Requires-Dist: genshi ; extra == 'all'
 | 
			
		||||
Requires-Dist: chardet (>=2.2) ; extra == 'all'
 | 
			
		||||
Requires-Dist: lxml ; (platform_python_implementation == 'CPython') and extra == 'all'
 | 
			
		||||
Provides-Extra: chardet
 | 
			
		||||
Requires-Dist: chardet (>=2.2) ; extra == 'chardet'
 | 
			
		||||
Provides-Extra: genshi
 | 
			
		||||
Requires-Dist: genshi ; extra == 'genshi'
 | 
			
		||||
Provides-Extra: lxml
 | 
			
		||||
Requires-Dist: lxml ; (platform_python_implementation == 'CPython') and extra == 'lxml'
 | 
			
		||||
 | 
			
		||||
html5lib
 | 
			
		||||
========
 | 
			
		||||
 | 
			
		||||
.. image:: https://travis-ci.org/html5lib/html5lib-python.svg?branch=master
 | 
			
		||||
    :target: https://travis-ci.org/html5lib/html5lib-python
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
html5lib is a pure-python library for parsing HTML. It is designed to
 | 
			
		||||
conform to the WHATWG HTML specification, as is implemented by all major
 | 
			
		||||
web browsers.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Usage
 | 
			
		||||
-----
 | 
			
		||||
 | 
			
		||||
Simple usage follows this pattern:
 | 
			
		||||
 | 
			
		||||
.. code-block:: python
 | 
			
		||||
 | 
			
		||||
  import html5lib
 | 
			
		||||
  with open("mydocument.html", "rb") as f:
 | 
			
		||||
      document = html5lib.parse(f)
 | 
			
		||||
 | 
			
		||||
or:
 | 
			
		||||
 | 
			
		||||
.. code-block:: python
 | 
			
		||||
 | 
			
		||||
  import html5lib
 | 
			
		||||
  document = html5lib.parse("<p>Hello World!")
 | 
			
		||||
 | 
			
		||||
By default, the ``document`` will be an ``xml.etree`` element instance.
 | 
			
		||||
Whenever possible, html5lib chooses the accelerated ``ElementTree``
 | 
			
		||||
implementation (i.e. ``xml.etree.cElementTree`` on Python 2.x).
 | 
			
		||||
 | 
			
		||||
Two other tree types are supported: ``xml.dom.minidom`` and
 | 
			
		||||
``lxml.etree``. To use an alternative format, specify the name of
 | 
			
		||||
a treebuilder:
 | 
			
		||||
 | 
			
		||||
.. code-block:: python
 | 
			
		||||
 | 
			
		||||
  import html5lib
 | 
			
		||||
  with open("mydocument.html", "rb") as f:
 | 
			
		||||
      lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
 | 
			
		||||
 | 
			
		||||
When using with ``urllib2`` (Python 2), the charset from HTTP should be
 | 
			
		||||
passed into html5lib as follows:
 | 
			
		||||
 | 
			
		||||
.. code-block:: python
 | 
			
		||||
 | 
			
		||||
  from contextlib import closing
 | 
			
		||||
  from urllib2 import urlopen
 | 
			
		||||
  import html5lib
 | 
			
		||||
 | 
			
		||||
  with closing(urlopen("http://example.com/")) as f:
 | 
			
		||||
      document = html5lib.parse(f, transport_encoding=f.info().getparam("charset"))
 | 
			
		||||
 | 
			
		||||
When using with ``urllib.request`` (Python 3), the charset from HTTP
 | 
			
		||||
should be passed into html5lib as follows:
 | 
			
		||||
 | 
			
		||||
.. code-block:: python
 | 
			
		||||
 | 
			
		||||
  from urllib.request import urlopen
 | 
			
		||||
  import html5lib
 | 
			
		||||
 | 
			
		||||
  with urlopen("http://example.com/") as f:
 | 
			
		||||
      document = html5lib.parse(f, transport_encoding=f.info().get_content_charset())
 | 
			
		||||
 | 
			
		||||
To have more control over the parser, create a parser object explicitly.
 | 
			
		||||
For instance, to make the parser raise exceptions on parse errors, use:
 | 
			
		||||
 | 
			
		||||
.. code-block:: python
 | 
			
		||||
 | 
			
		||||
  import html5lib
 | 
			
		||||
  with open("mydocument.html", "rb") as f:
 | 
			
		||||
      parser = html5lib.HTMLParser(strict=True)
 | 
			
		||||
      document = parser.parse(f)
 | 
			
		||||
 | 
			
		||||
When you're instantiating parser objects explicitly, pass a treebuilder
 | 
			
		||||
class as the ``tree`` keyword argument to use an alternative document
 | 
			
		||||
format:
 | 
			
		||||
 | 
			
		||||
.. code-block:: python
 | 
			
		||||
 | 
			
		||||
  import html5lib
 | 
			
		||||
  parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
 | 
			
		||||
  minidom_document = parser.parse("<p>Hello World!")
 | 
			
		||||
 | 
			
		||||
More documentation is available at https://html5lib.readthedocs.io/.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Installation
 | 
			
		||||
------------
 | 
			
		||||
 | 
			
		||||
html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install:
 | 
			
		||||
 | 
			
		||||
.. code-block:: bash
 | 
			
		||||
 | 
			
		||||
    $ pip install html5lib
 | 
			
		||||
 | 
			
		||||
The goal is to support a (non-strict) superset of the versions that `pip
 | 
			
		||||
supports
 | 
			
		||||
<https://pip.pypa.io/en/stable/installing/#python-and-os-compatibility>`_.
 | 
			
		||||
 | 
			
		||||
Optional Dependencies
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
The following third-party libraries may be used for additional
 | 
			
		||||
functionality:
 | 
			
		||||
 | 
			
		||||
- ``lxml`` is supported as a tree format (for both building and
 | 
			
		||||
  walking) under CPython (but *not* PyPy where it is known to cause
 | 
			
		||||
  segfaults);
 | 
			
		||||
 | 
			
		||||
- ``genshi`` has a treewalker (but not builder); and
 | 
			
		||||
 | 
			
		||||
- ``chardet`` can be used as a fallback when character encoding cannot
 | 
			
		||||
  be determined.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Bugs
 | 
			
		||||
----
 | 
			
		||||
 | 
			
		||||
Please report any bugs on the `issue tracker
 | 
			
		||||
<https://github.com/html5lib/html5lib-python/issues>`_.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Tests
 | 
			
		||||
-----
 | 
			
		||||
 | 
			
		||||
Unit tests require the ``pytest`` and ``mock`` libraries and can be
 | 
			
		||||
run using the ``py.test`` command in the root directory.
 | 
			
		||||
 | 
			
		||||
Test data are contained in a separate `html5lib-tests
 | 
			
		||||
<https://github.com/html5lib/html5lib-tests>`_ repository and included
 | 
			
		||||
as a submodule, thus for git checkouts they must be initialized::
 | 
			
		||||
 | 
			
		||||
  $ git submodule init
 | 
			
		||||
  $ git submodule update
 | 
			
		||||
 | 
			
		||||
If you have all compatible Python implementations available on your
 | 
			
		||||
system, you can run tests on all of them using the ``tox`` utility,
 | 
			
		||||
which can be found on PyPI.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Questions?
 | 
			
		||||
----------
 | 
			
		||||
 | 
			
		||||
There's a mailing list available for support on Google Groups,
 | 
			
		||||
`html5lib-discuss <http://groups.google.com/group/html5lib-discuss>`_,
 | 
			
		||||
though you may get a quicker response asking on IRC in `#whatwg on
 | 
			
		||||
irc.freenode.net <http://wiki.whatwg.org/wiki/IRC>`_.
 | 
			
		||||
 | 
			
		||||
Change Log
 | 
			
		||||
----------
 | 
			
		||||
 | 
			
		||||
1.1
 | 
			
		||||
~~~
 | 
			
		||||
 | 
			
		||||
UNRELEASED
 | 
			
		||||
 | 
			
		||||
Breaking changes:
 | 
			
		||||
 | 
			
		||||
* Drop support for Python 3.3. (#358)
 | 
			
		||||
* Drop support for Python 3.4. (#421)
 | 
			
		||||
 | 
			
		||||
Deprecations:
 | 
			
		||||
 | 
			
		||||
* Deprecate the ``html5lib`` sanitizer (``html5lib.serialize(sanitize=True)`` and
 | 
			
		||||
  ``html5lib.filters.sanitizer``). We recommend users migrate to `Bleach
 | 
			
		||||
  <https://github.com/mozilla/bleach>`_. Please let us know if Bleach doesn't suffice for your
 | 
			
		||||
  use. (#443)
 | 
			
		||||
 | 
			
		||||
Other changes:
 | 
			
		||||
 | 
			
		||||
* Try to import from ``collections.abc`` to remove DeprecationWarning and ensure
 | 
			
		||||
  ``html5lib`` keeps working in future Python versions. (#403)
 | 
			
		||||
* Drop optional ``datrie`` dependency. (#442)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
1.0.1
 | 
			
		||||
~~~~~
 | 
			
		||||
 | 
			
		||||
Released on December 7, 2017
 | 
			
		||||
 | 
			
		||||
Breaking changes:
 | 
			
		||||
 | 
			
		||||
* Drop support for Python 2.6. (#330) (Thank you, Hugo, Will Kahn-Greene!)
 | 
			
		||||
* Remove ``utils/spider.py`` (#353) (Thank you, Jon Dufresne!)
 | 
			
		||||
 | 
			
		||||
Features:
 | 
			
		||||
 | 
			
		||||
* Improve documentation. (#300, #307) (Thank you, Jon Dufresne, Tom Most,
 | 
			
		||||
  Will Kahn-Greene!)
 | 
			
		||||
* Add iframe seamless boolean attribute. (Thank you, Ritwik Gupta!)
 | 
			
		||||
* Add itemscope as a boolean attribute. (#194) (Thank you, Jonathan Vanasco!)
 | 
			
		||||
* Support Python 3.6. (#333) (Thank you, Jon Dufresne!)
 | 
			
		||||
* Add CI support for Windows using AppVeyor. (Thank you, John Vandenberg!)
 | 
			
		||||
* Improve testing and CI and add code coverage. (#323, #334) (Thank you, Jon
 | 
			
		||||
  Dufresne, John Vandenberg, Sam Sneddon, Will Kahn-Greene!)
 | 
			
		||||
* Semver-compliant version number.
 | 
			
		||||
 | 
			
		||||
Bug fixes:
 | 
			
		||||
 | 
			
		||||
* Add support for setuptools < 18.5 to support environment markers. (Thank you,
 | 
			
		||||
  John Vandenberg!)
 | 
			
		||||
* Add explicit dependency for six >= 1.9. (Thank you, Eric Amorde!)
 | 
			
		||||
* Fix regexes to work with Python 3.7 regex adjustments. (#318, #379) (Thank
 | 
			
		||||
  you, Benedikt Morbach, Ville Skyttä, Mark Vasilkov!)
 | 
			
		||||
* Fix alphabeticalattributes filter namespace bug. (#324) (Thank you, Will
 | 
			
		||||
  Kahn-Greene!)
 | 
			
		||||
* Include license file in generated wheel package. (#350) (Thank you, Jon
 | 
			
		||||
  Dufresne!)
 | 
			
		||||
* Fix annotation-xml typo. (#339) (Thank you, Will Kahn-Greene!)
 | 
			
		||||
* Allow uppercase hex characters in CSS colour check. (#377) (Thank you,
 | 
			
		||||
  Komal Dembla, Hugo!)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
1.0
 | 
			
		||||
~~~
 | 
			
		||||
 | 
			
		||||
Released and unreleased on December 7, 2017. Badly packaged release.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.999999999/1.0b10
 | 
			
		||||
~~~~~~~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
Released on July 15, 2016
 | 
			
		||||
 | 
			
		||||
* Fix attribute order going to the tree builder to be document order
 | 
			
		||||
  instead of reverse document order(!).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.99999999/1.0b9
 | 
			
		||||
~~~~~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
Released on July 14, 2016
 | 
			
		||||
 | 
			
		||||
* **Added ordereddict as a mandatory dependency on Python 2.6.**
 | 
			
		||||
 | 
			
		||||
* Added ``lxml``, ``genshi``, ``datrie``, ``charade``, and ``all``
 | 
			
		||||
  extras that will do the right thing based on the specific
 | 
			
		||||
  interpreter implementation.
 | 
			
		||||
 | 
			
		||||
* Now requires the ``mock`` package for the testsuite.
 | 
			
		||||
 | 
			
		||||
* Cease supporting DATrie under PyPy.
 | 
			
		||||
 | 
			
		||||
* **Remove PullDOM support, as this hasn't ever been properly
 | 
			
		||||
  tested, doesn't entirely work, and as far as I can tell is
 | 
			
		||||
  completely unused by anyone.**
 | 
			
		||||
 | 
			
		||||
* Move testsuite to ``py.test``.
 | 
			
		||||
 | 
			
		||||
* **Fix #124: move to webencodings for decoding the input byte stream;
 | 
			
		||||
  this makes html5lib compliant with the Encoding Standard, and
 | 
			
		||||
  introduces a required dependency on webencodings.**
 | 
			
		||||
 | 
			
		||||
* **Cease supporting Python 3.2 (in both CPython and PyPy forms).**
 | 
			
		||||
 | 
			
		||||
* **Fix comments containing double-dash with lxml 3.5 and above.**
 | 
			
		||||
 | 
			
		||||
* **Use scripting disabled by default (as we don't implement
 | 
			
		||||
  scripting).**
 | 
			
		||||
 | 
			
		||||
* **Fix #11, avoiding the XSS bug potentially caused by serializer
 | 
			
		||||
  allowing attribute values to be escaped out of in old browser versions,
 | 
			
		||||
  changing the quote_attr_values option on serializer to take one of
 | 
			
		||||
  three values, "always" (the old True value), "legacy" (the new option,
 | 
			
		||||
  and the new default), and "spec" (the old False value, and the old
 | 
			
		||||
  default).**
 | 
			
		||||
 | 
			
		||||
* **Fix #72 by rewriting the sanitizer to apply only to treewalkers
 | 
			
		||||
  (instead of the tokenizer); as such, this will require amending all
 | 
			
		||||
  callers of it to use it via the treewalker API.**
 | 
			
		||||
 | 
			
		||||
* **Drop support of charade, now that chardet is supported once more.**
 | 
			
		||||
 | 
			
		||||
* **Replace the charset keyword argument on parse and related methods
 | 
			
		||||
  with a set of keyword arguments: override_encoding, transport_encoding,
 | 
			
		||||
  same_origin_parent_encoding, likely_encoding, and default_encoding.**
 | 
			
		||||
 | 
			
		||||
* **Move filters._base, treebuilder._base, and treewalkers._base to .base
 | 
			
		||||
  to clarify their status as public.**
 | 
			
		||||
 | 
			
		||||
* **Get rid of the sanitizer package. Merge sanitizer.sanitize into the
 | 
			
		||||
  sanitizer.htmlsanitizer module and move that to sanitizer. This means
 | 
			
		||||
  anyone who used sanitizer.sanitize or sanitizer.HTMLSanitizer needs no
 | 
			
		||||
  code changes.**
 | 
			
		||||
 | 
			
		||||
* **Rename treewalkers.lxmletree to .etree_lxml and
 | 
			
		||||
  treewalkers.genshistream to .genshi to have a consistent API.**
 | 
			
		||||
 | 
			
		||||
* Move a whole load of stuff (inputstream, ihatexml, trie, tokenizer,
 | 
			
		||||
  utils) to be underscore prefixed to clarify their status as private.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.9999999/1.0b8
 | 
			
		||||
~~~~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
Released on September 10, 2015
 | 
			
		||||
 | 
			
		||||
* Fix #195: fix the sanitizer to drop broken URLs (it threw an
 | 
			
		||||
  exception between 0.9999 and 0.999999).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.999999/1.0b7
 | 
			
		||||
~~~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
Released on July 7, 2015
 | 
			
		||||
 | 
			
		||||
* Fix #189: fix the sanitizer to allow relative URLs again (as it did
 | 
			
		||||
  prior to 0.9999/1.0b5).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.99999/1.0b6
 | 
			
		||||
~~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
Released on April 30, 2015
 | 
			
		||||
 | 
			
		||||
* Fix #188: fix the sanitizer to not throw an exception when sanitizing
 | 
			
		||||
  bogus data URLs.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.9999/1.0b5
 | 
			
		||||
~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
Released on April 29, 2015
 | 
			
		||||
 | 
			
		||||
* Fix #153: Sanitizer fails to treat some attributes as URLs. Despite how
 | 
			
		||||
  this sounds, this has no known security implications.  No known version
 | 
			
		||||
  of IE (5.5 to current), Firefox (3 to current), Safari (6 to current),
 | 
			
		||||
  Chrome (1 to current), or Opera (12 to current) will run any script
 | 
			
		||||
  provided in these attributes.
 | 
			
		||||
 | 
			
		||||
* Pass error message to the ParseError exception in strict parsing mode.
 | 
			
		||||
 | 
			
		||||
* Allow data URIs in the sanitizer, with a whitelist of content-types.
 | 
			
		||||
 | 
			
		||||
* Add support for Python implementations that don't support lone
 | 
			
		||||
  surrogates (read: Jython). Fixes #2.
 | 
			
		||||
 | 
			
		||||
* Remove localization of error messages. This functionality was totally
 | 
			
		||||
  unused (and untested that everything was localizable), so we may as
 | 
			
		||||
  well follow numerous browsers in not supporting translating technical
 | 
			
		||||
  strings.
 | 
			
		||||
 | 
			
		||||
* Expose treewalkers.pprint as a public API.
 | 
			
		||||
 | 
			
		||||
* Add a documentEncoding property to HTML5Parser, fix #121.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.999
 | 
			
		||||
~~~~~
 | 
			
		||||
 | 
			
		||||
Released on December 23, 2013
 | 
			
		||||
 | 
			
		||||
* Fix #127: add work-around for CPython issue #20007: .read(0) on
 | 
			
		||||
  http.client.HTTPResponse drops the rest of the content.
 | 
			
		||||
 | 
			
		||||
* Fix #115: lxml treewalker can now deal with fragments containing, at
 | 
			
		||||
  their root level, text nodes with non-ASCII characters on Python 2.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.99
 | 
			
		||||
~~~~
 | 
			
		||||
 | 
			
		||||
Released on September 10, 2013
 | 
			
		||||
 | 
			
		||||
* No library changes from 1.0b3; released as 0.99 as pip has changed
 | 
			
		||||
  behaviour from 1.4 to avoid installing pre-release versions per
 | 
			
		||||
  PEP 440.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
1.0b3
 | 
			
		||||
~~~~~
 | 
			
		||||
 | 
			
		||||
Released on July 24, 2013
 | 
			
		||||
 | 
			
		||||
* Removed ``RecursiveTreeWalker`` from ``treewalkers._base``. Any
 | 
			
		||||
  implementation using it should be moved to
 | 
			
		||||
  ``NonRecursiveTreeWalker``, as everything bundled with html5lib has
 | 
			
		||||
  for years.
 | 
			
		||||
 | 
			
		||||
* Fix #67 so that ``BufferedStream`` correctly returns a bytes
 | 
			
		||||
  object, thereby fixing any case where html5lib is passed a
 | 
			
		||||
  non-seekable RawIOBase-like object.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
1.0b2
 | 
			
		||||
~~~~~
 | 
			
		||||
 | 
			
		||||
Released on June 27, 2013
 | 
			
		||||
 | 
			
		||||
* Removed reordering of attributes within the serializer. There is now
 | 
			
		||||
  an ``alphabetical_attributes`` option which preserves the previous
 | 
			
		||||
  behaviour through a new filter. This allows attribute order to be
 | 
			
		||||
  preserved through html5lib if the tree builder preserves order.
 | 
			
		||||
 | 
			
		||||
* Removed ``dom2sax`` from DOM treebuilders. It has been replaced by
 | 
			
		||||
  ``treeadapters.sax.to_sax`` which is generic and supports any
 | 
			
		||||
  treewalker; it also resolves all known bugs with ``dom2sax``.
 | 
			
		||||
 | 
			
		||||
* Fix treewalker assertions on hitting bytes strings on
 | 
			
		||||
  Python 2. Previous to 1.0b1, treewalkers coped with mixed
 | 
			
		||||
  bytes/unicode data on Python 2; this reintroduces this prior
 | 
			
		||||
  behaviour on Python 2. Behaviour is unchanged on Python 3.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
1.0b1
 | 
			
		||||
~~~~~
 | 
			
		||||
 | 
			
		||||
Released on May 17, 2013
 | 
			
		||||
 | 
			
		||||
* Implementation updated to implement the `HTML specification
 | 
			
		||||
  <http://www.whatwg.org/specs/web-apps/current-work/>`_ as of 5th May
 | 
			
		||||
  2013 (`SVN <http://svn.whatwg.org/webapps/>`_ revision r7867).
 | 
			
		||||
 | 
			
		||||
* Python 3.2+ supported in a single codebase using the ``six`` library.
 | 
			
		||||
 | 
			
		||||
* Removed support for Python 2.5 and older.
 | 
			
		||||
 | 
			
		||||
* Removed the deprecated Beautiful Soup 3 treebuilder.
 | 
			
		||||
  ``beautifulsoup4`` can use ``html5lib`` as a parser instead. Note that
 | 
			
		||||
  since it doesn't support namespaces, foreign content like SVG and
 | 
			
		||||
  MathML is parsed incorrectly.
 | 
			
		||||
 | 
			
		||||
* Removed ``simpletree`` from the package. The default tree builder is
 | 
			
		||||
  now ``etree`` (using the ``xml.etree.cElementTree`` implementation if
 | 
			
		||||
  available, and ``xml.etree.ElementTree`` otherwise).
 | 
			
		||||
 | 
			
		||||
* Removed the ``XHTMLSerializer`` as it never actually guaranteed its
 | 
			
		||||
  output was well-formed XML, and hence provided little of use.
 | 
			
		||||
 | 
			
		||||
* Removed default DOM treebuilder, so ``html5lib.treebuilders.dom`` is no
 | 
			
		||||
  longer supported. ``html5lib.treebuilders.getTreeBuilder("dom")`` will
 | 
			
		||||
  return the default DOM treebuilder, which uses ``xml.dom.minidom``.
 | 
			
		||||
 | 
			
		||||
* Optional heuristic character encoding detection now based on
 | 
			
		||||
  ``charade`` for Python 2.6 - 3.3 compatibility.
 | 
			
		||||
 | 
			
		||||
* Optional ``Genshi`` treewalker support fixed.
 | 
			
		||||
 | 
			
		||||
* Many bugfixes, including:
 | 
			
		||||
 | 
			
		||||
  * #33: null in attribute value breaks XML AttValue;
 | 
			
		||||
 | 
			
		||||
  * #4: nested, indirect descendant, <button> causes infinite loop;
 | 
			
		||||
 | 
			
		||||
  * `Google Code 215
 | 
			
		||||
    <http://code.google.com/p/html5lib/issues/detail?id=215>`_: Properly
 | 
			
		||||
    detect seekable streams;
 | 
			
		||||
 | 
			
		||||
  * `Google Code 206
 | 
			
		||||
    <http://code.google.com/p/html5lib/issues/detail?id=206>`_: add
 | 
			
		||||
    support for <video preload=...>, <audio preload=...>;
 | 
			
		||||
 | 
			
		||||
  * `Google Code 205
 | 
			
		||||
    <http://code.google.com/p/html5lib/issues/detail?id=205>`_: add
 | 
			
		||||
    support for <video poster=...>;
 | 
			
		||||
 | 
			
		||||
  * `Google Code 202
 | 
			
		||||
    <http://code.google.com/p/html5lib/issues/detail?id=202>`_: Unicode
 | 
			
		||||
    file breaks InputStream.
 | 
			
		||||
 | 
			
		||||
* Source code is now mostly PEP 8 compliant.
 | 
			
		||||
 | 
			
		||||
* Test harness has been improved and now depends on ``nose``.
 | 
			
		||||
 | 
			
		||||
* Documentation updated and moved to https://html5lib.readthedocs.io/.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.95
 | 
			
		||||
~~~~
 | 
			
		||||
 | 
			
		||||
Released on February 11, 2012
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.90
 | 
			
		||||
~~~~
 | 
			
		||||
 | 
			
		||||
Released on January 17, 2010
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.11.1
 | 
			
		||||
~~~~~~
 | 
			
		||||
 | 
			
		||||
Released on June 12, 2008
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.11
 | 
			
		||||
~~~~
 | 
			
		||||
 | 
			
		||||
Released on June 10, 2008
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.10
 | 
			
		||||
~~~~
 | 
			
		||||
 | 
			
		||||
Released on October 7, 2007
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.9
 | 
			
		||||
~~~
 | 
			
		||||
 | 
			
		||||
Released on March 11, 2007
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
0.2
 | 
			
		||||
~~~
 | 
			
		||||
 | 
			
		||||
Released on January 8, 2007
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,41 @@
 | 
			
		|||
html5lib-1.1.dist-info/AUTHORS.rst,sha256=DrNAMifoDpuQyJn-KW-H6K8Tt2a5rKnV2UF4-DRrGUI,983
 | 
			
		||||
html5lib-1.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
 | 
			
		||||
html5lib-1.1.dist-info/LICENSE,sha256=FqOZkWGekvGGgJMtoqkZn999ld8-yu3FLqBiGKq6_W8,1084
 | 
			
		||||
html5lib-1.1.dist-info/METADATA,sha256=Y3w-nd_22HQnQRy3yypVsV_ke2FF94uUD4-vGpc2DnI,16076
 | 
			
		||||
html5lib-1.1.dist-info/RECORD,,
 | 
			
		||||
html5lib-1.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 | 
			
		||||
html5lib-1.1.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
 | 
			
		||||
html5lib-1.1.dist-info/top_level.txt,sha256=XEX6CHpskSmvjJB4tP6m4Q5NYXhIf_0ceMc0PNbzJPQ,9
 | 
			
		||||
html5lib/__init__.py,sha256=pWnYcfZ69wNLrdQL7bpr49FUi8O8w0KhKCOHsyRgYGQ,1143
 | 
			
		||||
html5lib/_ihatexml.py,sha256=ifOwF7pXqmyThIXc3boWc96s4MDezqRrRVp7FwDYUFs,16728
 | 
			
		||||
html5lib/_inputstream.py,sha256=IKuMiY8rzb7pqIGCpbvTqsxysLEpgEHWYvYEFu4LUAI,32300
 | 
			
		||||
html5lib/_tokenizer.py,sha256=WvJQa2Mli4NtTmhLXkX8Jy5FcWttqCaiDTiKyaw8D-k,77028
 | 
			
		||||
html5lib/_trie/__init__.py,sha256=nqfgO910329BEVJ5T4psVwQtjd2iJyEXQ2-X8c1YxwU,109
 | 
			
		||||
html5lib/_trie/_base.py,sha256=CaybYyMro8uERQYjby2tTeSUatnWDfWroUN9N7ety5w,1013
 | 
			
		||||
html5lib/_trie/py.py,sha256=zg7RZSHxJ8mLmuI_7VEIV8AomISrgkvqCP477AgXaG0,1763
 | 
			
		||||
html5lib/_utils.py,sha256=AxAJSG15eyarCgKMnlUwzs1X6jFHXqEvhlYEOxAFmis,4919
 | 
			
		||||
html5lib/constants.py,sha256=Ll-yzLU_jcjyAI_h57zkqZ7aQWE5t5xA4y_jQgoUUhw,83464
 | 
			
		||||
html5lib/filters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 | 
			
		||||
html5lib/filters/alphabeticalattributes.py,sha256=lViZc2JMCclXi_5gduvmdzrRxtO5Xo9ONnbHBVCsykU,919
 | 
			
		||||
html5lib/filters/base.py,sha256=z-IU9ZAYjpsVsqmVt7kuWC63jR11hDMr6CVrvuao8W0,286
 | 
			
		||||
html5lib/filters/inject_meta_charset.py,sha256=egDXUEHXmAG9504xz0K6ALDgYkvUrC2q15YUVeNlVQg,2945
 | 
			
		||||
html5lib/filters/lint.py,sha256=upXATs6By7cot7o0bnNqR15sPq2Fn6Vnjvoy3gyO_rY,3631
 | 
			
		||||
html5lib/filters/optionaltags.py,sha256=8lWT75J0aBOHmPgfmqTHSfPpPMp01T84NKu0CRedxcE,10588
 | 
			
		||||
html5lib/filters/sanitizer.py,sha256=XGNSdzIqDTaHot1V-rRj1V_XOolApJ7n95tHP9JcgNU,26885
 | 
			
		||||
html5lib/filters/whitespace.py,sha256=8eWqZxd4UC4zlFGW6iyY6f-2uuT8pOCSALc3IZt7_t4,1214
 | 
			
		||||
html5lib/html5parser.py,sha256=w5hZJh0cvD3g4CS196DiTmuGpSKCMYe1GS46-yf_WZQ,117174
 | 
			
		||||
html5lib/serializer.py,sha256=K2kfoLyMPMFPfdusfR30SrxNkf0mJB92-P5_RntyaaI,15747
 | 
			
		||||
html5lib/treeadapters/__init__.py,sha256=18hyI-at2aBsdKzpwRwa5lGF1ipgctaTYXoU9En2ZQg,650
 | 
			
		||||
html5lib/treeadapters/genshi.py,sha256=CH27pAsDKmu4ZGkAUrwty7u0KauGLCZRLPMzaO3M5vo,1715
 | 
			
		||||
html5lib/treeadapters/sax.py,sha256=BKS8woQTnKiqeffHsxChUqL4q2ZR_wb5fc9MJ3zQC8s,1776
 | 
			
		||||
html5lib/treebuilders/__init__.py,sha256=AysSJyvPfikCMMsTVvaxwkgDieELD5dfR8FJIAuq7hY,3592
 | 
			
		||||
html5lib/treebuilders/base.py,sha256=oeZNGEB-kt90YJGVH05gb5a8E7ids2AbYwGRsVCieWk,14553
 | 
			
		||||
html5lib/treebuilders/dom.py,sha256=22whb0C71zXIsai5mamg6qzBEiigcBIvaDy4Asw3at0,8925
 | 
			
		||||
html5lib/treebuilders/etree.py,sha256=EbmHx-wQ-11MVucTPtF7Ul92-mQGN3Udu_KfDn-Ifhk,12824
 | 
			
		||||
html5lib/treebuilders/etree_lxml.py,sha256=OazDHZGO_q4FnVs4Dhs4hzzn2JwGAOs-rfV8LAlUGW4,14754
 | 
			
		||||
html5lib/treewalkers/__init__.py,sha256=OBPtc1TU5mGyy18QDMxKEyYEz0wxFUUNj5v0-XgmYhY,5719
 | 
			
		||||
html5lib/treewalkers/base.py,sha256=ouiOsuSzvI0KgzdWP8PlxIaSNs9falhbiinAEc_UIJY,7476
 | 
			
		||||
html5lib/treewalkers/dom.py,sha256=EHyFR8D8lYNnyDU9lx_IKigVJRyecUGua0mOi7HBukc,1413
 | 
			
		||||
html5lib/treewalkers/etree.py,sha256=gkD4tfEfRWPsEGvgHHJxZmKZXUvBzVVGz3v5C_MIiOE,4539
 | 
			
		||||
html5lib/treewalkers/etree_lxml.py,sha256=eLedbn6nPjlpebibsWVijey7WEpzDwxU3ubwUoudBuA,6345
 | 
			
		||||
html5lib/treewalkers/genshi.py,sha256=4D2PECZ5n3ZN3qu3jMl9yY7B81jnQApBQSVlfaIuYbA,2309
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,6 @@
 | 
			
		|||
Wheel-Version: 1.0
 | 
			
		||||
Generator: bdist_wheel (0.34.2)
 | 
			
		||||
Root-Is-Purelib: true
 | 
			
		||||
Tag: py2-none-any
 | 
			
		||||
Tag: py3-none-any
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
html5lib
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue