Uploaded Test files
This commit is contained in:
parent
f584ad9d97
commit
2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
58
venv/Lib/site-packages/parso/__init__.py
Normal file
58
venv/Lib/site-packages/parso/__init__.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
r"""
|
||||
Parso is a Python parser that supports error recovery and round-trip parsing
|
||||
for different Python versions (in multiple Python versions). Parso is also able
|
||||
to list multiple syntax errors in your python file.
|
||||
|
||||
Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful
|
||||
for other projects as well.
|
||||
|
||||
Parso consists of a small API to parse Python and analyse the syntax tree.
|
||||
|
||||
.. _jedi: https://github.com/davidhalter/jedi
|
||||
|
||||
A simple example:
|
||||
|
||||
>>> import parso
|
||||
>>> module = parso.parse('hello + 1', version="3.6")
|
||||
>>> expr = module.children[0]
|
||||
>>> expr
|
||||
PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>])
|
||||
>>> print(expr.get_code())
|
||||
hello + 1
|
||||
>>> name = expr.children[0]
|
||||
>>> name
|
||||
<Name: hello@1,0>
|
||||
>>> name.end_pos
|
||||
(1, 5)
|
||||
>>> expr.end_pos
|
||||
(1, 9)
|
||||
|
||||
To list multiple issues:
|
||||
|
||||
>>> grammar = parso.load_grammar()
|
||||
>>> module = grammar.parse('foo +\nbar\ncontinue')
|
||||
>>> error1, error2 = grammar.iter_errors(module)
|
||||
>>> error1.message
|
||||
'SyntaxError: invalid syntax'
|
||||
>>> error2.message
|
||||
"SyntaxError: 'continue' not properly in loop"
|
||||
"""
|
||||
|
||||
from parso.parser import ParserSyntaxError
|
||||
from parso.grammar import Grammar, load_grammar
|
||||
from parso.utils import split_lines, python_bytes_to_unicode
|
||||
|
||||
|
||||
__version__ = '0.7.1'
|
||||
|
||||
|
||||
def parse(code=None, **kwargs):
|
||||
"""
|
||||
A utility function to avoid loading grammars.
|
||||
Params are documented in :py:meth:`parso.Grammar.parse`.
|
||||
|
||||
:param str version: The version used by :py:func:`parso.load_grammar`.
|
||||
"""
|
||||
version = kwargs.pop('version', None)
|
||||
grammar = load_grammar(version=version)
|
||||
return grammar.parse(code, **kwargs)
|
19
venv/Lib/site-packages/parso/__init__.pyi
Normal file
19
venv/Lib/site-packages/parso/__init__.pyi
Normal file
|
@ -0,0 +1,19 @@
|
|||
from typing import Any, Optional, Union
|
||||
|
||||
from parso.grammar import Grammar as Grammar, load_grammar as load_grammar
|
||||
from parso.parser import ParserSyntaxError as ParserSyntaxError
|
||||
from parso.utils import python_bytes_to_unicode as python_bytes_to_unicode, split_lines as split_lines
|
||||
|
||||
__version__: str = ...
|
||||
|
||||
def parse(
|
||||
code: Optional[Union[str, bytes]],
|
||||
*,
|
||||
version: Optional[str] = None,
|
||||
error_recovery: bool = True,
|
||||
path: Optional[str] = None,
|
||||
start_symbol: Optional[str] = None,
|
||||
cache: bool = False,
|
||||
diff_cache: bool = False,
|
||||
cache_path: Optional[str] = None,
|
||||
) -> Any: ...
|
BIN
venv/Lib/site-packages/parso/__pycache__/__init__.cpython-36.pyc
Normal file
BIN
venv/Lib/site-packages/parso/__pycache__/__init__.cpython-36.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
venv/Lib/site-packages/parso/__pycache__/cache.cpython-36.pyc
Normal file
BIN
venv/Lib/site-packages/parso/__pycache__/cache.cpython-36.pyc
Normal file
Binary file not shown.
BIN
venv/Lib/site-packages/parso/__pycache__/file_io.cpython-36.pyc
Normal file
BIN
venv/Lib/site-packages/parso/__pycache__/file_io.cpython-36.pyc
Normal file
Binary file not shown.
BIN
venv/Lib/site-packages/parso/__pycache__/grammar.cpython-36.pyc
Normal file
BIN
venv/Lib/site-packages/parso/__pycache__/grammar.cpython-36.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
venv/Lib/site-packages/parso/__pycache__/parser.cpython-36.pyc
Normal file
BIN
venv/Lib/site-packages/parso/__pycache__/parser.cpython-36.pyc
Normal file
Binary file not shown.
BIN
venv/Lib/site-packages/parso/__pycache__/tree.cpython-36.pyc
Normal file
BIN
venv/Lib/site-packages/parso/__pycache__/tree.cpython-36.pyc
Normal file
Binary file not shown.
BIN
venv/Lib/site-packages/parso/__pycache__/utils.cpython-36.pyc
Normal file
BIN
venv/Lib/site-packages/parso/__pycache__/utils.cpython-36.pyc
Normal file
Binary file not shown.
101
venv/Lib/site-packages/parso/_compatibility.py
Normal file
101
venv/Lib/site-packages/parso/_compatibility.py
Normal file
|
@ -0,0 +1,101 @@
|
|||
"""
|
||||
To ensure compatibility from Python ``2.7`` - ``3.3``, a module has been
|
||||
created. Clearly there is huge need to use conforming syntax.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import platform
|
||||
|
||||
# unicode function
|
||||
try:
|
||||
unicode = unicode
|
||||
except NameError:
|
||||
unicode = str
|
||||
|
||||
is_pypy = platform.python_implementation() == 'PyPy'
|
||||
|
||||
|
||||
def use_metaclass(meta, *bases):
|
||||
""" Create a class with a metaclass. """
|
||||
if not bases:
|
||||
bases = (object,)
|
||||
return meta("HackClass", bases, {})
|
||||
|
||||
|
||||
try:
|
||||
encoding = sys.stdout.encoding
|
||||
if encoding is None:
|
||||
encoding = 'utf-8'
|
||||
except AttributeError:
|
||||
encoding = 'ascii'
|
||||
|
||||
|
||||
def u(string):
|
||||
"""Cast to unicode DAMMIT!
|
||||
Written because Python2 repr always implicitly casts to a string, so we
|
||||
have to cast back to a unicode (and we know that we always deal with valid
|
||||
unicode, because we check that in the beginning).
|
||||
"""
|
||||
if sys.version_info.major >= 3:
|
||||
return str(string)
|
||||
|
||||
if not isinstance(string, unicode):
|
||||
return unicode(str(string), 'UTF-8')
|
||||
return string
|
||||
|
||||
|
||||
try:
|
||||
# Python 3.3+
|
||||
FileNotFoundError = FileNotFoundError
|
||||
except NameError:
|
||||
# Python 2.7 (both IOError + OSError)
|
||||
FileNotFoundError = EnvironmentError
|
||||
try:
|
||||
# Python 3.3+
|
||||
PermissionError = PermissionError
|
||||
except NameError:
|
||||
# Python 2.7 (both IOError + OSError)
|
||||
PermissionError = EnvironmentError
|
||||
|
||||
|
||||
def utf8_repr(func):
|
||||
"""
|
||||
``__repr__`` methods in Python 2 don't allow unicode objects to be
|
||||
returned. Therefore cast them to utf-8 bytes in this decorator.
|
||||
"""
|
||||
def wrapper(self):
|
||||
result = func(self)
|
||||
if isinstance(result, unicode):
|
||||
return result.encode('utf-8')
|
||||
else:
|
||||
return result
|
||||
|
||||
if sys.version_info.major >= 3:
|
||||
return func
|
||||
else:
|
||||
return wrapper
|
||||
|
||||
|
||||
if sys.version_info < (3, 5):
|
||||
"""
|
||||
A super-minimal shim around listdir that behave like
|
||||
scandir for the information we need.
|
||||
"""
|
||||
class _DirEntry:
|
||||
|
||||
def __init__(self, name, basepath):
|
||||
self.name = name
|
||||
self.basepath = basepath
|
||||
|
||||
@property
|
||||
def path(self):
|
||||
return os.path.join(self.basepath, self.name)
|
||||
|
||||
def stat(self):
|
||||
# won't follow symlinks
|
||||
return os.lstat(os.path.join(self.basepath, self.name))
|
||||
|
||||
def scandir(dir):
|
||||
return [_DirEntry(name, dir) for name in os.listdir(dir)]
|
||||
else:
|
||||
from os import scandir
|
276
venv/Lib/site-packages/parso/cache.py
Normal file
276
venv/Lib/site-packages/parso/cache.py
Normal file
|
@ -0,0 +1,276 @@
|
|||
import time
|
||||
import os
|
||||
import sys
|
||||
import hashlib
|
||||
import gc
|
||||
import shutil
|
||||
import platform
|
||||
import errno
|
||||
import logging
|
||||
import warnings
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except:
|
||||
import pickle
|
||||
|
||||
from parso._compatibility import FileNotFoundError, PermissionError, scandir
|
||||
from parso.file_io import FileIO
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10 # 10 minutes
|
||||
"""
|
||||
Cached files should survive at least a few minutes.
|
||||
"""
|
||||
|
||||
_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30
|
||||
"""
|
||||
Maximum time for a cached file to survive if it is not
|
||||
accessed within.
|
||||
"""
|
||||
|
||||
_CACHED_SIZE_TRIGGER = 600
|
||||
"""
|
||||
This setting limits the amount of cached files. It's basically a way to start
|
||||
garbage collection.
|
||||
|
||||
The reasoning for this limit being as big as it is, is the following:
|
||||
|
||||
Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This
|
||||
makes Jedi use ~500mb of memory. Since we might want a bit more than those few
|
||||
libraries, we just increase it a bit.
|
||||
"""
|
||||
|
||||
_PICKLE_VERSION = 33
|
||||
"""
|
||||
Version number (integer) for file system cache.
|
||||
|
||||
Increment this number when there are any incompatible changes in
|
||||
the parser tree classes. For example, the following changes
|
||||
are regarded as incompatible.
|
||||
|
||||
- A class name is changed.
|
||||
- A class is moved to another module.
|
||||
- A __slot__ of a class is changed.
|
||||
"""
|
||||
|
||||
_VERSION_TAG = '%s-%s%s-%s' % (
|
||||
platform.python_implementation(),
|
||||
sys.version_info[0],
|
||||
sys.version_info[1],
|
||||
_PICKLE_VERSION
|
||||
)
|
||||
"""
|
||||
Short name for distinguish Python implementations and versions.
|
||||
|
||||
It's like `sys.implementation.cache_tag` but for Python2
|
||||
we generate something similar. See:
|
||||
http://docs.python.org/3/library/sys.html#sys.implementation
|
||||
"""
|
||||
|
||||
|
||||
def _get_default_cache_path():
|
||||
if platform.system().lower() == 'windows':
|
||||
dir_ = os.path.join(os.getenv('LOCALAPPDATA')
|
||||
or os.path.expanduser('~'), 'Parso', 'Parso')
|
||||
elif platform.system().lower() == 'darwin':
|
||||
dir_ = os.path.join('~', 'Library', 'Caches', 'Parso')
|
||||
else:
|
||||
dir_ = os.path.join(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso')
|
||||
return os.path.expanduser(dir_)
|
||||
|
||||
|
||||
_default_cache_path = _get_default_cache_path()
|
||||
"""
|
||||
The path where the cache is stored.
|
||||
|
||||
On Linux, this defaults to ``~/.cache/parso/``, on OS X to
|
||||
``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``.
|
||||
On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
|
||||
``$XDG_CACHE_HOME/parso`` is used instead of the default one.
|
||||
"""
|
||||
|
||||
_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24
|
||||
|
||||
def _get_cache_clear_lock(cache_path = None):
|
||||
"""
|
||||
The path where the cache lock is stored.
|
||||
|
||||
Cache lock will prevent continous cache clearing and only allow garbage
|
||||
collection once a day (can be configured in _CACHE_CLEAR_THRESHOLD).
|
||||
"""
|
||||
cache_path = cache_path or _get_default_cache_path()
|
||||
return FileIO(os.path.join(cache_path, "PARSO-CACHE-LOCK"))
|
||||
|
||||
|
||||
parser_cache = {}
|
||||
|
||||
|
||||
class _NodeCacheItem(object):
|
||||
def __init__(self, node, lines, change_time=None):
|
||||
self.node = node
|
||||
self.lines = lines
|
||||
if change_time is None:
|
||||
change_time = time.time()
|
||||
self.change_time = change_time
|
||||
self.last_used = change_time
|
||||
|
||||
|
||||
def load_module(hashed_grammar, file_io, cache_path=None):
|
||||
"""
|
||||
Returns a module or None, if it fails.
|
||||
"""
|
||||
p_time = file_io.get_last_modified()
|
||||
if p_time is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
module_cache_item = parser_cache[hashed_grammar][file_io.path]
|
||||
if p_time <= module_cache_item.change_time:
|
||||
module_cache_item.last_used = time.time()
|
||||
return module_cache_item.node
|
||||
except KeyError:
|
||||
return _load_from_file_system(
|
||||
hashed_grammar,
|
||||
file_io.path,
|
||||
p_time,
|
||||
cache_path=cache_path
|
||||
)
|
||||
|
||||
|
||||
def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
|
||||
cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
|
||||
try:
|
||||
try:
|
||||
if p_time > os.path.getmtime(cache_path):
|
||||
# Cache is outdated
|
||||
return None
|
||||
except OSError as e:
|
||||
if e.errno == errno.ENOENT:
|
||||
# In Python 2 instead of an IOError here we get an OSError.
|
||||
raise FileNotFoundError
|
||||
else:
|
||||
raise
|
||||
|
||||
with open(cache_path, 'rb') as f:
|
||||
gc.disable()
|
||||
try:
|
||||
module_cache_item = pickle.load(f)
|
||||
finally:
|
||||
gc.enable()
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
else:
|
||||
_set_cache_item(hashed_grammar, path, module_cache_item)
|
||||
LOG.debug('pickle loaded: %s', path)
|
||||
return module_cache_item.node
|
||||
|
||||
|
||||
def _set_cache_item(hashed_grammar, path, module_cache_item):
|
||||
if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
|
||||
# Garbage collection of old cache files.
|
||||
# We are basically throwing everything away that hasn't been accessed
|
||||
# in 10 minutes.
|
||||
cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
|
||||
for key, path_to_item_map in parser_cache.items():
|
||||
parser_cache[key] = {
|
||||
path: node_item
|
||||
for path, node_item in path_to_item_map.items()
|
||||
if node_item.last_used > cutoff_time
|
||||
}
|
||||
|
||||
parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
|
||||
|
||||
|
||||
def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
|
||||
path = file_io.path
|
||||
try:
|
||||
p_time = None if path is None else file_io.get_last_modified()
|
||||
except OSError:
|
||||
p_time = None
|
||||
pickling = False
|
||||
|
||||
item = _NodeCacheItem(module, lines, p_time)
|
||||
_set_cache_item(hashed_grammar, path, item)
|
||||
if pickling and path is not None:
|
||||
try:
|
||||
_save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
|
||||
except PermissionError:
|
||||
# It's not really a big issue if the cache cannot be saved to the
|
||||
# file system. It's still in RAM in that case. However we should
|
||||
# still warn the user that this is happening.
|
||||
warnings.warn(
|
||||
'Tried to save a file to %s, but got permission denied.',
|
||||
Warning
|
||||
)
|
||||
else:
|
||||
_remove_cache_and_update_lock(cache_path=cache_path)
|
||||
|
||||
|
||||
def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
|
||||
with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f:
|
||||
pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
|
||||
def clear_cache(cache_path=None):
|
||||
if cache_path is None:
|
||||
cache_path = _default_cache_path
|
||||
shutil.rmtree(cache_path)
|
||||
parser_cache.clear()
|
||||
|
||||
|
||||
def clear_inactive_cache(
|
||||
cache_path=None,
|
||||
inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
|
||||
):
|
||||
if cache_path is None:
|
||||
cache_path = _get_default_cache_path()
|
||||
if not os.path.exists(cache_path):
|
||||
return False
|
||||
for version_path in os.listdir(cache_path):
|
||||
version_path = os.path.join(cache_path, version_path)
|
||||
if not os.path.isdir(version_path):
|
||||
continue
|
||||
for file in scandir(version_path):
|
||||
if (
|
||||
file.stat().st_atime + _CACHED_FILE_MAXIMUM_SURVIVAL
|
||||
<= time.time()
|
||||
):
|
||||
try:
|
||||
os.remove(file.path)
|
||||
except OSError: # silently ignore all failures
|
||||
continue
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
def _remove_cache_and_update_lock(cache_path = None):
|
||||
lock = _get_cache_clear_lock(cache_path=cache_path)
|
||||
clear_lock_time = lock.get_last_modified()
|
||||
if (
|
||||
clear_lock_time is None # first time
|
||||
or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
|
||||
):
|
||||
if not lock._touch():
|
||||
# First make sure that as few as possible other cleanup jobs also
|
||||
# get started. There is still a race condition but it's probably
|
||||
# not a big problem.
|
||||
return False
|
||||
|
||||
clear_inactive_cache(cache_path = cache_path)
|
||||
|
||||
def _get_hashed_path(hashed_grammar, path, cache_path=None):
|
||||
directory = _get_cache_directory_path(cache_path=cache_path)
|
||||
|
||||
file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
|
||||
return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))
|
||||
|
||||
|
||||
def _get_cache_directory_path(cache_path=None):
|
||||
if cache_path is None:
|
||||
cache_path = _default_cache_path
|
||||
directory = os.path.join(cache_path, _VERSION_TAG)
|
||||
if not os.path.exists(directory):
|
||||
os.makedirs(directory)
|
||||
return directory
|
47
venv/Lib/site-packages/parso/file_io.py
Normal file
47
venv/Lib/site-packages/parso/file_io.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
import os
|
||||
from parso._compatibility import FileNotFoundError
|
||||
|
||||
|
||||
class FileIO(object):
|
||||
def __init__(self, path):
|
||||
self.path = path
|
||||
|
||||
def read(self): # Returns bytes/str
|
||||
# We would like to read unicode here, but we cannot, because we are not
|
||||
# sure if it is a valid unicode file. Therefore just read whatever is
|
||||
# here.
|
||||
with open(self.path, 'rb') as f:
|
||||
return f.read()
|
||||
|
||||
def get_last_modified(self):
|
||||
"""
|
||||
Returns float - timestamp or None, if path doesn't exist.
|
||||
"""
|
||||
try:
|
||||
return os.path.getmtime(self.path)
|
||||
except OSError:
|
||||
# Might raise FileNotFoundError, OSError for Python 2
|
||||
return None
|
||||
|
||||
def _touch(self):
|
||||
try:
|
||||
os.utime(self.path, None)
|
||||
except FileNotFoundError:
|
||||
try:
|
||||
file = open(self.path, 'a')
|
||||
file.close()
|
||||
except (OSError, IOError): # TODO Maybe log this?
|
||||
return False
|
||||
return True
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%s)' % (self.__class__.__name__, self.path)
|
||||
|
||||
|
||||
class KnownContentFileIO(FileIO):
|
||||
def __init__(self, path, content):
|
||||
super(KnownContentFileIO, self).__init__(path)
|
||||
self._content = content
|
||||
|
||||
def read(self):
|
||||
return self._content
|
260
venv/Lib/site-packages/parso/grammar.py
Normal file
260
venv/Lib/site-packages/parso/grammar.py
Normal file
|
@ -0,0 +1,260 @@
|
|||
import hashlib
|
||||
import os
|
||||
|
||||
from parso._compatibility import FileNotFoundError, is_pypy
|
||||
from parso.pgen2 import generate_grammar
|
||||
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
|
||||
from parso.python.diff import DiffParser
|
||||
from parso.python.tokenize import tokenize_lines, tokenize
|
||||
from parso.python.token import PythonTokenTypes
|
||||
from parso.cache import parser_cache, load_module, try_to_save_module
|
||||
from parso.parser import BaseParser
|
||||
from parso.python.parser import Parser as PythonParser
|
||||
from parso.python.errors import ErrorFinderConfig
|
||||
from parso.python import pep8
|
||||
from parso.file_io import FileIO, KnownContentFileIO
|
||||
from parso.normalizer import RefactoringNormalizer
|
||||
|
||||
_loaded_grammars = {}
|
||||
|
||||
|
||||
class Grammar(object):
|
||||
"""
|
||||
:py:func:`parso.load_grammar` returns instances of this class.
|
||||
|
||||
Creating custom none-python grammars by calling this is not supported, yet.
|
||||
"""
|
||||
#:param text: A BNF representation of your grammar.
|
||||
_error_normalizer_config = None
|
||||
_token_namespace = None
|
||||
_default_normalizer_config = pep8.PEP8NormalizerConfig()
|
||||
|
||||
def __init__(self, text, tokenizer, parser=BaseParser, diff_parser=None):
|
||||
self._pgen_grammar = generate_grammar(
|
||||
text,
|
||||
token_namespace=self._get_token_namespace()
|
||||
)
|
||||
self._parser = parser
|
||||
self._tokenizer = tokenizer
|
||||
self._diff_parser = diff_parser
|
||||
self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()
|
||||
|
||||
def parse(self, code=None, **kwargs):
|
||||
"""
|
||||
If you want to parse a Python file you want to start here, most likely.
|
||||
|
||||
If you need finer grained control over the parsed instance, there will be
|
||||
other ways to access it.
|
||||
|
||||
:param str code: A unicode or bytes string. When it's not possible to
|
||||
decode bytes to a string, returns a
|
||||
:py:class:`UnicodeDecodeError`.
|
||||
:param bool error_recovery: If enabled, any code will be returned. If
|
||||
it is invalid, it will be returned as an error node. If disabled,
|
||||
you will get a ParseError when encountering syntax errors in your
|
||||
code.
|
||||
:param str start_symbol: The grammar rule (nonterminal) that you want
|
||||
to parse. Only allowed to be used when error_recovery is False.
|
||||
:param str path: The path to the file you want to open. Only needed for caching.
|
||||
:param bool cache: Keeps a copy of the parser tree in RAM and on disk
|
||||
if a path is given. Returns the cached trees if the corresponding
|
||||
files on disk have not changed. Note that this stores pickle files
|
||||
on your file system (e.g. for Linux in ``~/.cache/parso/``).
|
||||
:param bool diff_cache: Diffs the cached python module against the new
|
||||
code and tries to parse only the parts that have changed. Returns
|
||||
the same (changed) module that is found in cache. Using this option
|
||||
requires you to not do anything anymore with the cached modules
|
||||
under that path, because the contents of it might change. This
|
||||
option is still somewhat experimental. If you want stability,
|
||||
please don't use it.
|
||||
:param bool cache_path: If given saves the parso cache in this
|
||||
directory. If not given, defaults to the default cache places on
|
||||
each platform.
|
||||
|
||||
:return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a
|
||||
:py:class:`parso.python.tree.Module`.
|
||||
"""
|
||||
if 'start_pos' in kwargs:
|
||||
raise TypeError("parse() got an unexpected keyword argument.")
|
||||
return self._parse(code=code, **kwargs)
|
||||
|
||||
def _parse(self, code=None, error_recovery=True, path=None,
|
||||
start_symbol=None, cache=False, diff_cache=False,
|
||||
cache_path=None, file_io=None, start_pos=(1, 0)):
|
||||
"""
|
||||
Wanted python3.5 * operator and keyword only arguments. Therefore just
|
||||
wrap it all.
|
||||
start_pos here is just a parameter internally used. Might be public
|
||||
sometime in the future.
|
||||
"""
|
||||
if code is None and path is None and file_io is None:
|
||||
raise TypeError("Please provide either code or a path.")
|
||||
|
||||
if start_symbol is None:
|
||||
start_symbol = self._start_nonterminal
|
||||
|
||||
if error_recovery and start_symbol != 'file_input':
|
||||
raise NotImplementedError("This is currently not implemented.")
|
||||
|
||||
if file_io is None:
|
||||
if code is None:
|
||||
file_io = FileIO(path)
|
||||
else:
|
||||
file_io = KnownContentFileIO(path, code)
|
||||
|
||||
if cache and file_io.path is not None:
|
||||
module_node = load_module(self._hashed, file_io, cache_path=cache_path)
|
||||
if module_node is not None:
|
||||
return module_node
|
||||
|
||||
if code is None:
|
||||
code = file_io.read()
|
||||
code = python_bytes_to_unicode(code)
|
||||
|
||||
lines = split_lines(code, keepends=True)
|
||||
if diff_cache:
|
||||
if self._diff_parser is None:
|
||||
raise TypeError("You have to define a diff parser to be able "
|
||||
"to use this option.")
|
||||
try:
|
||||
module_cache_item = parser_cache[self._hashed][file_io.path]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
module_node = module_cache_item.node
|
||||
old_lines = module_cache_item.lines
|
||||
if old_lines == lines:
|
||||
return module_node
|
||||
|
||||
new_node = self._diff_parser(
|
||||
self._pgen_grammar, self._tokenizer, module_node
|
||||
).update(
|
||||
old_lines=old_lines,
|
||||
new_lines=lines
|
||||
)
|
||||
try_to_save_module(self._hashed, file_io, new_node, lines,
|
||||
# Never pickle in pypy, it's slow as hell.
|
||||
pickling=cache and not is_pypy,
|
||||
cache_path=cache_path)
|
||||
return new_node
|
||||
|
||||
tokens = self._tokenizer(lines, start_pos=start_pos)
|
||||
|
||||
p = self._parser(
|
||||
self._pgen_grammar,
|
||||
error_recovery=error_recovery,
|
||||
start_nonterminal=start_symbol
|
||||
)
|
||||
root_node = p.parse(tokens=tokens)
|
||||
|
||||
if cache or diff_cache:
|
||||
try_to_save_module(self._hashed, file_io, root_node, lines,
|
||||
# Never pickle in pypy, it's slow as hell.
|
||||
pickling=cache and not is_pypy,
|
||||
cache_path=cache_path)
|
||||
return root_node
|
||||
|
||||
def _get_token_namespace(self):
|
||||
ns = self._token_namespace
|
||||
if ns is None:
|
||||
raise ValueError("The token namespace should be set.")
|
||||
return ns
|
||||
|
||||
def iter_errors(self, node):
|
||||
"""
|
||||
Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of
|
||||
:py:class:`parso.normalizer.Issue` objects. For Python this is
|
||||
a list of syntax/indentation errors.
|
||||
"""
|
||||
if self._error_normalizer_config is None:
|
||||
raise ValueError("No error normalizer specified for this grammar.")
|
||||
|
||||
return self._get_normalizer_issues(node, self._error_normalizer_config)
|
||||
|
||||
def refactor(self, base_node, node_to_str_map):
|
||||
return RefactoringNormalizer(node_to_str_map).walk(base_node)
|
||||
|
||||
def _get_normalizer(self, normalizer_config):
|
||||
if normalizer_config is None:
|
||||
normalizer_config = self._default_normalizer_config
|
||||
if normalizer_config is None:
|
||||
raise ValueError("You need to specify a normalizer, because "
|
||||
"there's no default normalizer for this tree.")
|
||||
return normalizer_config.create_normalizer(self)
|
||||
|
||||
def _normalize(self, node, normalizer_config=None):
|
||||
"""
|
||||
TODO this is not public, yet.
|
||||
The returned code will be normalized, e.g. PEP8 for Python.
|
||||
"""
|
||||
normalizer = self._get_normalizer(normalizer_config)
|
||||
return normalizer.walk(node)
|
||||
|
||||
def _get_normalizer_issues(self, node, normalizer_config=None):
|
||||
normalizer = self._get_normalizer(normalizer_config)
|
||||
normalizer.walk(node)
|
||||
return normalizer.issues
|
||||
|
||||
def __repr__(self):
|
||||
nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
|
||||
txt = ' '.join(list(nonterminals)[:3]) + ' ...'
|
||||
return '<%s:%s>' % (self.__class__.__name__, txt)
|
||||
|
||||
|
||||
class PythonGrammar(Grammar):
|
||||
_error_normalizer_config = ErrorFinderConfig()
|
||||
_token_namespace = PythonTokenTypes
|
||||
_start_nonterminal = 'file_input'
|
||||
|
||||
def __init__(self, version_info, bnf_text):
|
||||
super(PythonGrammar, self).__init__(
|
||||
bnf_text,
|
||||
tokenizer=self._tokenize_lines,
|
||||
parser=PythonParser,
|
||||
diff_parser=DiffParser
|
||||
)
|
||||
self.version_info = version_info
|
||||
|
||||
def _tokenize_lines(self, lines, **kwargs):
|
||||
return tokenize_lines(lines, self.version_info, **kwargs)
|
||||
|
||||
def _tokenize(self, code):
|
||||
# Used by Jedi.
|
||||
return tokenize(code, self.version_info)
|
||||
|
||||
|
||||
def load_grammar(**kwargs):
|
||||
"""
|
||||
Loads a :py:class:`parso.Grammar`. The default version is the current Python
|
||||
version.
|
||||
|
||||
:param str version: A python version string, e.g. ``version='3.8'``.
|
||||
:param str path: A path to a grammar file
|
||||
"""
|
||||
def load_grammar(language='python', version=None, path=None):
|
||||
if language == 'python':
|
||||
version_info = parse_version_string(version)
|
||||
|
||||
file = path or os.path.join(
|
||||
'python',
|
||||
'grammar%s%s.txt' % (version_info.major, version_info.minor)
|
||||
)
|
||||
|
||||
global _loaded_grammars
|
||||
path = os.path.join(os.path.dirname(__file__), file)
|
||||
try:
|
||||
return _loaded_grammars[path]
|
||||
except KeyError:
|
||||
try:
|
||||
with open(path) as f:
|
||||
bnf_text = f.read()
|
||||
|
||||
grammar = PythonGrammar(version_info, bnf_text)
|
||||
return _loaded_grammars.setdefault(path, grammar)
|
||||
except FileNotFoundError:
|
||||
message = "Python version %s.%s is currently not supported." % (version_info.major, version_info.minor)
|
||||
raise NotImplementedError(message)
|
||||
else:
|
||||
raise NotImplementedError("No support for language %s." % language)
|
||||
|
||||
return load_grammar(**kwargs)
|
38
venv/Lib/site-packages/parso/grammar.pyi
Normal file
38
venv/Lib/site-packages/parso/grammar.pyi
Normal file
|
@ -0,0 +1,38 @@
|
|||
from typing import Any, Callable, Generic, Optional, Sequence, TypeVar, Union
|
||||
from typing_extensions import Literal
|
||||
|
||||
from parso.utils import PythonVersionInfo
|
||||
|
||||
_Token = Any
|
||||
_NodeT = TypeVar("_NodeT")
|
||||
|
||||
class Grammar(Generic[_NodeT]):
|
||||
_default_normalizer_config: Optional[Any] = ...
|
||||
_error_normalizer_config: Optional[Any] = None
|
||||
_start_nonterminal: str = ...
|
||||
_token_namespace: Optional[str] = None
|
||||
def __init__(
|
||||
self,
|
||||
text: str,
|
||||
tokenizer: Callable[[Sequence[str], int], Sequence[_Token]],
|
||||
parser: Any = ...,
|
||||
diff_parser: Any = ...,
|
||||
) -> None: ...
|
||||
def parse(
|
||||
self,
|
||||
code: Union[str, bytes] = ...,
|
||||
error_recovery: bool = ...,
|
||||
path: Optional[str] = ...,
|
||||
start_symbol: Optional[str] = ...,
|
||||
cache: bool = ...,
|
||||
diff_cache: bool = ...,
|
||||
cache_path: Optional[str] = ...,
|
||||
) -> _NodeT: ...
|
||||
|
||||
class PythonGrammar(Grammar):
|
||||
version_info: PythonVersionInfo
|
||||
def __init__(self, version_info: PythonVersionInfo, bnf_text: str) -> None: ...
|
||||
|
||||
def load_grammar(
|
||||
language: Literal["python"] = "python", version: Optional[str] = ..., path: str = ...
|
||||
) -> Grammar: ...
|
203
venv/Lib/site-packages/parso/normalizer.py
Normal file
203
venv/Lib/site-packages/parso/normalizer.py
Normal file
|
@ -0,0 +1,203 @@
|
|||
from contextlib import contextmanager
|
||||
|
||||
from parso._compatibility import use_metaclass
|
||||
|
||||
|
||||
class _NormalizerMeta(type):
|
||||
def __new__(cls, name, bases, dct):
|
||||
new_cls = type.__new__(cls, name, bases, dct)
|
||||
new_cls.rule_value_classes = {}
|
||||
new_cls.rule_type_classes = {}
|
||||
return new_cls
|
||||
|
||||
|
||||
class Normalizer(use_metaclass(_NormalizerMeta)):
|
||||
_rule_type_instances = {}
|
||||
_rule_value_instances = {}
|
||||
|
||||
def __init__(self, grammar, config):
|
||||
self.grammar = grammar
|
||||
self._config = config
|
||||
self.issues = []
|
||||
|
||||
self._rule_type_instances = self._instantiate_rules('rule_type_classes')
|
||||
self._rule_value_instances = self._instantiate_rules('rule_value_classes')
|
||||
|
||||
def _instantiate_rules(self, attr):
|
||||
dct = {}
|
||||
for base in type(self).mro():
|
||||
rules_map = getattr(base, attr, {})
|
||||
for type_, rule_classes in rules_map.items():
|
||||
new = [rule_cls(self) for rule_cls in rule_classes]
|
||||
dct.setdefault(type_, []).extend(new)
|
||||
return dct
|
||||
|
||||
def walk(self, node):
|
||||
self.initialize(node)
|
||||
value = self.visit(node)
|
||||
self.finalize()
|
||||
return value
|
||||
|
||||
def visit(self, node):
|
||||
try:
|
||||
children = node.children
|
||||
except AttributeError:
|
||||
return self.visit_leaf(node)
|
||||
else:
|
||||
with self.visit_node(node):
|
||||
return ''.join(self.visit(child) for child in children)
|
||||
|
||||
@contextmanager
|
||||
def visit_node(self, node):
|
||||
self._check_type_rules(node)
|
||||
yield
|
||||
|
||||
def _check_type_rules(self, node):
|
||||
for rule in self._rule_type_instances.get(node.type, []):
|
||||
rule.feed_node(node)
|
||||
|
||||
def visit_leaf(self, leaf):
|
||||
self._check_type_rules(leaf)
|
||||
|
||||
for rule in self._rule_value_instances.get(leaf.value, []):
|
||||
rule.feed_node(leaf)
|
||||
|
||||
return leaf.prefix + leaf.value
|
||||
|
||||
def initialize(self, node):
|
||||
pass
|
||||
|
||||
def finalize(self):
|
||||
pass
|
||||
|
||||
def add_issue(self, node, code, message):
|
||||
issue = Issue(node, code, message)
|
||||
if issue not in self.issues:
|
||||
self.issues.append(issue)
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def register_rule(cls, **kwargs):
|
||||
"""
|
||||
Use it as a class decorator::
|
||||
|
||||
normalizer = Normalizer('grammar', 'config')
|
||||
@normalizer.register_rule(value='foo')
|
||||
class MyRule(Rule):
|
||||
error_code = 42
|
||||
"""
|
||||
return cls._register_rule(**kwargs)
|
||||
|
||||
@classmethod
|
||||
def _register_rule(cls, value=None, values=(), type=None, types=()):
|
||||
values = list(values)
|
||||
types = list(types)
|
||||
if value is not None:
|
||||
values.append(value)
|
||||
if type is not None:
|
||||
types.append(type)
|
||||
|
||||
if not values and not types:
|
||||
raise ValueError("You must register at least something.")
|
||||
|
||||
def decorator(rule_cls):
|
||||
for v in values:
|
||||
cls.rule_value_classes.setdefault(v, []).append(rule_cls)
|
||||
for t in types:
|
||||
cls.rule_type_classes.setdefault(t, []).append(rule_cls)
|
||||
return rule_cls
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
class NormalizerConfig(object):
|
||||
normalizer_class = Normalizer
|
||||
|
||||
def create_normalizer(self, grammar):
|
||||
if self.normalizer_class is None:
|
||||
return None
|
||||
|
||||
return self.normalizer_class(grammar, self)
|
||||
|
||||
|
||||
class Issue(object):
|
||||
def __init__(self, node, code, message):
|
||||
self.code = code
|
||||
"""
|
||||
An integer code that stands for the type of error.
|
||||
"""
|
||||
self.message = message
|
||||
"""
|
||||
A message (string) for the issue.
|
||||
"""
|
||||
self.start_pos = node.start_pos
|
||||
"""
|
||||
The start position position of the error as a tuple (line, column). As
|
||||
always in |parso| the first line is 1 and the first column 0.
|
||||
"""
|
||||
self.end_pos = node.end_pos
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.start_pos == other.start_pos and self.code == other.code
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.code, self.start_pos))
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s: %s>' % (self.__class__.__name__, self.code)
|
||||
|
||||
|
||||
class Rule(object):
|
||||
code = None
|
||||
message = None
|
||||
|
||||
def __init__(self, normalizer):
|
||||
self._normalizer = normalizer
|
||||
|
||||
def is_issue(self, node):
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_node(self, node):
|
||||
return node
|
||||
|
||||
def _get_message(self, message, node):
|
||||
if message is None:
|
||||
message = self.message
|
||||
if message is None:
|
||||
raise ValueError("The message on the class is not set.")
|
||||
return message
|
||||
|
||||
def add_issue(self, node, code=None, message=None):
|
||||
if code is None:
|
||||
code = self.code
|
||||
if code is None:
|
||||
raise ValueError("The error code on the class is not set.")
|
||||
|
||||
message = self._get_message(message, node)
|
||||
|
||||
self._normalizer.add_issue(node, code, message)
|
||||
|
||||
def feed_node(self, node):
|
||||
if self.is_issue(node):
|
||||
issue_node = self.get_node(node)
|
||||
self.add_issue(issue_node)
|
||||
|
||||
|
||||
class RefactoringNormalizer(Normalizer):
|
||||
def __init__(self, node_to_str_map):
|
||||
self._node_to_str_map = node_to_str_map
|
||||
|
||||
def visit(self, node):
|
||||
try:
|
||||
return self._node_to_str_map[node]
|
||||
except KeyError:
|
||||
return super(RefactoringNormalizer, self).visit(node)
|
||||
|
||||
def visit_leaf(self, leaf):
|
||||
try:
|
||||
return self._node_to_str_map[leaf]
|
||||
except KeyError:
|
||||
return super(RefactoringNormalizer, self).visit_leaf(leaf)
|
211
venv/Lib/site-packages/parso/parser.py
Normal file
211
venv/Lib/site-packages/parso/parser.py
Normal file
|
@ -0,0 +1,211 @@
|
|||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Modifications:
|
||||
# Copyright David Halter and Contributors
|
||||
# Modifications are dual-licensed: MIT and PSF.
|
||||
# 99% of the code is different from pgen2, now.
|
||||
|
||||
"""
|
||||
The ``Parser`` tries to convert the available Python code in an easy to read
|
||||
format, something like an abstract syntax tree. The classes who represent this
|
||||
tree, are sitting in the :mod:`parso.tree` module.
|
||||
|
||||
The Python module ``tokenize`` is a very important part in the ``Parser``,
|
||||
because it splits the code into different words (tokens). Sometimes it looks a
|
||||
bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast``
|
||||
module for this? Well, ``ast`` does a very good job understanding proper Python
|
||||
code, but fails to work as soon as there's a single line of broken code.
|
||||
|
||||
There's one important optimization that needs to be known: Statements are not
|
||||
being parsed completely. ``Statement`` is just a representation of the tokens
|
||||
within the statement. This lowers memory usage and cpu time and reduces the
|
||||
complexity of the ``Parser`` (there's another parser sitting inside
|
||||
``Statement``, which produces ``Array`` and ``Call``).
|
||||
"""
|
||||
from parso import tree
|
||||
from parso.pgen2.generator import ReservedString
|
||||
|
||||
|
||||
class ParserSyntaxError(Exception):
|
||||
"""
|
||||
Contains error information about the parser tree.
|
||||
|
||||
May be raised as an exception.
|
||||
"""
|
||||
def __init__(self, message, error_leaf):
|
||||
self.message = message
|
||||
self.error_leaf = error_leaf
|
||||
|
||||
|
||||
class InternalParseError(Exception):
|
||||
"""
|
||||
Exception to signal the parser is stuck and error recovery didn't help.
|
||||
Basically this shouldn't happen. It's a sign that something is really
|
||||
wrong.
|
||||
"""
|
||||
|
||||
def __init__(self, msg, type_, value, start_pos):
|
||||
Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
|
||||
(msg, type_.name, value, start_pos))
|
||||
self.msg = msg
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.start_pos = start_pos
|
||||
|
||||
|
||||
class Stack(list):
|
||||
def _allowed_transition_names_and_token_types(self):
|
||||
def iterate():
|
||||
# An API just for Jedi.
|
||||
for stack_node in reversed(self):
|
||||
for transition in stack_node.dfa.transitions:
|
||||
if isinstance(transition, ReservedString):
|
||||
yield transition.value
|
||||
else:
|
||||
yield transition # A token type
|
||||
|
||||
if not stack_node.dfa.is_final:
|
||||
break
|
||||
|
||||
return list(iterate())
|
||||
|
||||
|
||||
class StackNode(object):
|
||||
def __init__(self, dfa):
|
||||
self.dfa = dfa
|
||||
self.nodes = []
|
||||
|
||||
@property
|
||||
def nonterminal(self):
|
||||
return self.dfa.from_rule
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)
|
||||
|
||||
|
||||
def _token_to_transition(grammar, type_, value):
|
||||
# Map from token to label
|
||||
if type_.contains_syntax:
|
||||
# Check for reserved words (keywords)
|
||||
try:
|
||||
return grammar.reserved_syntax_strings[value]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
return type_
|
||||
|
||||
|
||||
class BaseParser(object):
|
||||
"""Parser engine.
|
||||
|
||||
A Parser instance contains state pertaining to the current token
|
||||
sequence, and should not be used concurrently by different threads
|
||||
to parse separate token sequences.
|
||||
|
||||
See python/tokenize.py for how to get input tokens by a string.
|
||||
|
||||
When a syntax error occurs, error_recovery() is called.
|
||||
"""
|
||||
|
||||
node_map = {}
|
||||
default_node = tree.Node
|
||||
|
||||
leaf_map = {
|
||||
}
|
||||
default_leaf = tree.Leaf
|
||||
|
||||
def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
|
||||
self._pgen_grammar = pgen_grammar
|
||||
self._start_nonterminal = start_nonterminal
|
||||
self._error_recovery = error_recovery
|
||||
|
||||
def parse(self, tokens):
|
||||
first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0]
|
||||
self.stack = Stack([StackNode(first_dfa)])
|
||||
|
||||
for token in tokens:
|
||||
self._add_token(token)
|
||||
|
||||
while True:
|
||||
tos = self.stack[-1]
|
||||
if not tos.dfa.is_final:
|
||||
# We never broke out -- EOF is too soon -- Unfinished statement.
|
||||
# However, the error recovery might have added the token again, if
|
||||
# the stack is empty, we're fine.
|
||||
raise InternalParseError(
|
||||
"incomplete input", token.type, token.string, token.start_pos
|
||||
)
|
||||
|
||||
if len(self.stack) > 1:
|
||||
self._pop()
|
||||
else:
|
||||
return self.convert_node(tos.nonterminal, tos.nodes)
|
||||
|
||||
def error_recovery(self, token):
|
||||
if self._error_recovery:
|
||||
raise NotImplementedError("Error Recovery is not implemented")
|
||||
else:
|
||||
type_, value, start_pos, prefix = token
|
||||
error_leaf = tree.ErrorLeaf(type_, value, start_pos, prefix)
|
||||
raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
|
||||
|
||||
def convert_node(self, nonterminal, children):
|
||||
try:
|
||||
node = self.node_map[nonterminal](children)
|
||||
except KeyError:
|
||||
node = self.default_node(nonterminal, children)
|
||||
for c in children:
|
||||
c.parent = node
|
||||
return node
|
||||
|
||||
def convert_leaf(self, type_, value, prefix, start_pos):
|
||||
try:
|
||||
return self.leaf_map[type_](value, start_pos, prefix)
|
||||
except KeyError:
|
||||
return self.default_leaf(value, start_pos, prefix)
|
||||
|
||||
def _add_token(self, token):
|
||||
"""
|
||||
This is the only core function for parsing. Here happens basically
|
||||
everything. Everything is well prepared by the parser generator and we
|
||||
only apply the necessary steps here.
|
||||
"""
|
||||
grammar = self._pgen_grammar
|
||||
stack = self.stack
|
||||
type_, value, start_pos, prefix = token
|
||||
transition = _token_to_transition(grammar, type_, value)
|
||||
|
||||
while True:
|
||||
try:
|
||||
plan = stack[-1].dfa.transitions[transition]
|
||||
break
|
||||
except KeyError:
|
||||
if stack[-1].dfa.is_final:
|
||||
self._pop()
|
||||
else:
|
||||
self.error_recovery(token)
|
||||
return
|
||||
except IndexError:
|
||||
raise InternalParseError("too much input", type_, value, start_pos)
|
||||
|
||||
stack[-1].dfa = plan.next_dfa
|
||||
|
||||
for push in plan.dfa_pushes:
|
||||
stack.append(StackNode(push))
|
||||
|
||||
leaf = self.convert_leaf(type_, value, prefix, start_pos)
|
||||
stack[-1].nodes.append(leaf)
|
||||
|
||||
def _pop(self):
|
||||
tos = self.stack.pop()
|
||||
# If there's exactly one child, return that child instead of
|
||||
# creating a new node. We still create expr_stmt and
|
||||
# file_input though, because a lot of Jedi depends on its
|
||||
# logic.
|
||||
if len(tos.nodes) == 1:
|
||||
new_node = tos.nodes[0]
|
||||
else:
|
||||
new_node = self.convert_node(tos.dfa.from_rule, tos.nodes)
|
||||
|
||||
self.stack[-1].nodes.append(new_node)
|
10
venv/Lib/site-packages/parso/pgen2/__init__.py
Normal file
10
venv/Lib/site-packages/parso/pgen2/__init__.py
Normal file
|
@ -0,0 +1,10 @@
|
|||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Modifications:
|
||||
# Copyright 2006 Google, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
# Copyright 2014 David Halter and Contributors
|
||||
# Modifications are dual-licensed: MIT and PSF.
|
||||
|
||||
from parso.pgen2.generator import generate_grammar
|
1
venv/Lib/site-packages/parso/pgen2/__init__.pyi
Normal file
1
venv/Lib/site-packages/parso/pgen2/__init__.pyi
Normal file
|
@ -0,0 +1 @@
|
|||
from parso.pgen2.generator import generate_grammar as generate_grammar
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
378
venv/Lib/site-packages/parso/pgen2/generator.py
Normal file
378
venv/Lib/site-packages/parso/pgen2/generator.py
Normal file
|
@ -0,0 +1,378 @@
|
|||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Modifications:
|
||||
# Copyright David Halter and Contributors
|
||||
# Modifications are dual-licensed: MIT and PSF.
|
||||
|
||||
"""
|
||||
This module defines the data structures used to represent a grammar.
|
||||
|
||||
Specifying grammars in pgen is possible with this grammar::
|
||||
|
||||
grammar: (NEWLINE | rule)* ENDMARKER
|
||||
rule: NAME ':' rhs NEWLINE
|
||||
rhs: items ('|' items)*
|
||||
items: item+
|
||||
item: '[' rhs ']' | atom ['+' | '*']
|
||||
atom: '(' rhs ')' | NAME | STRING
|
||||
|
||||
This grammar is self-referencing.
|
||||
|
||||
This parser generator (pgen2) was created by Guido Rossum and used for lib2to3.
|
||||
Most of the code has been refactored to make it more Pythonic. Since this was a
|
||||
"copy" of the CPython Parser parser "pgen", there was some work needed to make
|
||||
it more readable. It should also be slightly faster than the original pgen2,
|
||||
because we made some optimizations.
|
||||
"""
|
||||
|
||||
from ast import literal_eval
|
||||
|
||||
from parso.pgen2.grammar_parser import GrammarParser, NFAState
|
||||
|
||||
|
||||
class Grammar(object):
|
||||
"""
|
||||
Once initialized, this class supplies the grammar tables for the
|
||||
parsing engine implemented by parse.py. The parsing engine
|
||||
accesses the instance variables directly.
|
||||
|
||||
The only important part in this parsers are dfas and transitions between
|
||||
dfas.
|
||||
"""
|
||||
|
||||
def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
|
||||
self.nonterminal_to_dfas = rule_to_dfas # Dict[str, List[DFAState]]
|
||||
self.reserved_syntax_strings = reserved_syntax_strings
|
||||
self.start_nonterminal = start_nonterminal
|
||||
|
||||
|
||||
class DFAPlan(object):
|
||||
"""
|
||||
Plans are used for the parser to create stack nodes and do the proper
|
||||
DFA state transitions.
|
||||
"""
|
||||
def __init__(self, next_dfa, dfa_pushes=[]):
|
||||
self.next_dfa = next_dfa
|
||||
self.dfa_pushes = dfa_pushes
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
|
||||
|
||||
|
||||
class DFAState(object):
|
||||
"""
|
||||
The DFAState object is the core class for pretty much anything. DFAState
|
||||
are the vertices of an ordered graph while arcs and transitions are the
|
||||
edges.
|
||||
|
||||
Arcs are the initial edges, where most DFAStates are not connected and
|
||||
transitions are then calculated to connect the DFA state machines that have
|
||||
different nonterminals.
|
||||
"""
|
||||
def __init__(self, from_rule, nfa_set, final):
|
||||
assert isinstance(nfa_set, set)
|
||||
assert isinstance(next(iter(nfa_set)), NFAState)
|
||||
assert isinstance(final, NFAState)
|
||||
self.from_rule = from_rule
|
||||
self.nfa_set = nfa_set
|
||||
self.arcs = {} # map from terminals/nonterminals to DFAState
|
||||
# In an intermediary step we set these nonterminal arcs (which has the
|
||||
# same structure as arcs). These don't contain terminals anymore.
|
||||
self.nonterminal_arcs = {}
|
||||
|
||||
# Transitions are basically the only thing that the parser is using
|
||||
# with is_final. Everyting else is purely here to create a parser.
|
||||
self.transitions = {} #: Dict[Union[TokenType, ReservedString], DFAPlan]
|
||||
self.is_final = final in nfa_set
|
||||
|
||||
def add_arc(self, next_, label):
|
||||
assert isinstance(label, str)
|
||||
assert label not in self.arcs
|
||||
assert isinstance(next_, DFAState)
|
||||
self.arcs[label] = next_
|
||||
|
||||
def unifystate(self, old, new):
|
||||
for label, next_ in self.arcs.items():
|
||||
if next_ is old:
|
||||
self.arcs[label] = new
|
||||
|
||||
def __eq__(self, other):
|
||||
# Equality test -- ignore the nfa_set instance variable
|
||||
assert isinstance(other, DFAState)
|
||||
if self.is_final != other.is_final:
|
||||
return False
|
||||
# Can't just return self.arcs == other.arcs, because that
|
||||
# would invoke this method recursively, with cycles...
|
||||
if len(self.arcs) != len(other.arcs):
|
||||
return False
|
||||
for label, next_ in self.arcs.items():
|
||||
if next_ is not other.arcs.get(label):
|
||||
return False
|
||||
return True
|
||||
|
||||
__hash__ = None # For Py3 compatibility.
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s: %s is_final=%s>' % (
|
||||
self.__class__.__name__, self.from_rule, self.is_final
|
||||
)
|
||||
|
||||
|
||||
class ReservedString(object):
|
||||
"""
|
||||
Most grammars will have certain keywords and operators that are mentioned
|
||||
in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).
|
||||
This class basically is the former.
|
||||
"""
|
||||
|
||||
def __init__(self, value):
|
||||
self.value = value
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%s)' % (self.__class__.__name__, self.value)
|
||||
|
||||
|
||||
def _simplify_dfas(dfas):
|
||||
"""
|
||||
This is not theoretically optimal, but works well enough.
|
||||
Algorithm: repeatedly look for two states that have the same
|
||||
set of arcs (same labels pointing to the same nodes) and
|
||||
unify them, until things stop changing.
|
||||
|
||||
dfas is a list of DFAState instances
|
||||
"""
|
||||
changes = True
|
||||
while changes:
|
||||
changes = False
|
||||
for i, state_i in enumerate(dfas):
|
||||
for j in range(i + 1, len(dfas)):
|
||||
state_j = dfas[j]
|
||||
if state_i == state_j:
|
||||
#print " unify", i, j
|
||||
del dfas[j]
|
||||
for state in dfas:
|
||||
state.unifystate(state_j, state_i)
|
||||
changes = True
|
||||
break
|
||||
|
||||
|
||||
def _make_dfas(start, finish):
|
||||
"""
|
||||
Uses the powerset construction algorithm to create DFA states from sets of
|
||||
NFA states.
|
||||
|
||||
Also does state reduction if some states are not needed.
|
||||
"""
|
||||
# To turn an NFA into a DFA, we define the states of the DFA
|
||||
# to correspond to *sets* of states of the NFA. Then do some
|
||||
# state reduction.
|
||||
assert isinstance(start, NFAState)
|
||||
assert isinstance(finish, NFAState)
|
||||
|
||||
def addclosure(nfa_state, base_nfa_set):
|
||||
assert isinstance(nfa_state, NFAState)
|
||||
if nfa_state in base_nfa_set:
|
||||
return
|
||||
base_nfa_set.add(nfa_state)
|
||||
for nfa_arc in nfa_state.arcs:
|
||||
if nfa_arc.nonterminal_or_string is None:
|
||||
addclosure(nfa_arc.next, base_nfa_set)
|
||||
|
||||
base_nfa_set = set()
|
||||
addclosure(start, base_nfa_set)
|
||||
states = [DFAState(start.from_rule, base_nfa_set, finish)]
|
||||
for state in states: # NB states grows while we're iterating
|
||||
arcs = {}
|
||||
# Find state transitions and store them in arcs.
|
||||
for nfa_state in state.nfa_set:
|
||||
for nfa_arc in nfa_state.arcs:
|
||||
if nfa_arc.nonterminal_or_string is not None:
|
||||
nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
|
||||
addclosure(nfa_arc.next, nfa_set)
|
||||
|
||||
# Now create the dfa's with no None's in arcs anymore. All Nones have
|
||||
# been eliminated and state transitions (arcs) are properly defined, we
|
||||
# just need to create the dfa's.
|
||||
for nonterminal_or_string, nfa_set in arcs.items():
|
||||
for nested_state in states:
|
||||
if nested_state.nfa_set == nfa_set:
|
||||
# The DFA state already exists for this rule.
|
||||
break
|
||||
else:
|
||||
nested_state = DFAState(start.from_rule, nfa_set, finish)
|
||||
states.append(nested_state)
|
||||
|
||||
state.add_arc(nested_state, nonterminal_or_string)
|
||||
return states # List of DFAState instances; first one is start
|
||||
|
||||
|
||||
def _dump_nfa(start, finish):
|
||||
print("Dump of NFA for", start.from_rule)
|
||||
todo = [start]
|
||||
for i, state in enumerate(todo):
|
||||
print(" State", i, state is finish and "(final)" or "")
|
||||
for arc in state.arcs:
|
||||
label, next_ = arc.nonterminal_or_string, arc.next
|
||||
if next_ in todo:
|
||||
j = todo.index(next_)
|
||||
else:
|
||||
j = len(todo)
|
||||
todo.append(next_)
|
||||
if label is None:
|
||||
print(" -> %d" % j)
|
||||
else:
|
||||
print(" %s -> %d" % (label, j))
|
||||
|
||||
|
||||
def _dump_dfas(dfas):
|
||||
print("Dump of DFA for", dfas[0].from_rule)
|
||||
for i, state in enumerate(dfas):
|
||||
print(" State", i, state.is_final and "(final)" or "")
|
||||
for nonterminal, next_ in state.arcs.items():
|
||||
print(" %s -> %d" % (nonterminal, dfas.index(next_)))
|
||||
|
||||
|
||||
def generate_grammar(bnf_grammar, token_namespace):
|
||||
"""
|
||||
``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
|
||||
at-least-once repetition, [] for optional parts, | for alternatives and ()
|
||||
for grouping).
|
||||
|
||||
It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
|
||||
own parser.
|
||||
"""
|
||||
rule_to_dfas = {}
|
||||
start_nonterminal = None
|
||||
for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
|
||||
#_dump_nfa(nfa_a, nfa_z)
|
||||
dfas = _make_dfas(nfa_a, nfa_z)
|
||||
#_dump_dfas(dfas)
|
||||
# oldlen = len(dfas)
|
||||
_simplify_dfas(dfas)
|
||||
# newlen = len(dfas)
|
||||
rule_to_dfas[nfa_a.from_rule] = dfas
|
||||
#print(nfa_a.from_rule, oldlen, newlen)
|
||||
|
||||
if start_nonterminal is None:
|
||||
start_nonterminal = nfa_a.from_rule
|
||||
|
||||
reserved_strings = {}
|
||||
for nonterminal, dfas in rule_to_dfas.items():
|
||||
for dfa_state in dfas:
|
||||
for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
|
||||
if terminal_or_nonterminal in rule_to_dfas:
|
||||
dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa
|
||||
else:
|
||||
transition = _make_transition(
|
||||
token_namespace,
|
||||
reserved_strings,
|
||||
terminal_or_nonterminal
|
||||
)
|
||||
dfa_state.transitions[transition] = DFAPlan(next_dfa)
|
||||
|
||||
_calculate_tree_traversal(rule_to_dfas)
|
||||
return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)
|
||||
|
||||
|
||||
def _make_transition(token_namespace, reserved_syntax_strings, label):
|
||||
"""
|
||||
Creates a reserved string ("if", "for", "*", ...) or returns the token type
|
||||
(NUMBER, STRING, ...) for a given grammar terminal.
|
||||
"""
|
||||
if label[0].isalpha():
|
||||
# A named token (e.g. NAME, NUMBER, STRING)
|
||||
return getattr(token_namespace, label)
|
||||
else:
|
||||
# Either a keyword or an operator
|
||||
assert label[0] in ('"', "'"), label
|
||||
assert not label.startswith('"""') and not label.startswith("'''")
|
||||
value = literal_eval(label)
|
||||
try:
|
||||
return reserved_syntax_strings[value]
|
||||
except KeyError:
|
||||
r = reserved_syntax_strings[value] = ReservedString(value)
|
||||
return r
|
||||
|
||||
|
||||
def _calculate_tree_traversal(nonterminal_to_dfas):
|
||||
"""
|
||||
By this point we know how dfas can move around within a stack node, but we
|
||||
don't know how we can add a new stack node (nonterminal transitions).
|
||||
"""
|
||||
# Map from grammar rule (nonterminal) name to a set of tokens.
|
||||
first_plans = {}
|
||||
|
||||
nonterminals = list(nonterminal_to_dfas.keys())
|
||||
nonterminals.sort()
|
||||
for nonterminal in nonterminals:
|
||||
if nonterminal not in first_plans:
|
||||
_calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)
|
||||
|
||||
# Now that we have calculated the first terminals, we are sure that
|
||||
# there is no left recursion.
|
||||
|
||||
for dfas in nonterminal_to_dfas.values():
|
||||
for dfa_state in dfas:
|
||||
transitions = dfa_state.transitions
|
||||
for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
|
||||
for transition, pushes in first_plans[nonterminal].items():
|
||||
if transition in transitions:
|
||||
prev_plan = transitions[transition]
|
||||
# Make sure these are sorted so that error messages are
|
||||
# at least deterministic
|
||||
choices = sorted([
|
||||
(
|
||||
prev_plan.dfa_pushes[0].from_rule
|
||||
if prev_plan.dfa_pushes
|
||||
else prev_plan.next_dfa.from_rule
|
||||
),
|
||||
(
|
||||
pushes[0].from_rule
|
||||
if pushes else next_dfa.from_rule
|
||||
),
|
||||
])
|
||||
raise ValueError(
|
||||
"Rule %s is ambiguous; given a %s token, we "
|
||||
"can't determine if we should evaluate %s or %s."
|
||||
% (
|
||||
(
|
||||
dfa_state.from_rule,
|
||||
transition,
|
||||
) + tuple(choices)
|
||||
)
|
||||
)
|
||||
transitions[transition] = DFAPlan(next_dfa, pushes)
|
||||
|
||||
|
||||
def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
|
||||
"""
|
||||
Calculates the first plan in the first_plans dictionary for every given
|
||||
nonterminal. This is going to be used to know when to create stack nodes.
|
||||
"""
|
||||
dfas = nonterminal_to_dfas[nonterminal]
|
||||
new_first_plans = {}
|
||||
first_plans[nonterminal] = None # dummy to detect left recursion
|
||||
# We only need to check the first dfa. All the following ones are not
|
||||
# interesting to find first terminals.
|
||||
state = dfas[0]
|
||||
for transition, next_ in state.transitions.items():
|
||||
# It's a string. We have finally found a possible first token.
|
||||
new_first_plans[transition] = [next_.next_dfa]
|
||||
|
||||
for nonterminal2, next_ in state.nonterminal_arcs.items():
|
||||
# It's a nonterminal and we have either a left recursion issue
|
||||
# in the grammar or we have to recurse.
|
||||
try:
|
||||
first_plans2 = first_plans[nonterminal2]
|
||||
except KeyError:
|
||||
first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2)
|
||||
else:
|
||||
if first_plans2 is None:
|
||||
raise ValueError("left recursion for rule %r" % nonterminal)
|
||||
|
||||
for t, pushes in first_plans2.items():
|
||||
new_first_plans[t] = [next_] + pushes
|
||||
|
||||
first_plans[nonterminal] = new_first_plans
|
||||
return new_first_plans
|
38
venv/Lib/site-packages/parso/pgen2/generator.pyi
Normal file
38
venv/Lib/site-packages/parso/pgen2/generator.pyi
Normal file
|
@ -0,0 +1,38 @@
|
|||
from typing import Any, Generic, Mapping, Sequence, Set, TypeVar, Union
|
||||
|
||||
from parso.pgen2.grammar_parser import NFAState
|
||||
|
||||
_TokenTypeT = TypeVar("_TokenTypeT")
|
||||
|
||||
class Grammar(Generic[_TokenTypeT]):
|
||||
nonterminal_to_dfas: Mapping[str, Sequence[DFAState[_TokenTypeT]]]
|
||||
reserved_syntax_strings: Mapping[str, ReservedString]
|
||||
start_nonterminal: str
|
||||
def __init__(
|
||||
self,
|
||||
start_nonterminal: str,
|
||||
rule_to_dfas: Mapping[str, Sequence[DFAState]],
|
||||
reserved_syntax_strings: Mapping[str, ReservedString],
|
||||
) -> None: ...
|
||||
|
||||
class DFAPlan:
|
||||
next_dfa: DFAState
|
||||
dfa_pushes: Sequence[DFAState]
|
||||
|
||||
class DFAState(Generic[_TokenTypeT]):
|
||||
from_rule: str
|
||||
nfa_set: Set[NFAState]
|
||||
is_final: bool
|
||||
arcs: Mapping[str, DFAState] # map from all terminals/nonterminals to DFAState
|
||||
nonterminal_arcs: Mapping[str, DFAState]
|
||||
transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan]
|
||||
def __init__(
|
||||
self, from_rule: str, nfa_set: Set[NFAState], final: NFAState
|
||||
) -> None: ...
|
||||
|
||||
class ReservedString:
|
||||
value: str
|
||||
def __init__(self, value: str) -> None: ...
|
||||
def __repr__(self) -> str: ...
|
||||
|
||||
def generate_grammar(bnf_grammar: str, token_namespace: Any) -> Grammar[Any]: ...
|
159
venv/Lib/site-packages/parso/pgen2/grammar_parser.py
Normal file
159
venv/Lib/site-packages/parso/pgen2/grammar_parser.py
Normal file
|
@ -0,0 +1,159 @@
|
|||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Modifications:
|
||||
# Copyright David Halter and Contributors
|
||||
# Modifications are dual-licensed: MIT and PSF.
|
||||
|
||||
from parso.python.tokenize import tokenize
|
||||
from parso.utils import parse_version_string
|
||||
from parso.python.token import PythonTokenTypes
|
||||
|
||||
|
||||
class GrammarParser():
|
||||
"""
|
||||
The parser for Python grammar files.
|
||||
"""
|
||||
def __init__(self, bnf_grammar):
|
||||
self._bnf_grammar = bnf_grammar
|
||||
self.generator = tokenize(
|
||||
bnf_grammar,
|
||||
version_info=parse_version_string('3.6')
|
||||
)
|
||||
self._gettoken() # Initialize lookahead
|
||||
|
||||
def parse(self):
|
||||
# grammar: (NEWLINE | rule)* ENDMARKER
|
||||
while self.type != PythonTokenTypes.ENDMARKER:
|
||||
while self.type == PythonTokenTypes.NEWLINE:
|
||||
self._gettoken()
|
||||
|
||||
# rule: NAME ':' rhs NEWLINE
|
||||
self._current_rule_name = self._expect(PythonTokenTypes.NAME)
|
||||
self._expect(PythonTokenTypes.OP, ':')
|
||||
|
||||
a, z = self._parse_rhs()
|
||||
self._expect(PythonTokenTypes.NEWLINE)
|
||||
|
||||
yield a, z
|
||||
|
||||
def _parse_rhs(self):
|
||||
# rhs: items ('|' items)*
|
||||
a, z = self._parse_items()
|
||||
if self.value != "|":
|
||||
return a, z
|
||||
else:
|
||||
aa = NFAState(self._current_rule_name)
|
||||
zz = NFAState(self._current_rule_name)
|
||||
while True:
|
||||
# Add the possibility to go into the state of a and come back
|
||||
# to finish.
|
||||
aa.add_arc(a)
|
||||
z.add_arc(zz)
|
||||
if self.value != "|":
|
||||
break
|
||||
|
||||
self._gettoken()
|
||||
a, z = self._parse_items()
|
||||
return aa, zz
|
||||
|
||||
def _parse_items(self):
|
||||
# items: item+
|
||||
a, b = self._parse_item()
|
||||
while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
|
||||
or self.value in ('(', '['):
|
||||
c, d = self._parse_item()
|
||||
# Need to end on the next item.
|
||||
b.add_arc(c)
|
||||
b = d
|
||||
return a, b
|
||||
|
||||
def _parse_item(self):
|
||||
# item: '[' rhs ']' | atom ['+' | '*']
|
||||
if self.value == "[":
|
||||
self._gettoken()
|
||||
a, z = self._parse_rhs()
|
||||
self._expect(PythonTokenTypes.OP, ']')
|
||||
# Make it also possible that there is no token and change the
|
||||
# state.
|
||||
a.add_arc(z)
|
||||
return a, z
|
||||
else:
|
||||
a, z = self._parse_atom()
|
||||
value = self.value
|
||||
if value not in ("+", "*"):
|
||||
return a, z
|
||||
self._gettoken()
|
||||
# Make it clear that we can go back to the old state and repeat.
|
||||
z.add_arc(a)
|
||||
if value == "+":
|
||||
return a, z
|
||||
else:
|
||||
# The end state is the same as the beginning, nothing must
|
||||
# change.
|
||||
return a, a
|
||||
|
||||
def _parse_atom(self):
|
||||
# atom: '(' rhs ')' | NAME | STRING
|
||||
if self.value == "(":
|
||||
self._gettoken()
|
||||
a, z = self._parse_rhs()
|
||||
self._expect(PythonTokenTypes.OP, ')')
|
||||
return a, z
|
||||
elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
|
||||
a = NFAState(self._current_rule_name)
|
||||
z = NFAState(self._current_rule_name)
|
||||
# Make it clear that the state transition requires that value.
|
||||
a.add_arc(z, self.value)
|
||||
self._gettoken()
|
||||
return a, z
|
||||
else:
|
||||
self._raise_error("expected (...) or NAME or STRING, got %s/%s",
|
||||
self.type, self.value)
|
||||
|
||||
def _expect(self, type_, value=None):
|
||||
if self.type != type_:
|
||||
self._raise_error("expected %s, got %s [%s]",
|
||||
type_, self.type, self.value)
|
||||
if value is not None and self.value != value:
|
||||
self._raise_error("expected %s, got %s", value, self.value)
|
||||
value = self.value
|
||||
self._gettoken()
|
||||
return value
|
||||
|
||||
def _gettoken(self):
|
||||
tup = next(self.generator)
|
||||
self.type, self.value, self.begin, prefix = tup
|
||||
|
||||
def _raise_error(self, msg, *args):
|
||||
if args:
|
||||
try:
|
||||
msg = msg % args
|
||||
except:
|
||||
msg = " ".join([msg] + list(map(str, args)))
|
||||
line = self._bnf_grammar.splitlines()[self.begin[0] - 1]
|
||||
raise SyntaxError(msg, ('<grammar>', self.begin[0],
|
||||
self.begin[1], line))
|
||||
|
||||
|
||||
class NFAArc(object):
|
||||
def __init__(self, next_, nonterminal_or_string):
|
||||
self.next = next_
|
||||
self.nonterminal_or_string = nonterminal_or_string
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s: %s>' % (self.__class__.__name__, self.nonterminal_or_string)
|
||||
|
||||
|
||||
class NFAState(object):
|
||||
def __init__(self, from_rule):
|
||||
self.from_rule = from_rule
|
||||
self.arcs = [] # List[nonterminal (str), NFAState]
|
||||
|
||||
def add_arc(self, next_, nonterminal_or_string=None):
|
||||
assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str)
|
||||
assert isinstance(next_, NFAState)
|
||||
self.arcs.append(NFAArc(next_, nonterminal_or_string))
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s: from %s>' % (self.__class__.__name__, self.from_rule)
|
20
venv/Lib/site-packages/parso/pgen2/grammar_parser.pyi
Normal file
20
venv/Lib/site-packages/parso/pgen2/grammar_parser.pyi
Normal file
|
@ -0,0 +1,20 @@
|
|||
from typing import Generator, List, Optional, Tuple
|
||||
|
||||
from parso.python.token import TokenType
|
||||
|
||||
class GrammarParser:
|
||||
generator: Generator[TokenType, None, None]
|
||||
def __init__(self, bnf_grammar: str) -> None: ...
|
||||
def parse(self) -> Generator[Tuple[NFAState, NFAState], None, None]: ...
|
||||
|
||||
class NFAArc:
|
||||
next: NFAState
|
||||
nonterminal_or_string: Optional[str]
|
||||
def __init__(
|
||||
self, next_: NFAState, nonterminal_or_string: Optional[str]
|
||||
) -> None: ...
|
||||
|
||||
class NFAState:
|
||||
from_rule: str
|
||||
arcs: List[NFAArc]
|
||||
def __init__(self, from_rule: str) -> None: ...
|
0
venv/Lib/site-packages/parso/py.typed
Normal file
0
venv/Lib/site-packages/parso/py.typed
Normal file
0
venv/Lib/site-packages/parso/python/__init__.py
Normal file
0
venv/Lib/site-packages/parso/python/__init__.py
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
886
venv/Lib/site-packages/parso/python/diff.py
Normal file
886
venv/Lib/site-packages/parso/python/diff.py
Normal file
|
@ -0,0 +1,886 @@
|
|||
"""
|
||||
The diff parser is trying to be a faster version of the normal parser by trying
|
||||
to reuse the nodes of a previous pass over the same file. This is also called
|
||||
incremental parsing in parser literature. The difference is mostly that with
|
||||
incremental parsing you get a range that needs to be reparsed. Here we
|
||||
calculate that range ourselves by using difflib. After that it's essentially
|
||||
incremental parsing.
|
||||
|
||||
The biggest issue of this approach is that we reuse nodes in a mutable way. The
|
||||
intial design and idea is quite problematic for this parser, but it is also
|
||||
pretty fast. Measurements showed that just copying nodes in Python is simply
|
||||
quite a bit slower (especially for big files >3 kLOC). Therefore we did not
|
||||
want to get rid of the mutable nodes, since this is usually not an issue.
|
||||
|
||||
This is by far the hardest software I ever wrote, exactly because the initial
|
||||
design is crappy. When you have to account for a lot of mutable state, it
|
||||
creates a ton of issues that you would otherwise not have. This file took
|
||||
probably 3-6 months to write, which is insane for a parser.
|
||||
|
||||
There is a fuzzer in that helps test this whole thing. Please use it if you
|
||||
make changes here. If you run the fuzzer like::
|
||||
|
||||
test/fuzz_diff_parser.py random -n 100000
|
||||
|
||||
you can be pretty sure that everything is still fine. I sometimes run the
|
||||
fuzzer up to 24h to make sure everything is still ok.
|
||||
"""
|
||||
import re
|
||||
import difflib
|
||||
from collections import namedtuple
|
||||
import logging
|
||||
|
||||
from parso.utils import split_lines
|
||||
from parso.python.parser import Parser
|
||||
from parso.python.tree import EndMarker
|
||||
from parso.python.tokenize import PythonToken, BOM_UTF8_STRING
|
||||
from parso.python.token import PythonTokenTypes
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
DEBUG_DIFF_PARSER = False
|
||||
|
||||
_INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'
|
||||
|
||||
NEWLINE = PythonTokenTypes.NEWLINE
|
||||
DEDENT = PythonTokenTypes.DEDENT
|
||||
NAME = PythonTokenTypes.NAME
|
||||
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
|
||||
ENDMARKER = PythonTokenTypes.ENDMARKER
|
||||
|
||||
|
||||
def _is_indentation_error_leaf(node):
|
||||
return node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS
|
||||
|
||||
|
||||
def _get_previous_leaf_if_indentation(leaf):
|
||||
while leaf and _is_indentation_error_leaf(leaf):
|
||||
leaf = leaf.get_previous_leaf()
|
||||
return leaf
|
||||
|
||||
|
||||
def _get_next_leaf_if_indentation(leaf):
|
||||
while leaf and _is_indentation_error_leaf(leaf):
|
||||
leaf = leaf.get_next_leaf()
|
||||
return leaf
|
||||
|
||||
|
||||
def _get_suite_indentation(tree_node):
|
||||
return _get_indentation(tree_node.children[1])
|
||||
|
||||
|
||||
def _get_indentation(tree_node):
|
||||
return tree_node.start_pos[1]
|
||||
|
||||
|
||||
def _assert_valid_graph(node):
|
||||
"""
|
||||
Checks if the parent/children relationship is correct.
|
||||
|
||||
This is a check that only runs during debugging/testing.
|
||||
"""
|
||||
try:
|
||||
children = node.children
|
||||
except AttributeError:
|
||||
# Ignore INDENT is necessary, because indent/dedent tokens don't
|
||||
# contain value/prefix and are just around, because of the tokenizer.
|
||||
if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
|
||||
assert not node.value
|
||||
assert not node.prefix
|
||||
return
|
||||
|
||||
# Calculate the content between two start positions.
|
||||
previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf())
|
||||
if previous_leaf is None:
|
||||
content = node.prefix
|
||||
previous_start_pos = 1, 0
|
||||
else:
|
||||
assert previous_leaf.end_pos <= node.start_pos, \
|
||||
(previous_leaf, node)
|
||||
|
||||
content = previous_leaf.value + node.prefix
|
||||
previous_start_pos = previous_leaf.start_pos
|
||||
|
||||
if '\n' in content or '\r' in content:
|
||||
splitted = split_lines(content)
|
||||
line = previous_start_pos[0] + len(splitted) - 1
|
||||
actual = line, len(splitted[-1])
|
||||
else:
|
||||
actual = previous_start_pos[0], previous_start_pos[1] + len(content)
|
||||
if content.startswith(BOM_UTF8_STRING) \
|
||||
and node.get_start_pos_of_prefix() == (1, 0):
|
||||
# Remove the byte order mark
|
||||
actual = actual[0], actual[1] - 1
|
||||
|
||||
assert node.start_pos == actual, (node.start_pos, actual)
|
||||
else:
|
||||
for child in children:
|
||||
assert child.parent == node, (node, child)
|
||||
_assert_valid_graph(child)
|
||||
|
||||
|
||||
def _assert_nodes_are_equal(node1, node2):
|
||||
try:
|
||||
children1 = node1.children
|
||||
except AttributeError:
|
||||
assert not hasattr(node2, 'children'), (node1, node2)
|
||||
assert node1.value == node2.value, (node1, node2)
|
||||
assert node1.type == node2.type, (node1, node2)
|
||||
assert node1.prefix == node2.prefix, (node1, node2)
|
||||
assert node1.start_pos == node2.start_pos, (node1, node2)
|
||||
return
|
||||
else:
|
||||
try:
|
||||
children2 = node2.children
|
||||
except AttributeError:
|
||||
assert False, (node1, node2)
|
||||
for n1, n2 in zip(children1, children2):
|
||||
_assert_nodes_are_equal(n1, n2)
|
||||
assert len(children1) == len(children2), '\n' + repr(children1) + '\n' + repr(children2)
|
||||
|
||||
|
||||
def _get_debug_error_message(module, old_lines, new_lines):
|
||||
current_lines = split_lines(module.get_code(), keepends=True)
|
||||
current_diff = difflib.unified_diff(new_lines, current_lines)
|
||||
old_new_diff = difflib.unified_diff(old_lines, new_lines)
|
||||
import parso
|
||||
return (
|
||||
"There's an issue with the diff parser. Please "
|
||||
"report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s"
|
||||
% (parso.__version__, ''.join(old_new_diff), ''.join(current_diff))
|
||||
)
|
||||
|
||||
|
||||
def _get_last_line(node_or_leaf):
|
||||
last_leaf = node_or_leaf.get_last_leaf()
|
||||
if _ends_with_newline(last_leaf):
|
||||
return last_leaf.start_pos[0]
|
||||
else:
|
||||
n = last_leaf.get_next_leaf()
|
||||
if n.type == 'endmarker' and '\n' in n.prefix:
|
||||
# This is a very special case and has to do with error recovery in
|
||||
# Parso. The problem is basically that there's no newline leaf at
|
||||
# the end sometimes (it's required in the grammar, but not needed
|
||||
# actually before endmarker, CPython just adds a newline to make
|
||||
# source code pass the parser, to account for that Parso error
|
||||
# recovery allows small_stmt instead of simple_stmt).
|
||||
return last_leaf.end_pos[0] + 1
|
||||
return last_leaf.end_pos[0]
|
||||
|
||||
|
||||
def _skip_dedent_error_leaves(leaf):
|
||||
while leaf is not None and leaf.type == 'error_leaf' and leaf.token_type == 'DEDENT':
|
||||
leaf = leaf.get_previous_leaf()
|
||||
return leaf
|
||||
|
||||
|
||||
def _ends_with_newline(leaf, suffix=''):
|
||||
leaf = _skip_dedent_error_leaves(leaf)
|
||||
|
||||
if leaf.type == 'error_leaf':
|
||||
typ = leaf.token_type.lower()
|
||||
else:
|
||||
typ = leaf.type
|
||||
|
||||
return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
|
||||
|
||||
|
||||
def _flows_finished(pgen_grammar, stack):
|
||||
"""
|
||||
if, while, for and try might not be finished, because another part might
|
||||
still be parsed.
|
||||
"""
|
||||
for stack_node in stack:
|
||||
if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _func_or_class_has_suite(node):
|
||||
if node.type == 'decorated':
|
||||
node = node.children[-1]
|
||||
if node.type in ('async_funcdef', 'async_stmt'):
|
||||
node = node.children[-1]
|
||||
return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite'
|
||||
|
||||
|
||||
def _suite_or_file_input_is_valid(pgen_grammar, stack):
|
||||
if not _flows_finished(pgen_grammar, stack):
|
||||
return False
|
||||
|
||||
for stack_node in reversed(stack):
|
||||
if stack_node.nonterminal == 'decorator':
|
||||
# A decorator is only valid with the upcoming function.
|
||||
return False
|
||||
|
||||
if stack_node.nonterminal == 'suite':
|
||||
# If only newline is in the suite, the suite is not valid, yet.
|
||||
return len(stack_node.nodes) > 1
|
||||
# Not reaching a suite means that we're dealing with file_input levels
|
||||
# where there's no need for a valid statement in it. It can also be empty.
|
||||
return True
|
||||
|
||||
|
||||
def _is_flow_node(node):
|
||||
if node.type == 'async_stmt':
|
||||
node = node.children[1]
|
||||
try:
|
||||
value = node.children[0].value
|
||||
except AttributeError:
|
||||
return False
|
||||
return value in ('if', 'for', 'while', 'try', 'with')
|
||||
|
||||
|
||||
class _PositionUpdatingFinished(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def _update_positions(nodes, line_offset, last_leaf):
|
||||
for node in nodes:
|
||||
try:
|
||||
children = node.children
|
||||
except AttributeError:
|
||||
# Is a leaf
|
||||
node.line += line_offset
|
||||
if node is last_leaf:
|
||||
raise _PositionUpdatingFinished
|
||||
else:
|
||||
_update_positions(children, line_offset, last_leaf)
|
||||
|
||||
|
||||
class DiffParser(object):
|
||||
"""
|
||||
An advanced form of parsing a file faster. Unfortunately comes with huge
|
||||
side effects. It changes the given module.
|
||||
"""
|
||||
def __init__(self, pgen_grammar, tokenizer, module):
|
||||
self._pgen_grammar = pgen_grammar
|
||||
self._tokenizer = tokenizer
|
||||
self._module = module
|
||||
|
||||
def _reset(self):
|
||||
self._copy_count = 0
|
||||
self._parser_count = 0
|
||||
|
||||
self._nodes_tree = _NodesTree(self._module)
|
||||
|
||||
def update(self, old_lines, new_lines):
|
||||
'''
|
||||
The algorithm works as follows:
|
||||
|
||||
Equal:
|
||||
- Assure that the start is a newline, otherwise parse until we get
|
||||
one.
|
||||
- Copy from parsed_until_line + 1 to max(i2 + 1)
|
||||
- Make sure that the indentation is correct (e.g. add DEDENT)
|
||||
- Add old and change positions
|
||||
Insert:
|
||||
- Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
|
||||
much more.
|
||||
|
||||
Returns the new module node.
|
||||
'''
|
||||
LOG.debug('diff parser start')
|
||||
# Reset the used names cache so they get regenerated.
|
||||
self._module._used_names = None
|
||||
|
||||
self._parser_lines_new = new_lines
|
||||
|
||||
self._reset()
|
||||
|
||||
line_length = len(new_lines)
|
||||
sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
|
||||
opcodes = sm.get_opcodes()
|
||||
LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length))
|
||||
|
||||
for operation, i1, i2, j1, j2 in opcodes:
|
||||
LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
|
||||
operation, i1 + 1, i2, j1 + 1, j2)
|
||||
|
||||
if j2 == line_length and new_lines[-1] == '':
|
||||
# The empty part after the last newline is not relevant.
|
||||
j2 -= 1
|
||||
|
||||
if operation == 'equal':
|
||||
line_offset = j1 - i1
|
||||
self._copy_from_old_parser(line_offset, i1 + 1, i2, j2)
|
||||
elif operation == 'replace':
|
||||
self._parse(until_line=j2)
|
||||
elif operation == 'insert':
|
||||
self._parse(until_line=j2)
|
||||
else:
|
||||
assert operation == 'delete'
|
||||
|
||||
# With this action all change will finally be applied and we have a
|
||||
# changed module.
|
||||
self._nodes_tree.close()
|
||||
|
||||
if DEBUG_DIFF_PARSER:
|
||||
# If there is reasonable suspicion that the diff parser is not
|
||||
# behaving well, this should be enabled.
|
||||
try:
|
||||
code = ''.join(new_lines)
|
||||
assert self._module.get_code() == code
|
||||
_assert_valid_graph(self._module)
|
||||
without_diff_parser_module = Parser(
|
||||
self._pgen_grammar,
|
||||
error_recovery=True
|
||||
).parse(self._tokenizer(new_lines))
|
||||
_assert_nodes_are_equal(self._module, without_diff_parser_module)
|
||||
except AssertionError:
|
||||
print(_get_debug_error_message(self._module, old_lines, new_lines))
|
||||
raise
|
||||
|
||||
last_pos = self._module.end_pos[0]
|
||||
if last_pos != line_length:
|
||||
raise Exception(
|
||||
('(%s != %s) ' % (last_pos, line_length))
|
||||
+ _get_debug_error_message(self._module, old_lines, new_lines)
|
||||
)
|
||||
LOG.debug('diff parser end')
|
||||
return self._module
|
||||
|
||||
def _enabled_debugging(self, old_lines, lines_new):
|
||||
if self._module.get_code() != ''.join(lines_new):
|
||||
LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))
|
||||
|
||||
def _copy_from_old_parser(self, line_offset, start_line_old, until_line_old, until_line_new):
|
||||
last_until_line = -1
|
||||
while until_line_new > self._nodes_tree.parsed_until_line:
|
||||
parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
|
||||
line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
|
||||
if line_stmt is None:
|
||||
# Parse 1 line at least. We don't need more, because we just
|
||||
# want to get into a state where the old parser has statements
|
||||
# again that can be copied (e.g. not lines within parentheses).
|
||||
self._parse(self._nodes_tree.parsed_until_line + 1)
|
||||
else:
|
||||
p_children = line_stmt.parent.children
|
||||
index = p_children.index(line_stmt)
|
||||
|
||||
if start_line_old == 1 \
|
||||
and p_children[0].get_first_leaf().prefix.startswith(BOM_UTF8_STRING):
|
||||
# If there's a BOM in the beginning, just reparse. It's too
|
||||
# complicated to account for it otherwise.
|
||||
copied_nodes = []
|
||||
else:
|
||||
from_ = self._nodes_tree.parsed_until_line + 1
|
||||
copied_nodes = self._nodes_tree.copy_nodes(
|
||||
p_children[index:],
|
||||
until_line_old,
|
||||
line_offset
|
||||
)
|
||||
# Match all the nodes that are in the wanted range.
|
||||
if copied_nodes:
|
||||
self._copy_count += 1
|
||||
|
||||
to = self._nodes_tree.parsed_until_line
|
||||
|
||||
LOG.debug('copy old[%s:%s] new[%s:%s]',
|
||||
copied_nodes[0].start_pos[0],
|
||||
copied_nodes[-1].end_pos[0] - 1, from_, to)
|
||||
else:
|
||||
# We have copied as much as possible (but definitely not too
|
||||
# much). Therefore we just parse a bit more.
|
||||
self._parse(self._nodes_tree.parsed_until_line + 1)
|
||||
# Since there are potential bugs that might loop here endlessly, we
|
||||
# just stop here.
|
||||
assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
|
||||
last_until_line = self._nodes_tree.parsed_until_line
|
||||
|
||||
def _get_old_line_stmt(self, old_line):
|
||||
leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
|
||||
|
||||
if _ends_with_newline(leaf):
|
||||
leaf = leaf.get_next_leaf()
|
||||
if leaf.get_start_pos_of_prefix()[0] == old_line:
|
||||
node = leaf
|
||||
while node.parent.type not in ('file_input', 'suite'):
|
||||
node = node.parent
|
||||
|
||||
# Make sure that if only the `else:` line of an if statement is
|
||||
# copied that not the whole thing is going to be copied.
|
||||
if node.start_pos[0] >= old_line:
|
||||
return node
|
||||
# Must be on the same line. Otherwise we need to parse that bit.
|
||||
return None
|
||||
|
||||
def _parse(self, until_line):
|
||||
"""
|
||||
Parses at least until the given line, but might just parse more until a
|
||||
valid state is reached.
|
||||
"""
|
||||
last_until_line = 0
|
||||
while until_line > self._nodes_tree.parsed_until_line:
|
||||
node = self._try_parse_part(until_line)
|
||||
nodes = node.children
|
||||
|
||||
self._nodes_tree.add_parsed_nodes(nodes, self._keyword_token_indents)
|
||||
if self._replace_tos_indent is not None:
|
||||
self._nodes_tree.indents[-1] = self._replace_tos_indent
|
||||
|
||||
LOG.debug(
|
||||
'parse_part from %s to %s (to %s in part parser)',
|
||||
nodes[0].get_start_pos_of_prefix()[0],
|
||||
self._nodes_tree.parsed_until_line,
|
||||
node.end_pos[0] - 1
|
||||
)
|
||||
# Since the tokenizer sometimes has bugs, we cannot be sure that
|
||||
# this loop terminates. Therefore assert that there's always a
|
||||
# change.
|
||||
assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
|
||||
last_until_line = self._nodes_tree.parsed_until_line
|
||||
|
||||
def _try_parse_part(self, until_line):
|
||||
"""
|
||||
Sets up a normal parser that uses a spezialized tokenizer to only parse
|
||||
until a certain position (or a bit longer if the statement hasn't
|
||||
ended.
|
||||
"""
|
||||
self._parser_count += 1
|
||||
# TODO speed up, shouldn't copy the whole list all the time.
|
||||
# memoryview?
|
||||
parsed_until_line = self._nodes_tree.parsed_until_line
|
||||
lines_after = self._parser_lines_new[parsed_until_line:]
|
||||
tokens = self._diff_tokenize(
|
||||
lines_after,
|
||||
until_line,
|
||||
line_offset=parsed_until_line
|
||||
)
|
||||
self._active_parser = Parser(
|
||||
self._pgen_grammar,
|
||||
error_recovery=True
|
||||
)
|
||||
return self._active_parser.parse(tokens=tokens)
|
||||
|
||||
def _diff_tokenize(self, lines, until_line, line_offset=0):
|
||||
was_newline = False
|
||||
indents = self._nodes_tree.indents
|
||||
initial_indentation_count = len(indents)
|
||||
|
||||
tokens = self._tokenizer(
|
||||
lines,
|
||||
start_pos=(line_offset + 1, 0),
|
||||
indents=indents,
|
||||
is_first_token=line_offset == 0,
|
||||
)
|
||||
stack = self._active_parser.stack
|
||||
self._replace_tos_indent = None
|
||||
self._keyword_token_indents = {}
|
||||
# print('start', line_offset + 1, indents)
|
||||
for token in tokens:
|
||||
# print(token, indents)
|
||||
typ = token.type
|
||||
if typ == DEDENT:
|
||||
if len(indents) < initial_indentation_count:
|
||||
# We are done here, only thing that can come now is an
|
||||
# endmarker or another dedented code block.
|
||||
while True:
|
||||
typ, string, start_pos, prefix = token = next(tokens)
|
||||
if typ in (DEDENT, ERROR_DEDENT):
|
||||
if typ == ERROR_DEDENT:
|
||||
# We want to force an error dedent in the next
|
||||
# parser/pass. To make this possible we just
|
||||
# increase the location by one.
|
||||
self._replace_tos_indent = start_pos[1] + 1
|
||||
pass
|
||||
else:
|
||||
break
|
||||
|
||||
if '\n' in prefix or '\r' in prefix:
|
||||
prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
|
||||
else:
|
||||
assert start_pos[1] >= len(prefix), repr(prefix)
|
||||
if start_pos[1] - len(prefix) == 0:
|
||||
prefix = ''
|
||||
yield PythonToken(
|
||||
ENDMARKER, '',
|
||||
start_pos,
|
||||
prefix
|
||||
)
|
||||
break
|
||||
elif typ == NEWLINE and token.start_pos[0] >= until_line:
|
||||
was_newline = True
|
||||
elif was_newline:
|
||||
was_newline = False
|
||||
if len(indents) == initial_indentation_count:
|
||||
# Check if the parser is actually in a valid suite state.
|
||||
if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
|
||||
yield PythonToken(ENDMARKER, '', token.start_pos, '')
|
||||
break
|
||||
|
||||
if typ == NAME and token.string in ('class', 'def'):
|
||||
self._keyword_token_indents[token.start_pos] = list(indents)
|
||||
|
||||
yield token
|
||||
|
||||
|
||||
class _NodesTreeNode(object):
|
||||
_ChildrenGroup = namedtuple(
|
||||
'_ChildrenGroup',
|
||||
'prefix children line_offset last_line_offset_leaf')
|
||||
|
||||
def __init__(self, tree_node, parent=None, indentation=0):
|
||||
self.tree_node = tree_node
|
||||
self._children_groups = []
|
||||
self.parent = parent
|
||||
self._node_children = []
|
||||
self.indentation = indentation
|
||||
|
||||
def finish(self):
|
||||
children = []
|
||||
for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
|
||||
first_leaf = _get_next_leaf_if_indentation(
|
||||
children_part[0].get_first_leaf()
|
||||
)
|
||||
|
||||
first_leaf.prefix = prefix + first_leaf.prefix
|
||||
if line_offset != 0:
|
||||
try:
|
||||
_update_positions(
|
||||
children_part, line_offset, last_line_offset_leaf)
|
||||
except _PositionUpdatingFinished:
|
||||
pass
|
||||
children += children_part
|
||||
self.tree_node.children = children
|
||||
# Reset the parents
|
||||
for node in children:
|
||||
node.parent = self.tree_node
|
||||
|
||||
for node_child in self._node_children:
|
||||
node_child.finish()
|
||||
|
||||
def add_child_node(self, child_node):
|
||||
self._node_children.append(child_node)
|
||||
|
||||
def add_tree_nodes(self, prefix, children, line_offset=0,
|
||||
last_line_offset_leaf=None):
|
||||
if last_line_offset_leaf is None:
|
||||
last_line_offset_leaf = children[-1].get_last_leaf()
|
||||
group = self._ChildrenGroup(
|
||||
prefix, children, line_offset, last_line_offset_leaf
|
||||
)
|
||||
self._children_groups.append(group)
|
||||
|
||||
def get_last_line(self, suffix):
|
||||
line = 0
|
||||
if self._children_groups:
|
||||
children_group = self._children_groups[-1]
|
||||
last_leaf = _get_previous_leaf_if_indentation(
|
||||
children_group.last_line_offset_leaf
|
||||
)
|
||||
|
||||
line = last_leaf.end_pos[0] + children_group.line_offset
|
||||
|
||||
# Newlines end on the next line, which means that they would cover
|
||||
# the next line. That line is not fully parsed at this point.
|
||||
if _ends_with_newline(last_leaf, suffix):
|
||||
line -= 1
|
||||
line += len(split_lines(suffix)) - 1
|
||||
|
||||
if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
|
||||
# This is the end of a file (that doesn't end with a newline).
|
||||
line += 1
|
||||
|
||||
if self._node_children:
|
||||
return max(line, self._node_children[-1].get_last_line(suffix))
|
||||
return line
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s: %s>' % (self.__class__.__name__, self.tree_node)
|
||||
|
||||
|
||||
class _NodesTree(object):
|
||||
def __init__(self, module):
|
||||
self._base_node = _NodesTreeNode(module)
|
||||
self._working_stack = [self._base_node]
|
||||
self._module = module
|
||||
self._prefix_remainder = ''
|
||||
self.prefix = ''
|
||||
self.indents = [0]
|
||||
|
||||
@property
|
||||
def parsed_until_line(self):
|
||||
return self._working_stack[-1].get_last_line(self.prefix)
|
||||
|
||||
def _update_insertion_node(self, indentation):
|
||||
for node in reversed(list(self._working_stack)):
|
||||
if node.indentation < indentation or node is self._working_stack[0]:
|
||||
return node
|
||||
self._working_stack.pop()
|
||||
|
||||
def add_parsed_nodes(self, tree_nodes, keyword_token_indents):
|
||||
old_prefix = self.prefix
|
||||
tree_nodes = self._remove_endmarker(tree_nodes)
|
||||
if not tree_nodes:
|
||||
self.prefix = old_prefix + self.prefix
|
||||
return
|
||||
|
||||
assert tree_nodes[0].type != 'newline'
|
||||
|
||||
node = self._update_insertion_node(tree_nodes[0].start_pos[1])
|
||||
assert node.tree_node.type in ('suite', 'file_input')
|
||||
node.add_tree_nodes(old_prefix, tree_nodes)
|
||||
# tos = Top of stack
|
||||
self._update_parsed_node_tos(tree_nodes[-1], keyword_token_indents)
|
||||
|
||||
def _update_parsed_node_tos(self, tree_node, keyword_token_indents):
|
||||
if tree_node.type == 'suite':
|
||||
def_leaf = tree_node.parent.children[0]
|
||||
new_tos = _NodesTreeNode(
|
||||
tree_node,
|
||||
indentation=keyword_token_indents[def_leaf.start_pos][-1],
|
||||
)
|
||||
new_tos.add_tree_nodes('', list(tree_node.children))
|
||||
|
||||
self._working_stack[-1].add_child_node(new_tos)
|
||||
self._working_stack.append(new_tos)
|
||||
|
||||
self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)
|
||||
elif _func_or_class_has_suite(tree_node):
|
||||
self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)
|
||||
|
||||
def _remove_endmarker(self, tree_nodes):
|
||||
"""
|
||||
Helps cleaning up the tree nodes that get inserted.
|
||||
"""
|
||||
last_leaf = tree_nodes[-1].get_last_leaf()
|
||||
is_endmarker = last_leaf.type == 'endmarker'
|
||||
self._prefix_remainder = ''
|
||||
if is_endmarker:
|
||||
prefix = last_leaf.prefix
|
||||
separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
|
||||
if separation > -1:
|
||||
# Remove the whitespace part of the prefix after a newline.
|
||||
# That is not relevant if parentheses were opened. Always parse
|
||||
# until the end of a line.
|
||||
last_leaf.prefix, self._prefix_remainder = \
|
||||
last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
|
||||
|
||||
self.prefix = ''
|
||||
|
||||
if is_endmarker:
|
||||
self.prefix = last_leaf.prefix
|
||||
|
||||
tree_nodes = tree_nodes[:-1]
|
||||
return tree_nodes
|
||||
|
||||
def _get_matching_indent_nodes(self, tree_nodes, is_new_suite):
|
||||
# There might be a random dedent where we have to stop copying.
|
||||
# Invalid indents are ok, because the parser handled that
|
||||
# properly before. An invalid dedent can happen, because a few
|
||||
# lines above there was an invalid indent.
|
||||
node_iterator = iter(tree_nodes)
|
||||
if is_new_suite:
|
||||
yield next(node_iterator)
|
||||
|
||||
first_node = next(node_iterator)
|
||||
indent = _get_indentation(first_node)
|
||||
if not is_new_suite and indent not in self.indents:
|
||||
return
|
||||
yield first_node
|
||||
|
||||
for n in node_iterator:
|
||||
if _get_indentation(n) != indent:
|
||||
return
|
||||
yield n
|
||||
|
||||
def copy_nodes(self, tree_nodes, until_line, line_offset):
|
||||
"""
|
||||
Copies tree nodes from the old parser tree.
|
||||
|
||||
Returns the number of tree nodes that were copied.
|
||||
"""
|
||||
if tree_nodes[0].type in ('error_leaf', 'error_node'):
|
||||
# Avoid copying errors in the beginning. Can lead to a lot of
|
||||
# issues.
|
||||
return []
|
||||
|
||||
indentation = _get_indentation(tree_nodes[0])
|
||||
old_working_stack = list(self._working_stack)
|
||||
old_prefix = self.prefix
|
||||
old_indents = self.indents
|
||||
self.indents = [i for i in self.indents if i <= indentation]
|
||||
|
||||
self._update_insertion_node(indentation)
|
||||
|
||||
new_nodes, self._working_stack, self.prefix, added_indents = self._copy_nodes(
|
||||
list(self._working_stack),
|
||||
tree_nodes,
|
||||
until_line,
|
||||
line_offset,
|
||||
self.prefix,
|
||||
)
|
||||
if new_nodes:
|
||||
self.indents += added_indents
|
||||
else:
|
||||
self._working_stack = old_working_stack
|
||||
self.prefix = old_prefix
|
||||
self.indents = old_indents
|
||||
return new_nodes
|
||||
|
||||
def _copy_nodes(self, working_stack, nodes, until_line, line_offset,
|
||||
prefix='', is_nested=False):
|
||||
new_nodes = []
|
||||
added_indents = []
|
||||
|
||||
nodes = list(self._get_matching_indent_nodes(
|
||||
nodes,
|
||||
is_new_suite=is_nested,
|
||||
))
|
||||
|
||||
new_prefix = ''
|
||||
for node in nodes:
|
||||
if node.start_pos[0] > until_line:
|
||||
break
|
||||
|
||||
if node.type == 'endmarker':
|
||||
break
|
||||
|
||||
if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'):
|
||||
break
|
||||
# TODO this check might take a bit of time for large files. We
|
||||
# might want to change this to do more intelligent guessing or
|
||||
# binary search.
|
||||
if _get_last_line(node) > until_line:
|
||||
# We can split up functions and classes later.
|
||||
if _func_or_class_has_suite(node):
|
||||
new_nodes.append(node)
|
||||
break
|
||||
try:
|
||||
c = node.children
|
||||
except AttributeError:
|
||||
pass
|
||||
else:
|
||||
# This case basically appears with error recovery of one line
|
||||
# suites like `def foo(): bar.-`. In this case we might not
|
||||
# include a newline in the statement and we need to take care
|
||||
# of that.
|
||||
n = node
|
||||
if n.type == 'decorated':
|
||||
n = n.children[-1]
|
||||
if n.type in ('async_funcdef', 'async_stmt'):
|
||||
n = n.children[-1]
|
||||
if n.type in ('classdef', 'funcdef'):
|
||||
suite_node = n.children[-1]
|
||||
else:
|
||||
suite_node = c[-1]
|
||||
|
||||
if suite_node.type in ('error_leaf', 'error_node'):
|
||||
break
|
||||
|
||||
new_nodes.append(node)
|
||||
|
||||
# Pop error nodes at the end from the list
|
||||
if new_nodes:
|
||||
while new_nodes:
|
||||
last_node = new_nodes[-1]
|
||||
if (last_node.type in ('error_leaf', 'error_node')
|
||||
or _is_flow_node(new_nodes[-1])):
|
||||
# Error leafs/nodes don't have a defined start/end. Error
|
||||
# nodes might not end with a newline (e.g. if there's an
|
||||
# open `(`). Therefore ignore all of them unless they are
|
||||
# succeeded with valid parser state.
|
||||
# If we copy flows at the end, they might be continued
|
||||
# after the copy limit (in the new parser).
|
||||
# In this while loop we try to remove until we find a newline.
|
||||
new_prefix = ''
|
||||
new_nodes.pop()
|
||||
while new_nodes:
|
||||
last_node = new_nodes[-1]
|
||||
if last_node.get_last_leaf().type == 'newline':
|
||||
break
|
||||
new_nodes.pop()
|
||||
continue
|
||||
if len(new_nodes) > 1 and new_nodes[-2].type == 'error_node':
|
||||
# The problem here is that Parso error recovery sometimes
|
||||
# influences nodes before this node.
|
||||
# Since the new last node is an error node this will get
|
||||
# cleaned up in the next while iteration.
|
||||
new_nodes.pop()
|
||||
continue
|
||||
break
|
||||
|
||||
if not new_nodes:
|
||||
return [], working_stack, prefix, added_indents
|
||||
|
||||
tos = working_stack[-1]
|
||||
last_node = new_nodes[-1]
|
||||
had_valid_suite_last = False
|
||||
# Pop incomplete suites from the list
|
||||
if _func_or_class_has_suite(last_node):
|
||||
suite = last_node
|
||||
while suite.type != 'suite':
|
||||
suite = suite.children[-1]
|
||||
|
||||
indent = _get_suite_indentation(suite)
|
||||
added_indents.append(indent)
|
||||
|
||||
suite_tos = _NodesTreeNode(suite, indentation=_get_indentation(last_node))
|
||||
# Don't need to pass line_offset here, it's already done by the
|
||||
# parent.
|
||||
suite_nodes, new_working_stack, new_prefix, ai = self._copy_nodes(
|
||||
working_stack + [suite_tos], suite.children, until_line, line_offset,
|
||||
is_nested=True,
|
||||
)
|
||||
added_indents += ai
|
||||
if len(suite_nodes) < 2:
|
||||
# A suite only with newline is not valid.
|
||||
new_nodes.pop()
|
||||
new_prefix = ''
|
||||
else:
|
||||
assert new_nodes
|
||||
tos.add_child_node(suite_tos)
|
||||
working_stack = new_working_stack
|
||||
had_valid_suite_last = True
|
||||
|
||||
if new_nodes:
|
||||
if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last:
|
||||
p = new_nodes[-1].get_next_leaf().prefix
|
||||
# We are not allowed to remove the newline at the end of the
|
||||
# line, otherwise it's going to be missing. This happens e.g.
|
||||
# if a bracket is around before that moves newlines to
|
||||
# prefixes.
|
||||
new_prefix = split_lines(p, keepends=True)[0]
|
||||
|
||||
if had_valid_suite_last:
|
||||
last = new_nodes[-1]
|
||||
if last.type == 'decorated':
|
||||
last = last.children[-1]
|
||||
if last.type in ('async_funcdef', 'async_stmt'):
|
||||
last = last.children[-1]
|
||||
last_line_offset_leaf = last.children[-2].get_last_leaf()
|
||||
assert last_line_offset_leaf == ':'
|
||||
else:
|
||||
last_line_offset_leaf = new_nodes[-1].get_last_leaf()
|
||||
tos.add_tree_nodes(
|
||||
prefix, new_nodes, line_offset, last_line_offset_leaf,
|
||||
)
|
||||
prefix = new_prefix
|
||||
self._prefix_remainder = ''
|
||||
|
||||
return new_nodes, working_stack, prefix, added_indents
|
||||
|
||||
def close(self):
|
||||
self._base_node.finish()
|
||||
|
||||
# Add an endmarker.
|
||||
try:
|
||||
last_leaf = self._module.get_last_leaf()
|
||||
except IndexError:
|
||||
end_pos = [1, 0]
|
||||
else:
|
||||
last_leaf = _skip_dedent_error_leaves(last_leaf)
|
||||
end_pos = list(last_leaf.end_pos)
|
||||
lines = split_lines(self.prefix)
|
||||
assert len(lines) > 0
|
||||
if len(lines) == 1:
|
||||
if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]:
|
||||
end_pos[1] -= 1
|
||||
end_pos[1] += len(lines[0])
|
||||
else:
|
||||
end_pos[0] += len(lines) - 1
|
||||
end_pos[1] = len(lines[-1])
|
||||
|
||||
endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
|
||||
endmarker.parent = self._module
|
||||
self._module.children.append(endmarker)
|
1277
venv/Lib/site-packages/parso/python/errors.py
Normal file
1277
venv/Lib/site-packages/parso/python/errors.py
Normal file
File diff suppressed because it is too large
Load diff
143
venv/Lib/site-packages/parso/python/grammar27.txt
Normal file
143
venv/Lib/site-packages/parso/python/grammar27.txt
Normal file
|
@ -0,0 +1,143 @@
|
|||
# Grammar for Python
|
||||
|
||||
# Note: Changing the grammar specified in this file will most likely
|
||||
# require corresponding changes in the parser module
|
||||
# (../Modules/parsermodule.c). If you can't make the changes to
|
||||
# that module yourself, please co-ordinate the required changes
|
||||
# with someone who can; ask around on python-dev for help. Fred
|
||||
# Drake <fdrake@acm.org> will probably be listening there.
|
||||
|
||||
# NOTE WELL: You should also follow all the steps listed in PEP 306,
|
||||
# "How to Change Python's Grammar"
|
||||
|
||||
# Start symbols for the grammar:
|
||||
# single_input is a single interactive statement;
|
||||
# file_input is a module or sequence of commands read from an input file;
|
||||
# eval_input is the input for the eval() and input() functions.
|
||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||
file_input: stmt* ENDMARKER
|
||||
eval_input: testlist NEWLINE* ENDMARKER
|
||||
|
||||
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef)
|
||||
funcdef: 'def' NAME parameters ':' suite
|
||||
parameters: '(' [varargslist] ')'
|
||||
varargslist: ((fpdef ['=' test] ',')*
|
||||
('*' NAME [',' '**' NAME] | '**' NAME) |
|
||||
fpdef ['=' test] (',' fpdef ['=' test])* [','])
|
||||
fpdef: NAME | '(' fplist ')'
|
||||
fplist: fpdef (',' fpdef)* [',']
|
||||
|
||||
stmt: simple_stmt | compound_stmt | NEWLINE
|
||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||
import_stmt | global_stmt | exec_stmt | assert_stmt)
|
||||
expr_stmt: testlist (augassign (yield_expr|testlist) |
|
||||
('=' (yield_expr|testlist))*)
|
||||
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||
'<<=' | '>>=' | '**=' | '//=')
|
||||
# For normal assignments, additional restrictions enforced by the interpreter
|
||||
print_stmt: 'print' ( [ test (',' test)* [','] ] |
|
||||
'>>' test [ (',' test)+ [','] ] )
|
||||
del_stmt: 'del' exprlist
|
||||
pass_stmt: 'pass'
|
||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: 'break'
|
||||
continue_stmt: 'continue'
|
||||
return_stmt: 'return' [testlist]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: 'raise' [test [',' test [',' test]]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: 'import' dotted_as_names
|
||||
import_from: ('from' ('.'* dotted_name | '.'+)
|
||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||
import_as_name: NAME ['as' NAME]
|
||||
dotted_as_name: dotted_name ['as' NAME]
|
||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||
dotted_name: NAME ('.' NAME)*
|
||||
global_stmt: 'global' NAME (',' NAME)*
|
||||
exec_stmt: 'exec' expr ['in' test [',' test]]
|
||||
assert_stmt: 'assert' test [',' test]
|
||||
|
||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
|
||||
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||
try_stmt: ('try' ':' suite
|
||||
((except_clause ':' suite)+
|
||||
['else' ':' suite]
|
||||
['finally' ':' suite] |
|
||||
'finally' ':' suite))
|
||||
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||
with_item: test ['as' expr]
|
||||
# NB compile.c makes sure that the default except clause is last
|
||||
except_clause: 'except' [test [('as' | ',') test]]
|
||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||
|
||||
# Backward compatibility cruft to support:
|
||||
# [ x for x in lambda: True, lambda: False if x() ]
|
||||
# even while also allowing:
|
||||
# lambda x: 5 if x else 2
|
||||
# (But not a mix of the two)
|
||||
testlist_safe: old_test [(',' old_test)+ [',']]
|
||||
old_test: or_test | old_lambdef
|
||||
old_lambdef: 'lambda' [varargslist] ':' old_test
|
||||
|
||||
test: or_test ['if' or_test 'else' test] | lambdef
|
||||
or_test: and_test ('or' and_test)*
|
||||
and_test: not_test ('and' not_test)*
|
||||
not_test: 'not' not_test | comparison
|
||||
comparison: expr (comp_op expr)*
|
||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||
expr: xor_expr ('|' xor_expr)*
|
||||
xor_expr: and_expr ('^' and_expr)*
|
||||
and_expr: shift_expr ('&' shift_expr)*
|
||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||
arith_expr: term (('+'|'-') term)*
|
||||
term: factor (('*'|'/'|'%'|'//') factor)*
|
||||
factor: ('+'|'-'|'~') factor | power
|
||||
power: atom trailer* ['**' factor]
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [listmaker] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
'`' testlist1 '`' |
|
||||
NAME | NUMBER | strings)
|
||||
strings: STRING+
|
||||
listmaker: test ( list_for | (',' test)* [','] )
|
||||
testlist_comp: test ( sync_comp_for | (',' test)* [','] )
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
|
||||
sliceop: ':' [test]
|
||||
exprlist: expr (',' expr)* [',']
|
||||
testlist: test (',' test)* [',']
|
||||
dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
|
||||
(test (sync_comp_for | (',' test)* [','])) )
|
||||
|
||||
classdef: 'class' NAME ['(' [testlist] ')'] ':' suite
|
||||
|
||||
arglist: (argument ',')* (argument [',']
|
||||
|'*' test (',' argument)* [',' '**' test]
|
||||
|'**' test)
|
||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||
argument: test [sync_comp_for] | test '=' test
|
||||
|
||||
list_iter: list_for | list_if
|
||||
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
|
||||
list_if: 'if' old_test [list_iter]
|
||||
|
||||
comp_iter: sync_comp_for | comp_if
|
||||
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||
comp_if: 'if' old_test [comp_iter]
|
||||
|
||||
testlist1: test (',' test)*
|
||||
|
||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [testlist]
|
171
venv/Lib/site-packages/parso/python/grammar310.txt
Normal file
171
venv/Lib/site-packages/parso/python/grammar310.txt
Normal file
|
@ -0,0 +1,171 @@
|
|||
# Grammar for Python
|
||||
|
||||
# NOTE WELL: You should also follow all the steps listed at
|
||||
# https://devguide.python.org/grammar/
|
||||
|
||||
# Start symbols for the grammar:
|
||||
# single_input is a single interactive statement;
|
||||
# file_input is a module or sequence of commands read from an input file;
|
||||
# eval_input is the input for the eval() functions.
|
||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||
file_input: stmt* ENDMARKER
|
||||
eval_input: testlist NEWLINE* ENDMARKER
|
||||
|
||||
decorator: '@' namedexpr_test NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||
|
||||
async_funcdef: 'async' funcdef
|
||||
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||
|
||||
parameters: '(' [typedargslist] ')'
|
||||
typedargslist: (
|
||||
(tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] (
|
||||
',' tfpdef ['=' test])* ([',' [
|
||||
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [',']]])
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]])
|
||||
| '**' tfpdef [',']]] )
|
||||
| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
|
||||
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [',']]]
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [','])
|
||||
)
|
||||
tfpdef: NAME [':' test]
|
||||
varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']
|
||||
)
|
||||
vfpdef: NAME
|
||||
|
||||
stmt: simple_stmt | compound_stmt | NEWLINE
|
||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
|
||||
('=' (yield_expr|testlist_star_expr))*)
|
||||
annassign: ':' test ['=' (yield_expr|testlist_star_expr)]
|
||||
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||
'<<=' | '>>=' | '**=' | '//=')
|
||||
# For normal and annotated assignments, additional restrictions enforced by the interpreter
|
||||
del_stmt: 'del' exprlist
|
||||
pass_stmt: 'pass'
|
||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: 'break'
|
||||
continue_stmt: 'continue'
|
||||
return_stmt: 'return' [testlist_star_expr]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: 'raise' [test ['from' test]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: 'import' dotted_as_names
|
||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||
import_as_name: NAME ['as' NAME]
|
||||
dotted_as_name: dotted_name ['as' NAME]
|
||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||
dotted_name: NAME ('.' NAME)*
|
||||
global_stmt: 'global' NAME (',' NAME)*
|
||||
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||
assert_stmt: 'assert' test [',' test]
|
||||
|
||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
||||
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
|
||||
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
|
||||
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
|
||||
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||
try_stmt: ('try' ':' suite
|
||||
((except_clause ':' suite)+
|
||||
['else' ':' suite]
|
||||
['finally' ':' suite] |
|
||||
'finally' ':' suite))
|
||||
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||
with_item: test ['as' expr]
|
||||
# NB compile.c makes sure that the default except clause is last
|
||||
except_clause: 'except' [test ['as' NAME]]
|
||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||
|
||||
namedexpr_test: test [':=' test]
|
||||
test: or_test ['if' or_test 'else' test] | lambdef
|
||||
test_nocond: or_test | lambdef_nocond
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||
or_test: and_test ('or' and_test)*
|
||||
and_test: not_test ('and' not_test)*
|
||||
not_test: 'not' not_test | comparison
|
||||
comparison: expr (comp_op expr)*
|
||||
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||
# sake of a __future__ import described in PEP 401 (which really works :-)
|
||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||
star_expr: '*' expr
|
||||
expr: xor_expr ('|' xor_expr)*
|
||||
xor_expr: and_expr ('^' and_expr)*
|
||||
and_expr: shift_expr ('&' shift_expr)*
|
||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||
arith_expr: term (('+'|'-') term)*
|
||||
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
||||
factor: ('+'|'-'|'~') factor | power
|
||||
power: atom_expr ['**' factor]
|
||||
atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
subscript: test | [test] ':' [test] [sliceop]
|
||||
sliceop: ':' [test]
|
||||
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||
testlist: test (',' test)* [',']
|
||||
dictorsetmaker: ( ((test ':' test | '**' expr)
|
||||
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
||||
((test | star_expr)
|
||||
(comp_for | (',' (test | star_expr))* [','])) )
|
||||
|
||||
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||
|
||||
arglist: argument (',' argument)* [',']
|
||||
|
||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
||||
# These need to be in a single rule to avoid grammar that is ambiguous
|
||||
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
||||
# we explicitly match '*' here, too, to give it proper precedence.
|
||||
# Illegal combinations and orderings are blocked in ast.c:
|
||||
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
||||
# that precede iterable unpackings are blocked; etc.
|
||||
argument: ( test [comp_for] |
|
||||
test ':=' test |
|
||||
test '=' test |
|
||||
'**' test |
|
||||
'*' test )
|
||||
|
||||
comp_iter: comp_for | comp_if
|
||||
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||
comp_for: ['async'] sync_comp_for
|
||||
comp_if: 'if' test_nocond [comp_iter]
|
||||
|
||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist_star_expr
|
||||
|
||||
strings: (STRING | fstring)+
|
||||
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||
fstring_content: FSTRING_STRING | fstring_expr
|
||||
fstring_conversion: '!' NAME
|
||||
fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||
fstring_format_spec: ':' fstring_content*
|
134
venv/Lib/site-packages/parso/python/grammar33.txt
Normal file
134
venv/Lib/site-packages/parso/python/grammar33.txt
Normal file
|
@ -0,0 +1,134 @@
|
|||
# Grammar for Python
|
||||
|
||||
# Note: Changing the grammar specified in this file will most likely
|
||||
# require corresponding changes in the parser module
|
||||
# (../Modules/parsermodule.c). If you can't make the changes to
|
||||
# that module yourself, please co-ordinate the required changes
|
||||
# with someone who can; ask around on python-dev for help. Fred
|
||||
# Drake <fdrake@acm.org> will probably be listening there.
|
||||
|
||||
# NOTE WELL: You should also follow all the steps listed in PEP 306,
|
||||
# "How to Change Python's Grammar"
|
||||
|
||||
# Start symbols for the grammar:
|
||||
# single_input is a single interactive statement;
|
||||
# file_input is a module or sequence of commands read from an input file;
|
||||
# eval_input is the input for the eval() functions.
|
||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||
file_input: stmt* ENDMARKER
|
||||
eval_input: testlist NEWLINE* ENDMARKER
|
||||
|
||||
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef)
|
||||
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||
parameters: '(' [typedargslist] ')'
|
||||
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
|
||||
['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
|
||||
tfpdef: NAME [':' test]
|
||||
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
|
||||
['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
|
||||
vfpdef: NAME
|
||||
|
||||
stmt: simple_stmt | compound_stmt | NEWLINE
|
||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
|
||||
('=' (yield_expr|testlist_star_expr))*)
|
||||
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||
'<<=' | '>>=' | '**=' | '//=')
|
||||
# For normal assignments, additional restrictions enforced by the interpreter
|
||||
del_stmt: 'del' exprlist
|
||||
pass_stmt: 'pass'
|
||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: 'break'
|
||||
continue_stmt: 'continue'
|
||||
return_stmt: 'return' [testlist]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: 'raise' [test ['from' test]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: 'import' dotted_as_names
|
||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||
import_as_name: NAME ['as' NAME]
|
||||
dotted_as_name: dotted_name ['as' NAME]
|
||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||
dotted_name: NAME ('.' NAME)*
|
||||
global_stmt: 'global' NAME (',' NAME)*
|
||||
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||
assert_stmt: 'assert' test [',' test]
|
||||
|
||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
|
||||
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||
try_stmt: ('try' ':' suite
|
||||
((except_clause ':' suite)+
|
||||
['else' ':' suite]
|
||||
['finally' ':' suite] |
|
||||
'finally' ':' suite))
|
||||
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||
with_item: test ['as' expr]
|
||||
# NB compile.c makes sure that the default except clause is last
|
||||
except_clause: 'except' [test ['as' NAME]]
|
||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||
|
||||
test: or_test ['if' or_test 'else' test] | lambdef
|
||||
test_nocond: or_test | lambdef_nocond
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||
or_test: and_test ('or' and_test)*
|
||||
and_test: not_test ('and' not_test)*
|
||||
not_test: 'not' not_test | comparison
|
||||
comparison: expr (comp_op expr)*
|
||||
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||
# sake of a __future__ import described in PEP 401
|
||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||
star_expr: '*' expr
|
||||
expr: xor_expr ('|' xor_expr)*
|
||||
xor_expr: and_expr ('^' and_expr)*
|
||||
and_expr: shift_expr ('&' shift_expr)*
|
||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||
arith_expr: term (('+'|'-') term)*
|
||||
term: factor (('*'|'/'|'%'|'//') factor)*
|
||||
factor: ('+'|'-'|'~') factor | power
|
||||
power: atom trailer* ['**' factor]
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
strings: STRING+
|
||||
testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
subscript: test | [test] ':' [test] [sliceop]
|
||||
sliceop: ':' [test]
|
||||
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||
testlist: test (',' test)* [',']
|
||||
dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
|
||||
(test (sync_comp_for | (',' test)* [','])) )
|
||||
|
||||
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||
|
||||
arglist: (argument ',')* (argument [',']
|
||||
|'*' test (',' argument)* [',' '**' test]
|
||||
|'**' test)
|
||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||
argument: test [sync_comp_for] | test '=' test # Really [keyword '='] test
|
||||
comp_iter: sync_comp_for | comp_if
|
||||
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||
comp_if: 'if' test_nocond [comp_iter]
|
||||
|
||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist
|
134
venv/Lib/site-packages/parso/python/grammar34.txt
Normal file
134
venv/Lib/site-packages/parso/python/grammar34.txt
Normal file
|
@ -0,0 +1,134 @@
|
|||
# Grammar for Python
|
||||
|
||||
# Note: Changing the grammar specified in this file will most likely
|
||||
# require corresponding changes in the parser module
|
||||
# (../Modules/parsermodule.c). If you can't make the changes to
|
||||
# that module yourself, please co-ordinate the required changes
|
||||
# with someone who can; ask around on python-dev for help. Fred
|
||||
# Drake <fdrake@acm.org> will probably be listening there.
|
||||
|
||||
# NOTE WELL: You should also follow all the steps listed at
|
||||
# https://docs.python.org/devguide/grammar.html
|
||||
|
||||
# Start symbols for the grammar:
|
||||
# single_input is a single interactive statement;
|
||||
# file_input is a module or sequence of commands read from an input file;
|
||||
# eval_input is the input for the eval() functions.
|
||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||
file_input: stmt* ENDMARKER
|
||||
eval_input: testlist NEWLINE* ENDMARKER
|
||||
|
||||
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef)
|
||||
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||
parameters: '(' [typedargslist] ')'
|
||||
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
|
||||
['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
|
||||
tfpdef: NAME [':' test]
|
||||
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
|
||||
['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
|
||||
vfpdef: NAME
|
||||
|
||||
stmt: simple_stmt | compound_stmt | NEWLINE
|
||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
|
||||
('=' (yield_expr|testlist_star_expr))*)
|
||||
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||
'<<=' | '>>=' | '**=' | '//=')
|
||||
# For normal assignments, additional restrictions enforced by the interpreter
|
||||
del_stmt: 'del' exprlist
|
||||
pass_stmt: 'pass'
|
||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: 'break'
|
||||
continue_stmt: 'continue'
|
||||
return_stmt: 'return' [testlist]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: 'raise' [test ['from' test]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: 'import' dotted_as_names
|
||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||
import_as_name: NAME ['as' NAME]
|
||||
dotted_as_name: dotted_name ['as' NAME]
|
||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||
dotted_name: NAME ('.' NAME)*
|
||||
global_stmt: 'global' NAME (',' NAME)*
|
||||
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||
assert_stmt: 'assert' test [',' test]
|
||||
|
||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
|
||||
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||
try_stmt: ('try' ':' suite
|
||||
((except_clause ':' suite)+
|
||||
['else' ':' suite]
|
||||
['finally' ':' suite] |
|
||||
'finally' ':' suite))
|
||||
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||
with_item: test ['as' expr]
|
||||
# NB compile.c makes sure that the default except clause is last
|
||||
except_clause: 'except' [test ['as' NAME]]
|
||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||
|
||||
test: or_test ['if' or_test 'else' test] | lambdef
|
||||
test_nocond: or_test | lambdef_nocond
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||
or_test: and_test ('or' and_test)*
|
||||
and_test: not_test ('and' not_test)*
|
||||
not_test: 'not' not_test | comparison
|
||||
comparison: expr (comp_op expr)*
|
||||
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||
# sake of a __future__ import described in PEP 401
|
||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||
star_expr: '*' expr
|
||||
expr: xor_expr ('|' xor_expr)*
|
||||
xor_expr: and_expr ('^' and_expr)*
|
||||
and_expr: shift_expr ('&' shift_expr)*
|
||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||
arith_expr: term (('+'|'-') term)*
|
||||
term: factor (('*'|'/'|'%'|'//') factor)*
|
||||
factor: ('+'|'-'|'~') factor | power
|
||||
power: atom trailer* ['**' factor]
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
strings: STRING+
|
||||
testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
subscript: test | [test] ':' [test] [sliceop]
|
||||
sliceop: ':' [test]
|
||||
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||
testlist: test (',' test)* [',']
|
||||
dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
|
||||
(test (sync_comp_for | (',' test)* [','])) )
|
||||
|
||||
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||
|
||||
arglist: (argument ',')* (argument [',']
|
||||
|'*' test (',' argument)* [',' '**' test]
|
||||
|'**' test)
|
||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||
argument: test [sync_comp_for] | test '=' test # Really [keyword '='] test
|
||||
comp_iter: sync_comp_for | comp_if
|
||||
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||
comp_if: 'if' test_nocond [comp_iter]
|
||||
|
||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist
|
153
venv/Lib/site-packages/parso/python/grammar35.txt
Normal file
153
venv/Lib/site-packages/parso/python/grammar35.txt
Normal file
|
@ -0,0 +1,153 @@
|
|||
# Grammar for Python
|
||||
|
||||
# Note: Changing the grammar specified in this file will most likely
|
||||
# require corresponding changes in the parser module
|
||||
# (../Modules/parsermodule.c). If you can't make the changes to
|
||||
# that module yourself, please co-ordinate the required changes
|
||||
# with someone who can; ask around on python-dev for help. Fred
|
||||
# Drake <fdrake@acm.org> will probably be listening there.
|
||||
|
||||
# NOTE WELL: You should also follow all the steps listed at
|
||||
# https://docs.python.org/devguide/grammar.html
|
||||
|
||||
# Start symbols for the grammar:
|
||||
# single_input is a single interactive statement;
|
||||
# file_input is a module or sequence of commands read from an input file;
|
||||
# eval_input is the input for the eval() functions.
|
||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||
file_input: stmt* ENDMARKER
|
||||
eval_input: testlist NEWLINE* ENDMARKER
|
||||
|
||||
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||
|
||||
# NOTE: Reinoud Elhorst, using ASYNC/AWAIT keywords instead of tokens
|
||||
# skipping python3.5 compatibility, in favour of 3.7 solution
|
||||
async_funcdef: 'async' funcdef
|
||||
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||
|
||||
parameters: '(' [typedargslist] ')'
|
||||
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
|
||||
['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
|
||||
tfpdef: NAME [':' test]
|
||||
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
|
||||
['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
|
||||
vfpdef: NAME
|
||||
|
||||
stmt: simple_stmt | compound_stmt | NEWLINE
|
||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
|
||||
('=' (yield_expr|testlist_star_expr))*)
|
||||
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||
'<<=' | '>>=' | '**=' | '//=')
|
||||
# For normal assignments, additional restrictions enforced by the interpreter
|
||||
del_stmt: 'del' exprlist
|
||||
pass_stmt: 'pass'
|
||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: 'break'
|
||||
continue_stmt: 'continue'
|
||||
return_stmt: 'return' [testlist]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: 'raise' [test ['from' test]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: 'import' dotted_as_names
|
||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||
import_as_name: NAME ['as' NAME]
|
||||
dotted_as_name: dotted_name ['as' NAME]
|
||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||
dotted_name: NAME ('.' NAME)*
|
||||
global_stmt: 'global' NAME (',' NAME)*
|
||||
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||
assert_stmt: 'assert' test [',' test]
|
||||
|
||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
||||
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
|
||||
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||
try_stmt: ('try' ':' suite
|
||||
((except_clause ':' suite)+
|
||||
['else' ':' suite]
|
||||
['finally' ':' suite] |
|
||||
'finally' ':' suite))
|
||||
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||
with_item: test ['as' expr]
|
||||
# NB compile.c makes sure that the default except clause is last
|
||||
except_clause: 'except' [test ['as' NAME]]
|
||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||
|
||||
test: or_test ['if' or_test 'else' test] | lambdef
|
||||
test_nocond: or_test | lambdef_nocond
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||
or_test: and_test ('or' and_test)*
|
||||
and_test: not_test ('and' not_test)*
|
||||
not_test: 'not' not_test | comparison
|
||||
comparison: expr (comp_op expr)*
|
||||
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||
# sake of a __future__ import described in PEP 401 (which really works :-)
|
||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||
star_expr: '*' expr
|
||||
expr: xor_expr ('|' xor_expr)*
|
||||
xor_expr: and_expr ('^' and_expr)*
|
||||
and_expr: shift_expr ('&' shift_expr)*
|
||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||
arith_expr: term (('+'|'-') term)*
|
||||
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
||||
factor: ('+'|'-'|'~') factor | power
|
||||
power: atom_expr ['**' factor]
|
||||
atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
strings: STRING+
|
||||
testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
subscript: test | [test] ':' [test] [sliceop]
|
||||
sliceop: ':' [test]
|
||||
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||
testlist: test (',' test)* [',']
|
||||
dictorsetmaker: ( ((test ':' test | '**' expr)
|
||||
(sync_comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
||||
((test | star_expr)
|
||||
(sync_comp_for | (',' (test | star_expr))* [','])) )
|
||||
|
||||
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||
|
||||
arglist: argument (',' argument)* [',']
|
||||
|
||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
||||
# These need to be in a single rule to avoid grammar that is ambiguous
|
||||
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
||||
# we explicitly match '*' here, too, to give it proper precedence.
|
||||
# Illegal combinations and orderings are blocked in ast.c:
|
||||
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
||||
# that precede iterable unpackings are blocked; etc.
|
||||
argument: ( test [sync_comp_for] |
|
||||
test '=' test |
|
||||
'**' test |
|
||||
'*' test )
|
||||
|
||||
comp_iter: sync_comp_for | comp_if
|
||||
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||
comp_if: 'if' test_nocond [comp_iter]
|
||||
|
||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist
|
158
venv/Lib/site-packages/parso/python/grammar36.txt
Normal file
158
venv/Lib/site-packages/parso/python/grammar36.txt
Normal file
|
@ -0,0 +1,158 @@
|
|||
# Grammar for Python
|
||||
|
||||
# NOTE WELL: You should also follow all the steps listed at
|
||||
# https://docs.python.org/devguide/grammar.html
|
||||
|
||||
# Start symbols for the grammar:
|
||||
# single_input is a single interactive statement;
|
||||
# file_input is a module or sequence of commands read from an input file;
|
||||
# eval_input is the input for the eval() functions.
|
||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||
file_input: stmt* ENDMARKER
|
||||
eval_input: testlist NEWLINE* ENDMARKER
|
||||
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||
|
||||
# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of
|
||||
# skipping python3.5+ compatibility, in favour of 3.7 solution
|
||||
async_funcdef: 'async' funcdef
|
||||
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||
|
||||
parameters: '(' [typedargslist] ')'
|
||||
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
|
||||
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [',']]]
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [','])
|
||||
tfpdef: NAME [':' test]
|
||||
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']
|
||||
)
|
||||
vfpdef: NAME
|
||||
|
||||
stmt: simple_stmt | compound_stmt | NEWLINE
|
||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
|
||||
('=' (yield_expr|testlist_star_expr))*)
|
||||
annassign: ':' test ['=' test]
|
||||
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||
'<<=' | '>>=' | '**=' | '//=')
|
||||
# For normal and annotated assignments, additional restrictions enforced by the interpreter
|
||||
del_stmt: 'del' exprlist
|
||||
pass_stmt: 'pass'
|
||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: 'break'
|
||||
continue_stmt: 'continue'
|
||||
return_stmt: 'return' [testlist]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: 'raise' [test ['from' test]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: 'import' dotted_as_names
|
||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||
import_as_name: NAME ['as' NAME]
|
||||
dotted_as_name: dotted_name ['as' NAME]
|
||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||
dotted_name: NAME ('.' NAME)*
|
||||
global_stmt: 'global' NAME (',' NAME)*
|
||||
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||
assert_stmt: 'assert' test [',' test]
|
||||
|
||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
||||
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
|
||||
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||
try_stmt: ('try' ':' suite
|
||||
((except_clause ':' suite)+
|
||||
['else' ':' suite]
|
||||
['finally' ':' suite] |
|
||||
'finally' ':' suite))
|
||||
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||
with_item: test ['as' expr]
|
||||
# NB compile.c makes sure that the default except clause is last
|
||||
except_clause: 'except' [test ['as' NAME]]
|
||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||
|
||||
test: or_test ['if' or_test 'else' test] | lambdef
|
||||
test_nocond: or_test | lambdef_nocond
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||
or_test: and_test ('or' and_test)*
|
||||
and_test: not_test ('and' not_test)*
|
||||
not_test: 'not' not_test | comparison
|
||||
comparison: expr (comp_op expr)*
|
||||
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||
# sake of a __future__ import described in PEP 401 (which really works :-)
|
||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||
star_expr: '*' expr
|
||||
expr: xor_expr ('|' xor_expr)*
|
||||
xor_expr: and_expr ('^' and_expr)*
|
||||
and_expr: shift_expr ('&' shift_expr)*
|
||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||
arith_expr: term (('+'|'-') term)*
|
||||
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
||||
factor: ('+'|'-'|'~') factor | power
|
||||
power: atom_expr ['**' factor]
|
||||
atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
subscript: test | [test] ':' [test] [sliceop]
|
||||
sliceop: ':' [test]
|
||||
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||
testlist: test (',' test)* [',']
|
||||
dictorsetmaker: ( ((test ':' test | '**' expr)
|
||||
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
||||
((test | star_expr)
|
||||
(comp_for | (',' (test | star_expr))* [','])) )
|
||||
|
||||
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||
|
||||
arglist: argument (',' argument)* [',']
|
||||
|
||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
||||
# These need to be in a single rule to avoid grammar that is ambiguous
|
||||
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
||||
# we explicitly match '*' here, too, to give it proper precedence.
|
||||
# Illegal combinations and orderings are blocked in ast.c:
|
||||
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
||||
# that precede iterable unpackings are blocked; etc.
|
||||
argument: ( test [comp_for] |
|
||||
test '=' test |
|
||||
'**' test |
|
||||
'*' test )
|
||||
|
||||
comp_iter: comp_for | comp_if
|
||||
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||
comp_for: ['async'] sync_comp_for
|
||||
comp_if: 'if' test_nocond [comp_iter]
|
||||
|
||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist
|
||||
|
||||
strings: (STRING | fstring)+
|
||||
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||
fstring_content: FSTRING_STRING | fstring_expr
|
||||
fstring_conversion: '!' NAME
|
||||
fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||
fstring_format_spec: ':' fstring_content*
|
156
venv/Lib/site-packages/parso/python/grammar37.txt
Normal file
156
venv/Lib/site-packages/parso/python/grammar37.txt
Normal file
|
@ -0,0 +1,156 @@
|
|||
# Grammar for Python
|
||||
|
||||
# NOTE WELL: You should also follow all the steps listed at
|
||||
# https://docs.python.org/devguide/grammar.html
|
||||
|
||||
# Start symbols for the grammar:
|
||||
# single_input is a single interactive statement;
|
||||
# file_input is a module or sequence of commands read from an input file;
|
||||
# eval_input is the input for the eval() functions.
|
||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||
file_input: stmt* ENDMARKER
|
||||
eval_input: testlist NEWLINE* ENDMARKER
|
||||
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||
|
||||
async_funcdef: 'async' funcdef
|
||||
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||
|
||||
parameters: '(' [typedargslist] ')'
|
||||
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
|
||||
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [',']]]
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [','])
|
||||
tfpdef: NAME [':' test]
|
||||
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']
|
||||
)
|
||||
vfpdef: NAME
|
||||
|
||||
stmt: simple_stmt | compound_stmt | NEWLINE
|
||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
|
||||
('=' (yield_expr|testlist_star_expr))*)
|
||||
annassign: ':' test ['=' test]
|
||||
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||
'<<=' | '>>=' | '**=' | '//=')
|
||||
# For normal and annotated assignments, additional restrictions enforced by the interpreter
|
||||
del_stmt: 'del' exprlist
|
||||
pass_stmt: 'pass'
|
||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: 'break'
|
||||
continue_stmt: 'continue'
|
||||
return_stmt: 'return' [testlist]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: 'raise' [test ['from' test]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: 'import' dotted_as_names
|
||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||
import_as_name: NAME ['as' NAME]
|
||||
dotted_as_name: dotted_name ['as' NAME]
|
||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||
dotted_name: NAME ('.' NAME)*
|
||||
global_stmt: 'global' NAME (',' NAME)*
|
||||
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||
assert_stmt: 'assert' test [',' test]
|
||||
|
||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
||||
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
|
||||
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||
try_stmt: ('try' ':' suite
|
||||
((except_clause ':' suite)+
|
||||
['else' ':' suite]
|
||||
['finally' ':' suite] |
|
||||
'finally' ':' suite))
|
||||
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||
with_item: test ['as' expr]
|
||||
# NB compile.c makes sure that the default except clause is last
|
||||
except_clause: 'except' [test ['as' NAME]]
|
||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||
|
||||
test: or_test ['if' or_test 'else' test] | lambdef
|
||||
test_nocond: or_test | lambdef_nocond
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||
or_test: and_test ('or' and_test)*
|
||||
and_test: not_test ('and' not_test)*
|
||||
not_test: 'not' not_test | comparison
|
||||
comparison: expr (comp_op expr)*
|
||||
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||
# sake of a __future__ import described in PEP 401 (which really works :-)
|
||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||
star_expr: '*' expr
|
||||
expr: xor_expr ('|' xor_expr)*
|
||||
xor_expr: and_expr ('^' and_expr)*
|
||||
and_expr: shift_expr ('&' shift_expr)*
|
||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||
arith_expr: term (('+'|'-') term)*
|
||||
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
||||
factor: ('+'|'-'|'~') factor | power
|
||||
power: atom_expr ['**' factor]
|
||||
atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
subscript: test | [test] ':' [test] [sliceop]
|
||||
sliceop: ':' [test]
|
||||
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||
testlist: test (',' test)* [',']
|
||||
dictorsetmaker: ( ((test ':' test | '**' expr)
|
||||
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
||||
((test | star_expr)
|
||||
(comp_for | (',' (test | star_expr))* [','])) )
|
||||
|
||||
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||
|
||||
arglist: argument (',' argument)* [',']
|
||||
|
||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
||||
# These need to be in a single rule to avoid grammar that is ambiguous
|
||||
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
||||
# we explicitly match '*' here, too, to give it proper precedence.
|
||||
# Illegal combinations and orderings are blocked in ast.c:
|
||||
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
||||
# that precede iterable unpackings are blocked; etc.
|
||||
argument: ( test [comp_for] |
|
||||
test '=' test |
|
||||
'**' test |
|
||||
'*' test )
|
||||
|
||||
comp_iter: comp_for | comp_if
|
||||
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||
comp_for: ['async'] sync_comp_for
|
||||
comp_if: 'if' test_nocond [comp_iter]
|
||||
|
||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist
|
||||
|
||||
strings: (STRING | fstring)+
|
||||
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||
fstring_content: FSTRING_STRING | fstring_expr
|
||||
fstring_conversion: '!' NAME
|
||||
fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||
fstring_format_spec: ':' fstring_content*
|
171
venv/Lib/site-packages/parso/python/grammar38.txt
Normal file
171
venv/Lib/site-packages/parso/python/grammar38.txt
Normal file
|
@ -0,0 +1,171 @@
|
|||
# Grammar for Python
|
||||
|
||||
# NOTE WELL: You should also follow all the steps listed at
|
||||
# https://devguide.python.org/grammar/
|
||||
|
||||
# Start symbols for the grammar:
|
||||
# single_input is a single interactive statement;
|
||||
# file_input is a module or sequence of commands read from an input file;
|
||||
# eval_input is the input for the eval() functions.
|
||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||
file_input: stmt* ENDMARKER
|
||||
eval_input: testlist NEWLINE* ENDMARKER
|
||||
|
||||
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||
|
||||
async_funcdef: 'async' funcdef
|
||||
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||
|
||||
parameters: '(' [typedargslist] ')'
|
||||
typedargslist: (
|
||||
(tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] (
|
||||
',' tfpdef ['=' test])* ([',' [
|
||||
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [',']]])
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]])
|
||||
| '**' tfpdef [',']]] )
|
||||
| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
|
||||
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [',']]]
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [','])
|
||||
)
|
||||
tfpdef: NAME [':' test]
|
||||
varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']
|
||||
)
|
||||
vfpdef: NAME
|
||||
|
||||
stmt: simple_stmt | compound_stmt | NEWLINE
|
||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
|
||||
('=' (yield_expr|testlist_star_expr))*)
|
||||
annassign: ':' test ['=' (yield_expr|testlist_star_expr)]
|
||||
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||
'<<=' | '>>=' | '**=' | '//=')
|
||||
# For normal and annotated assignments, additional restrictions enforced by the interpreter
|
||||
del_stmt: 'del' exprlist
|
||||
pass_stmt: 'pass'
|
||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: 'break'
|
||||
continue_stmt: 'continue'
|
||||
return_stmt: 'return' [testlist_star_expr]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: 'raise' [test ['from' test]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: 'import' dotted_as_names
|
||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||
import_as_name: NAME ['as' NAME]
|
||||
dotted_as_name: dotted_name ['as' NAME]
|
||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||
dotted_name: NAME ('.' NAME)*
|
||||
global_stmt: 'global' NAME (',' NAME)*
|
||||
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||
assert_stmt: 'assert' test [',' test]
|
||||
|
||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
||||
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
|
||||
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
|
||||
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
|
||||
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||
try_stmt: ('try' ':' suite
|
||||
((except_clause ':' suite)+
|
||||
['else' ':' suite]
|
||||
['finally' ':' suite] |
|
||||
'finally' ':' suite))
|
||||
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||
with_item: test ['as' expr]
|
||||
# NB compile.c makes sure that the default except clause is last
|
||||
except_clause: 'except' [test ['as' NAME]]
|
||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||
|
||||
namedexpr_test: test [':=' test]
|
||||
test: or_test ['if' or_test 'else' test] | lambdef
|
||||
test_nocond: or_test | lambdef_nocond
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||
or_test: and_test ('or' and_test)*
|
||||
and_test: not_test ('and' not_test)*
|
||||
not_test: 'not' not_test | comparison
|
||||
comparison: expr (comp_op expr)*
|
||||
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||
# sake of a __future__ import described in PEP 401 (which really works :-)
|
||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||
star_expr: '*' expr
|
||||
expr: xor_expr ('|' xor_expr)*
|
||||
xor_expr: and_expr ('^' and_expr)*
|
||||
and_expr: shift_expr ('&' shift_expr)*
|
||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||
arith_expr: term (('+'|'-') term)*
|
||||
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
||||
factor: ('+'|'-'|'~') factor | power
|
||||
power: atom_expr ['**' factor]
|
||||
atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
subscript: test | [test] ':' [test] [sliceop]
|
||||
sliceop: ':' [test]
|
||||
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||
testlist: test (',' test)* [',']
|
||||
dictorsetmaker: ( ((test ':' test | '**' expr)
|
||||
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
||||
((test | star_expr)
|
||||
(comp_for | (',' (test | star_expr))* [','])) )
|
||||
|
||||
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||
|
||||
arglist: argument (',' argument)* [',']
|
||||
|
||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
||||
# These need to be in a single rule to avoid grammar that is ambiguous
|
||||
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
||||
# we explicitly match '*' here, too, to give it proper precedence.
|
||||
# Illegal combinations and orderings are blocked in ast.c:
|
||||
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
||||
# that precede iterable unpackings are blocked; etc.
|
||||
argument: ( test [comp_for] |
|
||||
test ':=' test |
|
||||
test '=' test |
|
||||
'**' test |
|
||||
'*' test )
|
||||
|
||||
comp_iter: comp_for | comp_if
|
||||
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||
comp_for: ['async'] sync_comp_for
|
||||
comp_if: 'if' test_nocond [comp_iter]
|
||||
|
||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist_star_expr
|
||||
|
||||
strings: (STRING | fstring)+
|
||||
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||
fstring_content: FSTRING_STRING | fstring_expr
|
||||
fstring_conversion: '!' NAME
|
||||
fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||
fstring_format_spec: ':' fstring_content*
|
171
venv/Lib/site-packages/parso/python/grammar39.txt
Normal file
171
venv/Lib/site-packages/parso/python/grammar39.txt
Normal file
|
@ -0,0 +1,171 @@
|
|||
# Grammar for Python
|
||||
|
||||
# NOTE WELL: You should also follow all the steps listed at
|
||||
# https://devguide.python.org/grammar/
|
||||
|
||||
# Start symbols for the grammar:
|
||||
# single_input is a single interactive statement;
|
||||
# file_input is a module or sequence of commands read from an input file;
|
||||
# eval_input is the input for the eval() functions.
|
||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||
file_input: stmt* ENDMARKER
|
||||
eval_input: testlist NEWLINE* ENDMARKER
|
||||
|
||||
decorator: '@' namedexpr_test NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||
|
||||
async_funcdef: 'async' funcdef
|
||||
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||
|
||||
parameters: '(' [typedargslist] ')'
|
||||
typedargslist: (
|
||||
(tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] (
|
||||
',' tfpdef ['=' test])* ([',' [
|
||||
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [',']]])
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]])
|
||||
| '**' tfpdef [',']]] )
|
||||
| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
|
||||
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [',']]]
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [','])
|
||||
)
|
||||
tfpdef: NAME [':' test]
|
||||
varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']
|
||||
)
|
||||
vfpdef: NAME
|
||||
|
||||
stmt: simple_stmt | compound_stmt | NEWLINE
|
||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
|
||||
('=' (yield_expr|testlist_star_expr))*)
|
||||
annassign: ':' test ['=' (yield_expr|testlist_star_expr)]
|
||||
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||
'<<=' | '>>=' | '**=' | '//=')
|
||||
# For normal and annotated assignments, additional restrictions enforced by the interpreter
|
||||
del_stmt: 'del' exprlist
|
||||
pass_stmt: 'pass'
|
||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: 'break'
|
||||
continue_stmt: 'continue'
|
||||
return_stmt: 'return' [testlist_star_expr]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: 'raise' [test ['from' test]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: 'import' dotted_as_names
|
||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||
import_as_name: NAME ['as' NAME]
|
||||
dotted_as_name: dotted_name ['as' NAME]
|
||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||
dotted_name: NAME ('.' NAME)*
|
||||
global_stmt: 'global' NAME (',' NAME)*
|
||||
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||
assert_stmt: 'assert' test [',' test]
|
||||
|
||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
||||
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
|
||||
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
|
||||
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
|
||||
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||
try_stmt: ('try' ':' suite
|
||||
((except_clause ':' suite)+
|
||||
['else' ':' suite]
|
||||
['finally' ':' suite] |
|
||||
'finally' ':' suite))
|
||||
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||
with_item: test ['as' expr]
|
||||
# NB compile.c makes sure that the default except clause is last
|
||||
except_clause: 'except' [test ['as' NAME]]
|
||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||
|
||||
namedexpr_test: test [':=' test]
|
||||
test: or_test ['if' or_test 'else' test] | lambdef
|
||||
test_nocond: or_test | lambdef_nocond
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||
or_test: and_test ('or' and_test)*
|
||||
and_test: not_test ('and' not_test)*
|
||||
not_test: 'not' not_test | comparison
|
||||
comparison: expr (comp_op expr)*
|
||||
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||
# sake of a __future__ import described in PEP 401 (which really works :-)
|
||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||
star_expr: '*' expr
|
||||
expr: xor_expr ('|' xor_expr)*
|
||||
xor_expr: and_expr ('^' and_expr)*
|
||||
and_expr: shift_expr ('&' shift_expr)*
|
||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||
arith_expr: term (('+'|'-') term)*
|
||||
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
||||
factor: ('+'|'-'|'~') factor | power
|
||||
power: atom_expr ['**' factor]
|
||||
atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
subscript: test | [test] ':' [test] [sliceop]
|
||||
sliceop: ':' [test]
|
||||
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||
testlist: test (',' test)* [',']
|
||||
dictorsetmaker: ( ((test ':' test | '**' expr)
|
||||
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
||||
((test | star_expr)
|
||||
(comp_for | (',' (test | star_expr))* [','])) )
|
||||
|
||||
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||
|
||||
arglist: argument (',' argument)* [',']
|
||||
|
||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
||||
# These need to be in a single rule to avoid grammar that is ambiguous
|
||||
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
||||
# we explicitly match '*' here, too, to give it proper precedence.
|
||||
# Illegal combinations and orderings are blocked in ast.c:
|
||||
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
||||
# that precede iterable unpackings are blocked; etc.
|
||||
argument: ( test [comp_for] |
|
||||
test ':=' test |
|
||||
test '=' test |
|
||||
'**' test |
|
||||
'*' test )
|
||||
|
||||
comp_iter: comp_for | comp_if
|
||||
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||
comp_for: ['async'] sync_comp_for
|
||||
comp_if: 'if' test_nocond [comp_iter]
|
||||
|
||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist_star_expr
|
||||
|
||||
strings: (STRING | fstring)+
|
||||
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||
fstring_content: FSTRING_STRING | fstring_expr
|
||||
fstring_conversion: '!' NAME
|
||||
fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||
fstring_format_spec: ':' fstring_content*
|
217
venv/Lib/site-packages/parso/python/parser.py
Normal file
217
venv/Lib/site-packages/parso/python/parser.py
Normal file
|
@ -0,0 +1,217 @@
|
|||
from parso.python import tree
|
||||
from parso.python.token import PythonTokenTypes
|
||||
from parso.parser import BaseParser
|
||||
|
||||
|
||||
NAME = PythonTokenTypes.NAME
|
||||
INDENT = PythonTokenTypes.INDENT
|
||||
DEDENT = PythonTokenTypes.DEDENT
|
||||
|
||||
|
||||
class Parser(BaseParser):
|
||||
"""
|
||||
This class is used to parse a Python file, it then divides them into a
|
||||
class structure of different scopes.
|
||||
|
||||
:param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar.
|
||||
"""
|
||||
|
||||
node_map = {
|
||||
'expr_stmt': tree.ExprStmt,
|
||||
'classdef': tree.Class,
|
||||
'funcdef': tree.Function,
|
||||
'file_input': tree.Module,
|
||||
'import_name': tree.ImportName,
|
||||
'import_from': tree.ImportFrom,
|
||||
'break_stmt': tree.KeywordStatement,
|
||||
'continue_stmt': tree.KeywordStatement,
|
||||
'return_stmt': tree.ReturnStmt,
|
||||
'raise_stmt': tree.KeywordStatement,
|
||||
'yield_expr': tree.YieldExpr,
|
||||
'del_stmt': tree.KeywordStatement,
|
||||
'pass_stmt': tree.KeywordStatement,
|
||||
'global_stmt': tree.GlobalStmt,
|
||||
'nonlocal_stmt': tree.KeywordStatement,
|
||||
'print_stmt': tree.KeywordStatement,
|
||||
'assert_stmt': tree.AssertStmt,
|
||||
'if_stmt': tree.IfStmt,
|
||||
'with_stmt': tree.WithStmt,
|
||||
'for_stmt': tree.ForStmt,
|
||||
'while_stmt': tree.WhileStmt,
|
||||
'try_stmt': tree.TryStmt,
|
||||
'sync_comp_for': tree.SyncCompFor,
|
||||
# Not sure if this is the best idea, but IMO it's the easiest way to
|
||||
# avoid extreme amounts of work around the subtle difference of 2/3
|
||||
# grammar in list comoprehensions.
|
||||
'list_for': tree.SyncCompFor,
|
||||
'decorator': tree.Decorator,
|
||||
'lambdef': tree.Lambda,
|
||||
'old_lambdef': tree.Lambda,
|
||||
'lambdef_nocond': tree.Lambda,
|
||||
}
|
||||
default_node = tree.PythonNode
|
||||
|
||||
# Names/Keywords are handled separately
|
||||
_leaf_map = {
|
||||
PythonTokenTypes.STRING: tree.String,
|
||||
PythonTokenTypes.NUMBER: tree.Number,
|
||||
PythonTokenTypes.NEWLINE: tree.Newline,
|
||||
PythonTokenTypes.ENDMARKER: tree.EndMarker,
|
||||
PythonTokenTypes.FSTRING_STRING: tree.FStringString,
|
||||
PythonTokenTypes.FSTRING_START: tree.FStringStart,
|
||||
PythonTokenTypes.FSTRING_END: tree.FStringEnd,
|
||||
}
|
||||
|
||||
def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
|
||||
super(Parser, self).__init__(pgen_grammar, start_nonterminal,
|
||||
error_recovery=error_recovery)
|
||||
|
||||
self.syntax_errors = []
|
||||
self._omit_dedent_list = []
|
||||
self._indent_counter = 0
|
||||
|
||||
def parse(self, tokens):
|
||||
if self._error_recovery:
|
||||
if self._start_nonterminal != 'file_input':
|
||||
raise NotImplementedError
|
||||
|
||||
tokens = self._recovery_tokenize(tokens)
|
||||
|
||||
return super(Parser, self).parse(tokens)
|
||||
|
||||
def convert_node(self, nonterminal, children):
|
||||
"""
|
||||
Convert raw node information to a PythonBaseNode instance.
|
||||
|
||||
This is passed to the parser driver which calls it whenever a reduction of a
|
||||
grammar rule produces a new complete node, so that the tree is build
|
||||
strictly bottom-up.
|
||||
"""
|
||||
try:
|
||||
node = self.node_map[nonterminal](children)
|
||||
except KeyError:
|
||||
if nonterminal == 'suite':
|
||||
# We don't want the INDENT/DEDENT in our parser tree. Those
|
||||
# leaves are just cancer. They are virtual leaves and not real
|
||||
# ones and therefore have pseudo start/end positions and no
|
||||
# prefixes. Just ignore them.
|
||||
children = [children[0]] + children[2:-1]
|
||||
elif nonterminal == 'list_if':
|
||||
# Make transitioning from 2 to 3 easier.
|
||||
nonterminal = 'comp_if'
|
||||
elif nonterminal == 'listmaker':
|
||||
# Same as list_if above.
|
||||
nonterminal = 'testlist_comp'
|
||||
node = self.default_node(nonterminal, children)
|
||||
for c in children:
|
||||
c.parent = node
|
||||
return node
|
||||
|
||||
def convert_leaf(self, type, value, prefix, start_pos):
|
||||
# print('leaf', repr(value), token.tok_name[type])
|
||||
if type == NAME:
|
||||
if value in self._pgen_grammar.reserved_syntax_strings:
|
||||
return tree.Keyword(value, start_pos, prefix)
|
||||
else:
|
||||
return tree.Name(value, start_pos, prefix)
|
||||
|
||||
return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
|
||||
|
||||
def error_recovery(self, token):
|
||||
tos_nodes = self.stack[-1].nodes
|
||||
if tos_nodes:
|
||||
last_leaf = tos_nodes[-1].get_last_leaf()
|
||||
else:
|
||||
last_leaf = None
|
||||
|
||||
if self._start_nonterminal == 'file_input' and \
|
||||
(token.type == PythonTokenTypes.ENDMARKER
|
||||
or token.type == DEDENT and not last_leaf.value.endswith('\n')
|
||||
and not last_leaf.value.endswith('\r')):
|
||||
# In Python statements need to end with a newline. But since it's
|
||||
# possible (and valid in Python) that there's no newline at the
|
||||
# end of a file, we have to recover even if the user doesn't want
|
||||
# error recovery.
|
||||
if self.stack[-1].dfa.from_rule == 'simple_stmt':
|
||||
try:
|
||||
plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
if plan.next_dfa.is_final and not plan.dfa_pushes:
|
||||
# We are ignoring here that the newline would be
|
||||
# required for a simple_stmt.
|
||||
self.stack[-1].dfa = plan.next_dfa
|
||||
self._add_token(token)
|
||||
return
|
||||
|
||||
if not self._error_recovery:
|
||||
return super(Parser, self).error_recovery(token)
|
||||
|
||||
def current_suite(stack):
|
||||
# For now just discard everything that is not a suite or
|
||||
# file_input, if we detect an error.
|
||||
for until_index, stack_node in reversed(list(enumerate(stack))):
|
||||
# `suite` can sometimes be only simple_stmt, not stmt.
|
||||
if stack_node.nonterminal == 'file_input':
|
||||
break
|
||||
elif stack_node.nonterminal == 'suite':
|
||||
# In the case where we just have a newline we don't want to
|
||||
# do error recovery here. In all other cases, we want to do
|
||||
# error recovery.
|
||||
if len(stack_node.nodes) != 1:
|
||||
break
|
||||
return until_index
|
||||
|
||||
until_index = current_suite(self.stack)
|
||||
|
||||
if self._stack_removal(until_index + 1):
|
||||
self._add_token(token)
|
||||
else:
|
||||
typ, value, start_pos, prefix = token
|
||||
if typ == INDENT:
|
||||
# For every deleted INDENT we have to delete a DEDENT as well.
|
||||
# Otherwise the parser will get into trouble and DEDENT too early.
|
||||
self._omit_dedent_list.append(self._indent_counter)
|
||||
|
||||
error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
|
||||
self.stack[-1].nodes.append(error_leaf)
|
||||
|
||||
tos = self.stack[-1]
|
||||
if tos.nonterminal == 'suite':
|
||||
# Need at least one statement in the suite. This happend with the
|
||||
# error recovery above.
|
||||
try:
|
||||
tos.dfa = tos.dfa.arcs['stmt']
|
||||
except KeyError:
|
||||
# We're already in a final state.
|
||||
pass
|
||||
|
||||
def _stack_removal(self, start_index):
|
||||
all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]
|
||||
|
||||
if all_nodes:
|
||||
node = tree.PythonErrorNode(all_nodes)
|
||||
for n in all_nodes:
|
||||
n.parent = node
|
||||
self.stack[start_index - 1].nodes.append(node)
|
||||
|
||||
self.stack[start_index:] = []
|
||||
return bool(all_nodes)
|
||||
|
||||
def _recovery_tokenize(self, tokens):
|
||||
for token in tokens:
|
||||
typ = token[0]
|
||||
if typ == DEDENT:
|
||||
# We need to count indents, because if we just omit any DEDENT,
|
||||
# we might omit them in the wrong place.
|
||||
o = self._omit_dedent_list
|
||||
if o and o[-1] == self._indent_counter:
|
||||
o.pop()
|
||||
self._indent_counter -= 1
|
||||
continue
|
||||
|
||||
self._indent_counter -= 1
|
||||
elif typ == INDENT:
|
||||
self._indent_counter += 1
|
||||
yield token
|
727
venv/Lib/site-packages/parso/python/pep8.py
Normal file
727
venv/Lib/site-packages/parso/python/pep8.py
Normal file
|
@ -0,0 +1,727 @@
|
|||
import re
|
||||
from contextlib import contextmanager
|
||||
|
||||
from parso.python.errors import ErrorFinder, ErrorFinderConfig
|
||||
from parso.normalizer import Rule
|
||||
from parso.python.tree import search_ancestor, Flow, Scope
|
||||
|
||||
|
||||
_IMPORT_TYPES = ('import_name', 'import_from')
|
||||
_SUITE_INTRODUCERS = ('classdef', 'funcdef', 'if_stmt', 'while_stmt',
|
||||
'for_stmt', 'try_stmt', 'with_stmt')
|
||||
_NON_STAR_TYPES = ('term', 'import_from', 'power')
|
||||
_OPENING_BRACKETS = '(', '[', '{'
|
||||
_CLOSING_BRACKETS = ')', ']', '}'
|
||||
_FACTOR = '+', '-', '~'
|
||||
_ALLOW_SPACE = '*', '+', '-', '**', '/', '//', '@'
|
||||
_BITWISE_OPERATOR = '<<', '>>', '|', '&', '^'
|
||||
_NEEDS_SPACE = ('=', '%', '->',
|
||||
'<', '>', '==', '>=', '<=', '<>', '!=',
|
||||
'+=', '-=', '*=', '@=', '/=', '%=', '&=', '|=', '^=', '<<=',
|
||||
'>>=', '**=', '//=')
|
||||
_NEEDS_SPACE += _BITWISE_OPERATOR
|
||||
_IMPLICIT_INDENTATION_TYPES = ('dictorsetmaker', 'argument')
|
||||
_POSSIBLE_SLICE_PARENTS = ('subscript', 'subscriptlist', 'sliceop')
|
||||
|
||||
|
||||
class IndentationTypes(object):
|
||||
VERTICAL_BRACKET = object()
|
||||
HANGING_BRACKET = object()
|
||||
BACKSLASH = object()
|
||||
SUITE = object()
|
||||
IMPLICIT = object()
|
||||
|
||||
|
||||
class IndentationNode(object):
|
||||
type = IndentationTypes.SUITE
|
||||
|
||||
def __init__(self, config, indentation, parent=None):
|
||||
self.bracket_indentation = self.indentation = indentation
|
||||
self.parent = parent
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s>' % self.__class__.__name__
|
||||
|
||||
def get_latest_suite_node(self):
|
||||
n = self
|
||||
while n is not None:
|
||||
if n.type == IndentationTypes.SUITE:
|
||||
return n
|
||||
|
||||
n = n.parent
|
||||
|
||||
|
||||
class BracketNode(IndentationNode):
|
||||
def __init__(self, config, leaf, parent, in_suite_introducer=False):
|
||||
self.leaf = leaf
|
||||
|
||||
# Figure out here what the indentation is. For chained brackets
|
||||
# we can basically use the previous indentation.
|
||||
previous_leaf = leaf
|
||||
n = parent
|
||||
if n.type == IndentationTypes.IMPLICIT:
|
||||
n = n.parent
|
||||
while True:
|
||||
if hasattr(n, 'leaf') and previous_leaf.line != n.leaf.line:
|
||||
break
|
||||
|
||||
previous_leaf = previous_leaf.get_previous_leaf()
|
||||
if not isinstance(n, BracketNode) or previous_leaf != n.leaf:
|
||||
break
|
||||
n = n.parent
|
||||
parent_indentation = n.indentation
|
||||
|
||||
|
||||
next_leaf = leaf.get_next_leaf()
|
||||
if '\n' in next_leaf.prefix:
|
||||
# This implies code like:
|
||||
# foobarbaz(
|
||||
# a,
|
||||
# b,
|
||||
# )
|
||||
self.bracket_indentation = parent_indentation \
|
||||
+ config.closing_bracket_hanging_indentation
|
||||
self.indentation = parent_indentation + config.indentation
|
||||
self.type = IndentationTypes.HANGING_BRACKET
|
||||
else:
|
||||
# Implies code like:
|
||||
# foobarbaz(
|
||||
# a,
|
||||
# b,
|
||||
# )
|
||||
expected_end_indent = leaf.end_pos[1]
|
||||
if '\t' in config.indentation:
|
||||
self.indentation = None
|
||||
else:
|
||||
self.indentation = ' ' * expected_end_indent
|
||||
self.bracket_indentation = self.indentation
|
||||
self.type = IndentationTypes.VERTICAL_BRACKET
|
||||
|
||||
if in_suite_introducer and parent.type == IndentationTypes.SUITE \
|
||||
and self.indentation == parent_indentation + config.indentation:
|
||||
self.indentation += config.indentation
|
||||
# The closing bracket should have the same indentation.
|
||||
self.bracket_indentation = self.indentation
|
||||
self.parent = parent
|
||||
|
||||
|
||||
class ImplicitNode(BracketNode):
|
||||
"""
|
||||
Implicit indentation after keyword arguments, default arguments,
|
||||
annotations and dict values.
|
||||
"""
|
||||
def __init__(self, config, leaf, parent):
|
||||
super(ImplicitNode, self).__init__(config, leaf, parent)
|
||||
self.type = IndentationTypes.IMPLICIT
|
||||
|
||||
next_leaf = leaf.get_next_leaf()
|
||||
if leaf == ':' and '\n' not in next_leaf.prefix:
|
||||
self.indentation += ' '
|
||||
|
||||
|
||||
class BackslashNode(IndentationNode):
|
||||
type = IndentationTypes.BACKSLASH
|
||||
|
||||
def __init__(self, config, parent_indentation, containing_leaf, spacing, parent=None):
|
||||
expr_stmt = search_ancestor(containing_leaf, 'expr_stmt')
|
||||
if expr_stmt is not None:
|
||||
equals = expr_stmt.children[-2]
|
||||
|
||||
if '\t' in config.indentation:
|
||||
# TODO unite with the code of BracketNode
|
||||
self.indentation = None
|
||||
else:
|
||||
# If the backslash follows the equals, use normal indentation
|
||||
# otherwise it should align with the equals.
|
||||
if equals.end_pos == spacing.start_pos:
|
||||
self.indentation = parent_indentation + config.indentation
|
||||
else:
|
||||
# +1 because there is a space.
|
||||
self.indentation = ' ' * (equals.end_pos[1] + 1)
|
||||
else:
|
||||
self.indentation = parent_indentation + config.indentation
|
||||
self.bracket_indentation = self.indentation
|
||||
self.parent = parent
|
||||
|
||||
|
||||
def _is_magic_name(name):
|
||||
return name.value.startswith('__') and name.value.endswith('__')
|
||||
|
||||
|
||||
class PEP8Normalizer(ErrorFinder):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(PEP8Normalizer, self).__init__(*args, **kwargs)
|
||||
self._previous_part = None
|
||||
self._previous_leaf = None
|
||||
self._on_newline = True
|
||||
self._newline_count = 0
|
||||
self._wanted_newline_count = None
|
||||
self._max_new_lines_in_prefix = 0
|
||||
self._new_statement = True
|
||||
self._implicit_indentation_possible = False
|
||||
# The top of stack of the indentation nodes.
|
||||
self._indentation_tos = self._last_indentation_tos = \
|
||||
IndentationNode(self._config, indentation='')
|
||||
self._in_suite_introducer = False
|
||||
|
||||
if ' ' in self._config.indentation:
|
||||
self._indentation_type = 'spaces'
|
||||
self._wrong_indentation_char = '\t'
|
||||
else:
|
||||
self._indentation_type = 'tabs'
|
||||
self._wrong_indentation_char = ' '
|
||||
|
||||
@contextmanager
|
||||
def visit_node(self, node):
|
||||
with super(PEP8Normalizer, self).visit_node(node):
|
||||
with self._visit_node(node):
|
||||
yield
|
||||
|
||||
@contextmanager
|
||||
def _visit_node(self, node):
|
||||
typ = node.type
|
||||
|
||||
if typ in 'import_name':
|
||||
names = node.get_defined_names()
|
||||
if len(names) > 1:
|
||||
for name in names[:1]:
|
||||
self.add_issue(name, 401, 'Multiple imports on one line')
|
||||
elif typ == 'lambdef':
|
||||
expr_stmt = node.parent
|
||||
# Check if it's simply defining a single name, not something like
|
||||
# foo.bar or x[1], where using a lambda could make more sense.
|
||||
if expr_stmt.type == 'expr_stmt' and any(n.type == 'name' for n in expr_stmt.children[:-2:2]):
|
||||
self.add_issue(node, 731, 'Do not assign a lambda expression, use a def')
|
||||
elif typ == 'try_stmt':
|
||||
for child in node.children:
|
||||
# Here we can simply check if it's an except, because otherwise
|
||||
# it would be an except_clause.
|
||||
if child.type == 'keyword' and child.value == 'except':
|
||||
self.add_issue(child, 722, 'Do not use bare except, specify exception instead')
|
||||
elif typ == 'comparison':
|
||||
for child in node.children:
|
||||
if child.type not in ('atom_expr', 'power'):
|
||||
continue
|
||||
if len(child.children) > 2:
|
||||
continue
|
||||
trailer = child.children[1]
|
||||
atom = child.children[0]
|
||||
if trailer.type == 'trailer' and atom.type == 'name' \
|
||||
and atom.value == 'type':
|
||||
self.add_issue(node, 721, "Do not compare types, use 'isinstance()")
|
||||
break
|
||||
elif typ == 'file_input':
|
||||
endmarker = node.children[-1]
|
||||
prev = endmarker.get_previous_leaf()
|
||||
prefix = endmarker.prefix
|
||||
if (not prefix.endswith('\n') and (
|
||||
prefix or prev is None or prev.value != '\n')):
|
||||
self.add_issue(endmarker, 292, "No newline at end of file")
|
||||
|
||||
if typ in _IMPORT_TYPES:
|
||||
simple_stmt = node.parent
|
||||
module = simple_stmt.parent
|
||||
#if module.type == 'simple_stmt':
|
||||
if module.type == 'file_input':
|
||||
index = module.children.index(simple_stmt)
|
||||
for child in module.children[:index]:
|
||||
children = [child]
|
||||
if child.type == 'simple_stmt':
|
||||
# Remove the newline.
|
||||
children = child.children[:-1]
|
||||
|
||||
found_docstring = False
|
||||
for c in children:
|
||||
if c.type == 'string' and not found_docstring:
|
||||
continue
|
||||
found_docstring = True
|
||||
|
||||
if c.type == 'expr_stmt' and \
|
||||
all(_is_magic_name(n) for n in c.get_defined_names()):
|
||||
continue
|
||||
|
||||
if c.type in _IMPORT_TYPES or isinstance(c, Flow):
|
||||
continue
|
||||
|
||||
self.add_issue(node, 402, 'Module level import not at top of file')
|
||||
break
|
||||
else:
|
||||
continue
|
||||
break
|
||||
|
||||
implicit_indentation_possible = typ in _IMPLICIT_INDENTATION_TYPES
|
||||
in_introducer = typ in _SUITE_INTRODUCERS
|
||||
if in_introducer:
|
||||
self._in_suite_introducer = True
|
||||
elif typ == 'suite':
|
||||
if self._indentation_tos.type == IndentationTypes.BACKSLASH:
|
||||
self._indentation_tos = self._indentation_tos.parent
|
||||
|
||||
self._indentation_tos = IndentationNode(
|
||||
self._config,
|
||||
self._indentation_tos.indentation + self._config.indentation,
|
||||
parent=self._indentation_tos
|
||||
)
|
||||
elif implicit_indentation_possible:
|
||||
self._implicit_indentation_possible = True
|
||||
yield
|
||||
if typ == 'suite':
|
||||
assert self._indentation_tos.type == IndentationTypes.SUITE
|
||||
self._indentation_tos = self._indentation_tos.parent
|
||||
# If we dedent, no lines are needed anymore.
|
||||
self._wanted_newline_count = None
|
||||
elif implicit_indentation_possible:
|
||||
self._implicit_indentation_possible = False
|
||||
if self._indentation_tos.type == IndentationTypes.IMPLICIT:
|
||||
self._indentation_tos = self._indentation_tos.parent
|
||||
elif in_introducer:
|
||||
self._in_suite_introducer = False
|
||||
if typ in ('classdef', 'funcdef'):
|
||||
self._wanted_newline_count = self._get_wanted_blank_lines_count()
|
||||
|
||||
def _check_tabs_spaces(self, spacing):
|
||||
if self._wrong_indentation_char in spacing.value:
|
||||
self.add_issue(spacing, 101, 'Indentation contains ' + self._indentation_type)
|
||||
return True
|
||||
return False
|
||||
|
||||
def _get_wanted_blank_lines_count(self):
|
||||
suite_node = self._indentation_tos.get_latest_suite_node()
|
||||
return int(suite_node.parent is None) + 1
|
||||
|
||||
def _reset_newlines(self, spacing, leaf, is_comment=False):
|
||||
self._max_new_lines_in_prefix = \
|
||||
max(self._max_new_lines_in_prefix, self._newline_count)
|
||||
|
||||
wanted = self._wanted_newline_count
|
||||
if wanted is not None:
|
||||
# Need to substract one
|
||||
blank_lines = self._newline_count - 1
|
||||
if wanted > blank_lines and leaf.type != 'endmarker':
|
||||
# In case of a comment we don't need to add the issue, yet.
|
||||
if not is_comment:
|
||||
# TODO end_pos wrong.
|
||||
code = 302 if wanted == 2 else 301
|
||||
message = "expected %s blank line, found %s" \
|
||||
% (wanted, blank_lines)
|
||||
self.add_issue(spacing, code, message)
|
||||
self._wanted_newline_count = None
|
||||
else:
|
||||
self._wanted_newline_count = None
|
||||
|
||||
if not is_comment:
|
||||
wanted = self._get_wanted_blank_lines_count()
|
||||
actual = self._max_new_lines_in_prefix - 1
|
||||
|
||||
val = leaf.value
|
||||
needs_lines = (
|
||||
val == '@' and leaf.parent.type == 'decorator'
|
||||
or (
|
||||
val == 'class'
|
||||
or val == 'async' and leaf.get_next_leaf() == 'def'
|
||||
or val == 'def' and self._previous_leaf != 'async'
|
||||
) and leaf.parent.parent.type != 'decorated'
|
||||
)
|
||||
if needs_lines and actual < wanted:
|
||||
func_or_cls = leaf.parent
|
||||
suite = func_or_cls.parent
|
||||
if suite.type == 'decorated':
|
||||
suite = suite.parent
|
||||
|
||||
# The first leaf of a file or a suite should not need blank
|
||||
# lines.
|
||||
if suite.children[int(suite.type == 'suite')] != func_or_cls:
|
||||
code = 302 if wanted == 2 else 301
|
||||
message = "expected %s blank line, found %s" \
|
||||
% (wanted, actual)
|
||||
self.add_issue(spacing, code, message)
|
||||
|
||||
self._max_new_lines_in_prefix = 0
|
||||
|
||||
self._newline_count = 0
|
||||
|
||||
def visit_leaf(self, leaf):
|
||||
super(PEP8Normalizer, self).visit_leaf(leaf)
|
||||
for part in leaf._split_prefix():
|
||||
if part.type == 'spacing':
|
||||
# This part is used for the part call after for.
|
||||
break
|
||||
self._visit_part(part, part.create_spacing_part(), leaf)
|
||||
|
||||
self._analyse_non_prefix(leaf)
|
||||
self._visit_part(leaf, part, leaf)
|
||||
|
||||
# Cleanup
|
||||
self._last_indentation_tos = self._indentation_tos
|
||||
|
||||
self._new_statement = leaf.type == 'newline'
|
||||
|
||||
# TODO does this work? with brackets and stuff?
|
||||
if leaf.type == 'newline' and \
|
||||
self._indentation_tos.type == IndentationTypes.BACKSLASH:
|
||||
self._indentation_tos = self._indentation_tos.parent
|
||||
|
||||
if leaf.value == ':' and leaf.parent.type in _SUITE_INTRODUCERS:
|
||||
self._in_suite_introducer = False
|
||||
elif leaf.value == 'elif':
|
||||
self._in_suite_introducer = True
|
||||
|
||||
if not self._new_statement:
|
||||
self._reset_newlines(part, leaf)
|
||||
self._max_blank_lines = 0
|
||||
|
||||
self._previous_leaf = leaf
|
||||
|
||||
return leaf.value
|
||||
|
||||
def _visit_part(self, part, spacing, leaf):
|
||||
value = part.value
|
||||
type_ = part.type
|
||||
if type_ == 'error_leaf':
|
||||
return
|
||||
|
||||
if value == ',' and part.parent.type == 'dictorsetmaker':
|
||||
self._indentation_tos = self._indentation_tos.parent
|
||||
|
||||
node = self._indentation_tos
|
||||
|
||||
if type_ == 'comment':
|
||||
if value.startswith('##'):
|
||||
# Whole blocks of # should not raise an error.
|
||||
if value.lstrip('#'):
|
||||
self.add_issue(part, 266, "Too many leading '#' for block comment.")
|
||||
elif self._on_newline:
|
||||
if not re.match(r'#:? ', value) and not value == '#' \
|
||||
and not (value.startswith('#!') and part.start_pos == (1, 0)):
|
||||
self.add_issue(part, 265, "Block comment should start with '# '")
|
||||
else:
|
||||
if not re.match(r'#:? [^ ]', value):
|
||||
self.add_issue(part, 262, "Inline comment should start with '# '")
|
||||
|
||||
self._reset_newlines(spacing, leaf, is_comment=True)
|
||||
elif type_ == 'newline':
|
||||
if self._newline_count > self._get_wanted_blank_lines_count():
|
||||
self.add_issue(part, 303, "Too many blank lines (%s)" % self._newline_count)
|
||||
elif leaf in ('def', 'class') \
|
||||
and leaf.parent.parent.type == 'decorated':
|
||||
self.add_issue(part, 304, "Blank lines found after function decorator")
|
||||
|
||||
|
||||
self._newline_count += 1
|
||||
|
||||
if type_ == 'backslash':
|
||||
# TODO is this enough checking? What about ==?
|
||||
if node.type != IndentationTypes.BACKSLASH:
|
||||
if node.type != IndentationTypes.SUITE:
|
||||
self.add_issue(part, 502, 'The backslash is redundant between brackets')
|
||||
else:
|
||||
indentation = node.indentation
|
||||
if self._in_suite_introducer and node.type == IndentationTypes.SUITE:
|
||||
indentation += self._config.indentation
|
||||
|
||||
self._indentation_tos = BackslashNode(
|
||||
self._config,
|
||||
indentation,
|
||||
part,
|
||||
spacing,
|
||||
parent=self._indentation_tos
|
||||
)
|
||||
elif self._on_newline:
|
||||
indentation = spacing.value
|
||||
if node.type == IndentationTypes.BACKSLASH \
|
||||
and self._previous_part.type == 'newline':
|
||||
self._indentation_tos = self._indentation_tos.parent
|
||||
|
||||
if not self._check_tabs_spaces(spacing):
|
||||
should_be_indentation = node.indentation
|
||||
if type_ == 'comment':
|
||||
# Comments can be dedented. So we have to care for that.
|
||||
n = self._last_indentation_tos
|
||||
while True:
|
||||
if len(indentation) > len(n.indentation):
|
||||
break
|
||||
|
||||
should_be_indentation = n.indentation
|
||||
|
||||
self._last_indentation_tos = n
|
||||
if n == node:
|
||||
break
|
||||
n = n.parent
|
||||
|
||||
if self._new_statement:
|
||||
if type_ == 'newline':
|
||||
if indentation:
|
||||
self.add_issue(spacing, 291, 'Trailing whitespace')
|
||||
elif indentation != should_be_indentation:
|
||||
s = '%s %s' % (len(self._config.indentation), self._indentation_type)
|
||||
self.add_issue(part, 111, 'Indentation is not a multiple of ' + s)
|
||||
else:
|
||||
if value in '])}':
|
||||
should_be_indentation = node.bracket_indentation
|
||||
else:
|
||||
should_be_indentation = node.indentation
|
||||
if self._in_suite_introducer and indentation == \
|
||||
node.get_latest_suite_node().indentation \
|
||||
+ self._config.indentation:
|
||||
self.add_issue(part, 129, "Line with same indent as next logical block")
|
||||
elif indentation != should_be_indentation:
|
||||
if not self._check_tabs_spaces(spacing) and part.value != '\n':
|
||||
if value in '])}':
|
||||
if node.type == IndentationTypes.VERTICAL_BRACKET:
|
||||
self.add_issue(part, 124, "Closing bracket does not match visual indentation")
|
||||
else:
|
||||
self.add_issue(part, 123, "Losing bracket does not match indentation of opening bracket's line")
|
||||
else:
|
||||
if len(indentation) < len(should_be_indentation):
|
||||
if node.type == IndentationTypes.VERTICAL_BRACKET:
|
||||
self.add_issue(part, 128, 'Continuation line under-indented for visual indent')
|
||||
elif node.type == IndentationTypes.BACKSLASH:
|
||||
self.add_issue(part, 122, 'Continuation line missing indentation or outdented')
|
||||
elif node.type == IndentationTypes.IMPLICIT:
|
||||
self.add_issue(part, 135, 'xxx')
|
||||
else:
|
||||
self.add_issue(part, 121, 'Continuation line under-indented for hanging indent')
|
||||
else:
|
||||
if node.type == IndentationTypes.VERTICAL_BRACKET:
|
||||
self.add_issue(part, 127, 'Continuation line over-indented for visual indent')
|
||||
elif node.type == IndentationTypes.IMPLICIT:
|
||||
self.add_issue(part, 136, 'xxx')
|
||||
else:
|
||||
self.add_issue(part, 126, 'Continuation line over-indented for hanging indent')
|
||||
else:
|
||||
self._check_spacing(part, spacing)
|
||||
|
||||
self._check_line_length(part, spacing)
|
||||
# -------------------------------
|
||||
# Finalizing. Updating the state.
|
||||
# -------------------------------
|
||||
if value and value in '()[]{}' and type_ != 'error_leaf' \
|
||||
and part.parent.type != 'error_node':
|
||||
if value in _OPENING_BRACKETS:
|
||||
self._indentation_tos = BracketNode(
|
||||
self._config, part,
|
||||
parent=self._indentation_tos,
|
||||
in_suite_introducer=self._in_suite_introducer
|
||||
)
|
||||
else:
|
||||
assert node.type != IndentationTypes.IMPLICIT
|
||||
self._indentation_tos = self._indentation_tos.parent
|
||||
elif value in ('=', ':') and self._implicit_indentation_possible \
|
||||
and part.parent.type in _IMPLICIT_INDENTATION_TYPES:
|
||||
indentation = node.indentation
|
||||
self._indentation_tos = ImplicitNode(
|
||||
self._config, part, parent=self._indentation_tos
|
||||
)
|
||||
|
||||
self._on_newline = type_ in ('newline', 'backslash', 'bom')
|
||||
|
||||
self._previous_part = part
|
||||
self._previous_spacing = spacing
|
||||
|
||||
def _check_line_length(self, part, spacing):
|
||||
if part.type == 'backslash':
|
||||
last_column = part.start_pos[1] + 1
|
||||
else:
|
||||
last_column = part.end_pos[1]
|
||||
if last_column > self._config.max_characters \
|
||||
and spacing.start_pos[1] <= self._config.max_characters :
|
||||
# Special case for long URLs in multi-line docstrings or comments,
|
||||
# but still report the error when the 72 first chars are whitespaces.
|
||||
report = True
|
||||
if part.type == 'comment':
|
||||
splitted = part.value[1:].split()
|
||||
if len(splitted) == 1 \
|
||||
and (part.end_pos[1] - len(splitted[0])) < 72:
|
||||
report = False
|
||||
if report:
|
||||
self.add_issue(
|
||||
part,
|
||||
501,
|
||||
'Line too long (%s > %s characters)' %
|
||||
(last_column, self._config.max_characters),
|
||||
)
|
||||
|
||||
def _check_spacing(self, part, spacing):
|
||||
def add_if_spaces(*args):
|
||||
if spaces:
|
||||
return self.add_issue(*args)
|
||||
|
||||
def add_not_spaces(*args):
|
||||
if not spaces:
|
||||
return self.add_issue(*args)
|
||||
|
||||
spaces = spacing.value
|
||||
prev = self._previous_part
|
||||
if prev is not None and prev.type == 'error_leaf' or part.type == 'error_leaf':
|
||||
return
|
||||
|
||||
type_ = part.type
|
||||
if '\t' in spaces:
|
||||
self.add_issue(spacing, 223, 'Used tab to separate tokens')
|
||||
elif type_ == 'comment':
|
||||
if len(spaces) < self._config.spaces_before_comment:
|
||||
self.add_issue(spacing, 261, 'At least two spaces before inline comment')
|
||||
elif type_ == 'newline':
|
||||
add_if_spaces(spacing, 291, 'Trailing whitespace')
|
||||
elif len(spaces) > 1:
|
||||
self.add_issue(spacing, 221, 'Multiple spaces used')
|
||||
else:
|
||||
if prev in _OPENING_BRACKETS:
|
||||
message = "Whitespace after '%s'" % part.value
|
||||
add_if_spaces(spacing, 201, message)
|
||||
elif part in _CLOSING_BRACKETS:
|
||||
message = "Whitespace before '%s'" % part.value
|
||||
add_if_spaces(spacing, 202, message)
|
||||
elif part in (',', ';') or part == ':' \
|
||||
and part.parent.type not in _POSSIBLE_SLICE_PARENTS:
|
||||
message = "Whitespace before '%s'" % part.value
|
||||
add_if_spaces(spacing, 203, message)
|
||||
elif prev == ':' and prev.parent.type in _POSSIBLE_SLICE_PARENTS:
|
||||
pass # TODO
|
||||
elif prev in (',', ';', ':'):
|
||||
add_not_spaces(spacing, 231, "missing whitespace after '%s'")
|
||||
elif part == ':': # Is a subscript
|
||||
# TODO
|
||||
pass
|
||||
elif part in ('*', '**') and part.parent.type not in _NON_STAR_TYPES \
|
||||
or prev in ('*', '**') \
|
||||
and prev.parent.type not in _NON_STAR_TYPES:
|
||||
# TODO
|
||||
pass
|
||||
elif prev in _FACTOR and prev.parent.type == 'factor':
|
||||
pass
|
||||
elif prev == '@' and prev.parent.type == 'decorator':
|
||||
pass # TODO should probably raise an error if there's a space here
|
||||
elif part in _NEEDS_SPACE or prev in _NEEDS_SPACE:
|
||||
if part == '=' and part.parent.type in ('argument', 'param') \
|
||||
or prev == '=' and prev.parent.type in ('argument', 'param'):
|
||||
if part == '=':
|
||||
param = part.parent
|
||||
else:
|
||||
param = prev.parent
|
||||
if param.type == 'param' and param.annotation:
|
||||
add_not_spaces(spacing, 252, 'Expected spaces around annotation equals')
|
||||
else:
|
||||
add_if_spaces(spacing, 251, 'Unexpected spaces around keyword / parameter equals')
|
||||
elif part in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR:
|
||||
add_not_spaces(spacing, 227, 'Missing whitespace around bitwise or shift operator')
|
||||
elif part == '%' or prev == '%':
|
||||
add_not_spaces(spacing, 228, 'Missing whitespace around modulo operator')
|
||||
else:
|
||||
message_225 = 'Missing whitespace between tokens'
|
||||
add_not_spaces(spacing, 225, message_225)
|
||||
elif type_ == 'keyword' or prev.type == 'keyword':
|
||||
add_not_spaces(spacing, 275, 'Missing whitespace around keyword')
|
||||
else:
|
||||
prev_spacing = self._previous_spacing
|
||||
if prev in _ALLOW_SPACE and spaces != prev_spacing.value \
|
||||
and '\n' not in self._previous_leaf.prefix:
|
||||
message = "Whitespace before operator doesn't match with whitespace after"
|
||||
self.add_issue(spacing, 229, message)
|
||||
|
||||
if spaces and part not in _ALLOW_SPACE and prev not in _ALLOW_SPACE:
|
||||
message_225 = 'Missing whitespace between tokens'
|
||||
#print('xy', spacing)
|
||||
#self.add_issue(spacing, 225, message_225)
|
||||
# TODO why only brackets?
|
||||
if part in _OPENING_BRACKETS:
|
||||
message = "Whitespace before '%s'" % part.value
|
||||
add_if_spaces(spacing, 211, message)
|
||||
|
||||
def _analyse_non_prefix(self, leaf):
|
||||
typ = leaf.type
|
||||
if typ == 'name' and leaf.value in ('l', 'O', 'I'):
|
||||
if leaf.is_definition():
|
||||
message = "Do not define %s named 'l', 'O', or 'I' one line"
|
||||
if leaf.parent.type == 'class' and leaf.parent.name == leaf:
|
||||
self.add_issue(leaf, 742, message % 'classes')
|
||||
elif leaf.parent.type == 'function' and leaf.parent.name == leaf:
|
||||
self.add_issue(leaf, 743, message % 'function')
|
||||
else:
|
||||
self.add_issuadd_issue(741, message % 'variables', leaf)
|
||||
elif leaf.value == ':':
|
||||
if isinstance(leaf.parent, (Flow, Scope)) and leaf.parent.type != 'lambdef':
|
||||
next_leaf = leaf.get_next_leaf()
|
||||
if next_leaf.type != 'newline':
|
||||
if leaf.parent.type == 'funcdef':
|
||||
self.add_issue(next_leaf, 704, 'Multiple statements on one line (def)')
|
||||
else:
|
||||
self.add_issue(next_leaf, 701, 'Multiple statements on one line (colon)')
|
||||
elif leaf.value == ';':
|
||||
if leaf.get_next_leaf().type in ('newline', 'endmarker'):
|
||||
self.add_issue(leaf, 703, 'Statement ends with a semicolon')
|
||||
else:
|
||||
self.add_issue(leaf, 702, 'Multiple statements on one line (semicolon)')
|
||||
elif leaf.value in ('==', '!='):
|
||||
comparison = leaf.parent
|
||||
index = comparison.children.index(leaf)
|
||||
left = comparison.children[index - 1]
|
||||
right = comparison.children[index + 1]
|
||||
for node in left, right:
|
||||
if node.type == 'keyword' or node.type == 'name':
|
||||
if node.value == 'None':
|
||||
message = "comparison to None should be 'if cond is None:'"
|
||||
self.add_issue(leaf, 711, message)
|
||||
break
|
||||
elif node.value in ('True', 'False'):
|
||||
message = "comparison to False/True should be 'if cond is True:' or 'if cond:'"
|
||||
self.add_issue(leaf, 712, message)
|
||||
break
|
||||
elif leaf.value in ('in', 'is'):
|
||||
comparison = leaf.parent
|
||||
if comparison.type == 'comparison' and comparison.parent.type == 'not_test':
|
||||
if leaf.value == 'in':
|
||||
self.add_issue(leaf, 713, "test for membership should be 'not in'")
|
||||
else:
|
||||
self.add_issue(leaf, 714, "test for object identity should be 'is not'")
|
||||
elif typ == 'string':
|
||||
# Checking multiline strings
|
||||
for i, line in enumerate(leaf.value.splitlines()[1:]):
|
||||
indentation = re.match(r'[ \t]*', line).group(0)
|
||||
start_pos = leaf.line + i, len(indentation)
|
||||
# TODO check multiline indentation.
|
||||
elif typ == 'endmarker':
|
||||
if self._newline_count >= 2:
|
||||
self.add_issue(leaf, 391, 'Blank line at end of file')
|
||||
|
||||
def add_issue(self, node, code, message):
|
||||
if self._previous_leaf is not None:
|
||||
if search_ancestor(self._previous_leaf, 'error_node') is not None:
|
||||
return
|
||||
if self._previous_leaf.type == 'error_leaf':
|
||||
return
|
||||
if search_ancestor(node, 'error_node') is not None:
|
||||
return
|
||||
if code in (901, 903):
|
||||
# 901 and 903 are raised by the ErrorFinder.
|
||||
super(PEP8Normalizer, self).add_issue(node, code, message)
|
||||
else:
|
||||
# Skip ErrorFinder here, because it has custom behavior.
|
||||
super(ErrorFinder, self).add_issue(node, code, message)
|
||||
|
||||
|
||||
class PEP8NormalizerConfig(ErrorFinderConfig):
|
||||
normalizer_class = PEP8Normalizer
|
||||
"""
|
||||
Normalizing to PEP8. Not really implemented, yet.
|
||||
"""
|
||||
def __init__(self, indentation=' ' * 4, hanging_indentation=None,
|
||||
max_characters=79, spaces_before_comment=2):
|
||||
self.indentation = indentation
|
||||
if hanging_indentation is None:
|
||||
hanging_indentation = indentation
|
||||
self.hanging_indentation = hanging_indentation
|
||||
self.closing_bracket_hanging_indentation = ''
|
||||
self.break_after_binary = False
|
||||
self.max_characters = max_characters
|
||||
self.spaces_before_comment = spaces_before_comment
|
||||
|
||||
|
||||
# TODO this is not yet ready.
|
||||
#@PEP8Normalizer.register_rule(type='endmarker')
|
||||
class BlankLineAtEnd(Rule):
|
||||
code = 392
|
||||
message = 'Blank line at end of file'
|
||||
|
||||
def is_issue(self, leaf):
|
||||
return self._newline_count >= 2
|
97
venv/Lib/site-packages/parso/python/prefix.py
Normal file
97
venv/Lib/site-packages/parso/python/prefix.py
Normal file
|
@ -0,0 +1,97 @@
|
|||
import re
|
||||
from codecs import BOM_UTF8
|
||||
|
||||
from parso.python.tokenize import group
|
||||
|
||||
unicode_bom = BOM_UTF8.decode('utf-8')
|
||||
|
||||
|
||||
class PrefixPart(object):
|
||||
def __init__(self, leaf, typ, value, spacing='', start_pos=None):
|
||||
assert start_pos is not None
|
||||
self.parent = leaf
|
||||
self.type = typ
|
||||
self.value = value
|
||||
self.spacing = spacing
|
||||
self.start_pos = start_pos
|
||||
|
||||
@property
|
||||
def end_pos(self):
|
||||
if self.value.endswith('\n'):
|
||||
return self.start_pos[0] + 1, 0
|
||||
if self.value == unicode_bom:
|
||||
# The bom doesn't have a length at the start of a Python file.
|
||||
return self.start_pos
|
||||
return self.start_pos[0], self.start_pos[1] + len(self.value)
|
||||
|
||||
def create_spacing_part(self):
|
||||
column = self.start_pos[1] - len(self.spacing)
|
||||
return PrefixPart(
|
||||
self.parent, 'spacing', self.spacing,
|
||||
start_pos=(self.start_pos[0], column)
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%s, %s, %s)' % (
|
||||
self.__class__.__name__,
|
||||
self.type,
|
||||
repr(self.value),
|
||||
self.start_pos
|
||||
)
|
||||
|
||||
|
||||
_comment = r'#[^\n\r\f]*'
|
||||
_backslash = r'\\\r?\n'
|
||||
_newline = r'\r?\n'
|
||||
_form_feed = r'\f'
|
||||
_only_spacing = '$'
|
||||
_spacing = r'[ \t]*'
|
||||
_bom = unicode_bom
|
||||
|
||||
_regex = group(
|
||||
_comment, _backslash, _newline, _form_feed, _only_spacing, _bom,
|
||||
capture=True
|
||||
)
|
||||
_regex = re.compile(group(_spacing, capture=True) + _regex)
|
||||
|
||||
|
||||
_types = {
|
||||
'#': 'comment',
|
||||
'\\': 'backslash',
|
||||
'\f': 'formfeed',
|
||||
'\n': 'newline',
|
||||
'\r': 'newline',
|
||||
unicode_bom: 'bom'
|
||||
}
|
||||
|
||||
|
||||
def split_prefix(leaf, start_pos):
|
||||
line, column = start_pos
|
||||
start = 0
|
||||
value = spacing = ''
|
||||
bom = False
|
||||
while start != len(leaf.prefix):
|
||||
match =_regex.match(leaf.prefix, start)
|
||||
spacing = match.group(1)
|
||||
value = match.group(2)
|
||||
if not value:
|
||||
break
|
||||
type_ = _types[value[0]]
|
||||
yield PrefixPart(
|
||||
leaf, type_, value, spacing,
|
||||
start_pos=(line, column + start - int(bom) + len(spacing))
|
||||
)
|
||||
if type_ == 'bom':
|
||||
bom = True
|
||||
|
||||
start = match.end(0)
|
||||
if value.endswith('\n'):
|
||||
line += 1
|
||||
column = -start
|
||||
|
||||
if value:
|
||||
spacing = ''
|
||||
yield PrefixPart(
|
||||
leaf, 'spacing', spacing,
|
||||
start_pos=(line, column + start)
|
||||
)
|
27
venv/Lib/site-packages/parso/python/token.py
Normal file
27
venv/Lib/site-packages/parso/python/token.py
Normal file
|
@ -0,0 +1,27 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
|
||||
class TokenType(object):
|
||||
def __init__(self, name, contains_syntax=False):
|
||||
self.name = name
|
||||
self.contains_syntax = contains_syntax
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%s)' % (self.__class__.__name__, self.name)
|
||||
|
||||
|
||||
class TokenTypes(object):
|
||||
"""
|
||||
Basically an enum, but Python 2 doesn't have enums in the standard library.
|
||||
"""
|
||||
def __init__(self, names, contains_syntax):
|
||||
for name in names:
|
||||
setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))
|
||||
|
||||
|
||||
PythonTokenTypes = TokenTypes((
|
||||
'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
|
||||
'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
|
||||
'ENDMARKER'),
|
||||
contains_syntax=('NAME', 'OP'),
|
||||
)
|
30
venv/Lib/site-packages/parso/python/token.pyi
Normal file
30
venv/Lib/site-packages/parso/python/token.pyi
Normal file
|
@ -0,0 +1,30 @@
|
|||
from typing import Container, Iterable
|
||||
|
||||
class TokenType:
|
||||
name: str
|
||||
contains_syntax: bool
|
||||
def __init__(self, name: str, contains_syntax: bool) -> None: ...
|
||||
|
||||
class TokenTypes:
|
||||
def __init__(
|
||||
self, names: Iterable[str], contains_syntax: Container[str]
|
||||
) -> None: ...
|
||||
|
||||
# not an actual class in the source code, but we need this class to type the fields of
|
||||
# PythonTokenTypes
|
||||
class _FakePythonTokenTypesClass(TokenTypes):
|
||||
STRING: TokenType
|
||||
NUMBER: TokenType
|
||||
NAME: TokenType
|
||||
ERRORTOKEN: TokenType
|
||||
NEWLINE: TokenType
|
||||
INDENT: TokenType
|
||||
DEDENT: TokenType
|
||||
ERROR_DEDENT: TokenType
|
||||
FSTRING_STRING: TokenType
|
||||
FSTRING_START: TokenType
|
||||
FSTRING_END: TokenType
|
||||
OP: TokenType
|
||||
ENDMARKER: TokenType
|
||||
|
||||
PythonTokenTypes: _FakePythonTokenTypesClass = ...
|
722
venv/Lib/site-packages/parso/python/tokenize.py
Normal file
722
venv/Lib/site-packages/parso/python/tokenize.py
Normal file
|
@ -0,0 +1,722 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This tokenizer has been copied from the ``tokenize.py`` standard library
|
||||
tokenizer. The reason was simple: The standard library tokenizer fails
|
||||
if the indentation is not right. To make it possible to do error recovery the
|
||||
tokenizer needed to be rewritten.
|
||||
|
||||
Basically this is a stripped down version of the standard library module, so
|
||||
you can read the documentation there. Additionally we included some speed and
|
||||
memory optimizations here.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import sys
|
||||
import re
|
||||
from collections import namedtuple
|
||||
import itertools as _itertools
|
||||
from codecs import BOM_UTF8
|
||||
|
||||
from parso.python.token import PythonTokenTypes
|
||||
from parso.utils import split_lines
|
||||
|
||||
|
||||
# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
|
||||
MAX_UNICODE = '\U0010ffff'
|
||||
|
||||
STRING = PythonTokenTypes.STRING
|
||||
NAME = PythonTokenTypes.NAME
|
||||
NUMBER = PythonTokenTypes.NUMBER
|
||||
OP = PythonTokenTypes.OP
|
||||
NEWLINE = PythonTokenTypes.NEWLINE
|
||||
INDENT = PythonTokenTypes.INDENT
|
||||
DEDENT = PythonTokenTypes.DEDENT
|
||||
ENDMARKER = PythonTokenTypes.ENDMARKER
|
||||
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
|
||||
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
|
||||
FSTRING_START = PythonTokenTypes.FSTRING_START
|
||||
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
|
||||
FSTRING_END = PythonTokenTypes.FSTRING_END
|
||||
|
||||
TokenCollection = namedtuple(
|
||||
'TokenCollection',
|
||||
'pseudo_token single_quoted triple_quoted endpats whitespace '
|
||||
'fstring_pattern_map always_break_tokens',
|
||||
)
|
||||
|
||||
BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
|
||||
|
||||
_token_collection_cache = {}
|
||||
|
||||
if sys.version_info.major >= 3:
|
||||
# Python 3 has str.isidentifier() to check if a char is a valid identifier
|
||||
is_identifier = str.isidentifier
|
||||
else:
|
||||
# Python 2 doesn't, but it's not that important anymore and if you tokenize
|
||||
# Python 2 code with this, it's still ok. It's just that parsing Python 3
|
||||
# code with this function is not 100% correct.
|
||||
# This just means that Python 2 code matches a few identifiers too much,
|
||||
# but that doesn't really matter.
|
||||
def is_identifier(s):
|
||||
return True
|
||||
|
||||
|
||||
def group(*choices, **kwargs):
|
||||
capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :(
|
||||
assert not kwargs
|
||||
|
||||
start = '('
|
||||
if not capture:
|
||||
start += '?:'
|
||||
return start + '|'.join(choices) + ')'
|
||||
|
||||
|
||||
def maybe(*choices):
|
||||
return group(*choices) + '?'
|
||||
|
||||
|
||||
# Return the empty string, plus all of the valid string prefixes.
|
||||
def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False):
|
||||
def different_case_versions(prefix):
|
||||
for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
|
||||
yield ''.join(s)
|
||||
# The valid string prefixes. Only contain the lower case versions,
|
||||
# and don't contain any permuations (include 'fr', but not
|
||||
# 'rf'). The various permutations will be generated.
|
||||
valid_string_prefixes = ['b', 'r', 'u']
|
||||
if version_info.major >= 3:
|
||||
valid_string_prefixes.append('br')
|
||||
|
||||
result = set([''])
|
||||
if version_info >= (3, 6) and include_fstring:
|
||||
f = ['f', 'fr']
|
||||
if only_fstring:
|
||||
valid_string_prefixes = f
|
||||
result = set()
|
||||
else:
|
||||
valid_string_prefixes += f
|
||||
elif only_fstring:
|
||||
return set()
|
||||
|
||||
# if we add binary f-strings, add: ['fb', 'fbr']
|
||||
for prefix in valid_string_prefixes:
|
||||
for t in _itertools.permutations(prefix):
|
||||
# create a list with upper and lower versions of each
|
||||
# character
|
||||
result.update(different_case_versions(t))
|
||||
if version_info.major == 2:
|
||||
# In Python 2 the order cannot just be random.
|
||||
result.update(different_case_versions('ur'))
|
||||
result.update(different_case_versions('br'))
|
||||
return result
|
||||
|
||||
|
||||
def _compile(expr):
|
||||
return re.compile(expr, re.UNICODE)
|
||||
|
||||
|
||||
def _get_token_collection(version_info):
|
||||
try:
|
||||
return _token_collection_cache[tuple(version_info)]
|
||||
except KeyError:
|
||||
_token_collection_cache[tuple(version_info)] = result = \
|
||||
_create_token_collection(version_info)
|
||||
return result
|
||||
|
||||
|
||||
fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+')
|
||||
fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
|
||||
fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
|
||||
fstring_format_spec_multi_line = _compile(r'[^{}]+')
|
||||
|
||||
|
||||
def _create_token_collection(version_info):
|
||||
# Note: we use unicode matching for names ("\w") but ascii matching for
|
||||
# number literals.
|
||||
Whitespace = r'[ \f\t]*'
|
||||
whitespace = _compile(Whitespace)
|
||||
Comment = r'#[^\r\n]*'
|
||||
# Python 2 is pretty much not working properly anymore, we just ignore
|
||||
# parsing unicode properly, which is fine, I guess.
|
||||
if version_info[0] == 2:
|
||||
Name = r'([A-Za-z_0-9]+)'
|
||||
elif sys.version_info[0] == 2:
|
||||
# Unfortunately the regex engine cannot deal with the regex below, so
|
||||
# just use this one.
|
||||
Name = r'(\w+)'
|
||||
else:
|
||||
Name = u'([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'
|
||||
|
||||
if version_info >= (3, 6):
|
||||
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
|
||||
Binnumber = r'0[bB](?:_?[01])+'
|
||||
Octnumber = r'0[oO](?:_?[0-7])+'
|
||||
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
|
||||
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
|
||||
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
|
||||
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
|
||||
r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
|
||||
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
|
||||
Floatnumber = group(Pointfloat, Expfloat)
|
||||
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
|
||||
else:
|
||||
Hexnumber = r'0[xX][0-9a-fA-F]+'
|
||||
Binnumber = r'0[bB][01]+'
|
||||
if version_info.major >= 3:
|
||||
Octnumber = r'0[oO][0-7]+'
|
||||
else:
|
||||
Octnumber = '0[oO]?[0-7]+'
|
||||
Decnumber = r'(?:0+|[1-9][0-9]*)'
|
||||
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
|
||||
if version_info[0] < 3:
|
||||
Intnumber += '[lL]?'
|
||||
Exponent = r'[eE][-+]?[0-9]+'
|
||||
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
|
||||
Expfloat = r'[0-9]+' + Exponent
|
||||
Floatnumber = group(Pointfloat, Expfloat)
|
||||
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
|
||||
Number = group(Imagnumber, Floatnumber, Intnumber)
|
||||
|
||||
# Note that since _all_string_prefixes includes the empty string,
|
||||
# StringPrefix can be the empty string (making it optional).
|
||||
possible_prefixes = _all_string_prefixes(version_info)
|
||||
StringPrefix = group(*possible_prefixes)
|
||||
StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True))
|
||||
fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True)
|
||||
FStringStart = group(*fstring_prefixes)
|
||||
|
||||
# Tail end of ' string.
|
||||
Single = r"(?:\\.|[^'\\])*'"
|
||||
# Tail end of " string.
|
||||
Double = r'(?:\\.|[^"\\])*"'
|
||||
# Tail end of ''' string.
|
||||
Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
|
||||
# Tail end of """ string.
|
||||
Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
|
||||
Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')
|
||||
|
||||
# Because of leftmost-then-longest match semantics, be sure to put the
|
||||
# longest operators first (e.g., if = came before ==, == would get
|
||||
# recognized as two instances of =).
|
||||
Operator = group(r"\*\*=?", r">>=?", r"<<=?",
|
||||
r"//=?", r"->",
|
||||
r"[+\-*/%&@`|^!=<>]=?",
|
||||
r"~")
|
||||
|
||||
Bracket = '[][(){}]'
|
||||
|
||||
special_args = [r'\r\n?', r'\n', r'[;.,@]']
|
||||
if version_info >= (3, 0):
|
||||
special_args.insert(0, r'\.\.\.')
|
||||
if version_info >= (3, 8):
|
||||
special_args.insert(0, ":=?")
|
||||
else:
|
||||
special_args.insert(0, ":")
|
||||
Special = group(*special_args)
|
||||
|
||||
Funny = group(Operator, Bracket, Special)
|
||||
|
||||
# First (or only) line of ' or " string.
|
||||
ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*"
|
||||
+ group("'", r'\\(?:\r\n?|\n)'),
|
||||
StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*'
|
||||
+ group('"', r'\\(?:\r\n?|\n)'))
|
||||
pseudo_extra_pool = [Comment, Triple]
|
||||
all_quotes = '"', "'", '"""', "'''"
|
||||
if fstring_prefixes:
|
||||
pseudo_extra_pool.append(FStringStart + group(*all_quotes))
|
||||
|
||||
PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
|
||||
PseudoToken = group(Whitespace, capture=True) + \
|
||||
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
|
||||
|
||||
# For a given string prefix plus quotes, endpats maps it to a regex
|
||||
# to match the remainder of that string. _prefix can be empty, for
|
||||
# a normal single or triple quoted string (with no prefix).
|
||||
endpats = {}
|
||||
for _prefix in possible_prefixes:
|
||||
endpats[_prefix + "'"] = _compile(Single)
|
||||
endpats[_prefix + '"'] = _compile(Double)
|
||||
endpats[_prefix + "'''"] = _compile(Single3)
|
||||
endpats[_prefix + '"""'] = _compile(Double3)
|
||||
|
||||
# A set of all of the single and triple quoted string prefixes,
|
||||
# including the opening quotes.
|
||||
single_quoted = set()
|
||||
triple_quoted = set()
|
||||
fstring_pattern_map = {}
|
||||
for t in possible_prefixes:
|
||||
for quote in '"', "'":
|
||||
single_quoted.add(t + quote)
|
||||
|
||||
for quote in '"""', "'''":
|
||||
triple_quoted.add(t + quote)
|
||||
|
||||
for t in fstring_prefixes:
|
||||
for quote in all_quotes:
|
||||
fstring_pattern_map[t + quote] = quote
|
||||
|
||||
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
|
||||
'finally', 'while', 'with', 'return', 'continue',
|
||||
'break', 'del', 'pass', 'global', 'assert')
|
||||
if version_info >= (3, 5):
|
||||
ALWAYS_BREAK_TOKENS += ('nonlocal', )
|
||||
pseudo_token_compiled = _compile(PseudoToken)
|
||||
return TokenCollection(
|
||||
pseudo_token_compiled, single_quoted, triple_quoted, endpats,
|
||||
whitespace, fstring_pattern_map, set(ALWAYS_BREAK_TOKENS)
|
||||
)
|
||||
|
||||
|
||||
class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
|
||||
@property
|
||||
def end_pos(self):
|
||||
lines = split_lines(self.string)
|
||||
if len(lines) > 1:
|
||||
return self.start_pos[0] + len(lines) - 1, 0
|
||||
else:
|
||||
return self.start_pos[0], self.start_pos[1] + len(self.string)
|
||||
|
||||
|
||||
class PythonToken(Token):
|
||||
def __repr__(self):
|
||||
return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
|
||||
self._replace(type=self.type.name))
|
||||
|
||||
|
||||
class FStringNode(object):
|
||||
def __init__(self, quote):
|
||||
self.quote = quote
|
||||
self.parentheses_count = 0
|
||||
self.previous_lines = ''
|
||||
self.last_string_start_pos = None
|
||||
# In the syntax there can be multiple format_spec's nested:
|
||||
# {x:{y:3}}
|
||||
self.format_spec_count = 0
|
||||
|
||||
def open_parentheses(self, character):
|
||||
self.parentheses_count += 1
|
||||
|
||||
def close_parentheses(self, character):
|
||||
self.parentheses_count -= 1
|
||||
if self.parentheses_count == 0:
|
||||
# No parentheses means that the format spec is also finished.
|
||||
self.format_spec_count = 0
|
||||
|
||||
def allow_multiline(self):
|
||||
return len(self.quote) == 3
|
||||
|
||||
def is_in_expr(self):
|
||||
return self.parentheses_count > self.format_spec_count
|
||||
|
||||
def is_in_format_spec(self):
|
||||
return not self.is_in_expr() and self.format_spec_count
|
||||
|
||||
|
||||
def _close_fstring_if_necessary(fstring_stack, string, line_nr, column, additional_prefix):
|
||||
for fstring_stack_index, node in enumerate(fstring_stack):
|
||||
lstripped_string = string.lstrip()
|
||||
len_lstrip = len(string) - len(lstripped_string)
|
||||
if lstripped_string.startswith(node.quote):
|
||||
token = PythonToken(
|
||||
FSTRING_END,
|
||||
node.quote,
|
||||
(line_nr, column + len_lstrip),
|
||||
prefix=additional_prefix+string[:len_lstrip],
|
||||
)
|
||||
additional_prefix = ''
|
||||
assert not node.previous_lines
|
||||
del fstring_stack[fstring_stack_index:]
|
||||
return token, '', len(node.quote) + len_lstrip
|
||||
return None, additional_prefix, 0
|
||||
|
||||
|
||||
def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
|
||||
tos = fstring_stack[-1]
|
||||
allow_multiline = tos.allow_multiline()
|
||||
if tos.is_in_format_spec():
|
||||
if allow_multiline:
|
||||
regex = fstring_format_spec_multi_line
|
||||
else:
|
||||
regex = fstring_format_spec_single_line
|
||||
else:
|
||||
if allow_multiline:
|
||||
regex = fstring_string_multi_line
|
||||
else:
|
||||
regex = fstring_string_single_line
|
||||
|
||||
match = regex.match(line, pos)
|
||||
if match is None:
|
||||
return tos.previous_lines, pos
|
||||
|
||||
if not tos.previous_lines:
|
||||
tos.last_string_start_pos = (lnum, pos)
|
||||
|
||||
string = match.group(0)
|
||||
for fstring_stack_node in fstring_stack:
|
||||
end_match = endpats[fstring_stack_node.quote].match(string)
|
||||
if end_match is not None:
|
||||
string = end_match.group(0)[:-len(fstring_stack_node.quote)]
|
||||
|
||||
new_pos = pos
|
||||
new_pos += len(string)
|
||||
# even if allow_multiline is False, we still need to check for trailing
|
||||
# newlines, because a single-line f-string can contain line continuations
|
||||
if string.endswith('\n') or string.endswith('\r'):
|
||||
tos.previous_lines += string
|
||||
string = ''
|
||||
else:
|
||||
string = tos.previous_lines + string
|
||||
|
||||
return string, new_pos
|
||||
|
||||
|
||||
def tokenize(code, version_info, start_pos=(1, 0)):
|
||||
"""Generate tokens from a the source code (string)."""
|
||||
lines = split_lines(code, keepends=True)
|
||||
return tokenize_lines(lines, version_info, start_pos=start_pos)
|
||||
|
||||
|
||||
def _print_tokens(func):
|
||||
"""
|
||||
A small helper function to help debug the tokenize_lines function.
|
||||
"""
|
||||
def wrapper(*args, **kwargs):
|
||||
for token in func(*args, **kwargs):
|
||||
print(token) # This print is intentional for debugging!
|
||||
yield token
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
# @_print_tokens
|
||||
def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first_token=True):
|
||||
"""
|
||||
A heavily modified Python standard library tokenizer.
|
||||
|
||||
Additionally to the default information, yields also the prefix of each
|
||||
token. This idea comes from lib2to3. The prefix contains all information
|
||||
that is irrelevant for the parser like newlines in parentheses or comments.
|
||||
"""
|
||||
def dedent_if_necessary(start):
|
||||
while start < indents[-1]:
|
||||
if start > indents[-2]:
|
||||
yield PythonToken(ERROR_DEDENT, '', (lnum, start), '')
|
||||
indents[-1] = start
|
||||
break
|
||||
indents.pop()
|
||||
yield PythonToken(DEDENT, '', spos, '')
|
||||
|
||||
pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
|
||||
fstring_pattern_map, always_break_tokens, = \
|
||||
_get_token_collection(version_info)
|
||||
paren_level = 0 # count parentheses
|
||||
if indents is None:
|
||||
indents = [0]
|
||||
max_ = 0
|
||||
numchars = '0123456789'
|
||||
contstr = ''
|
||||
contline = None
|
||||
# We start with a newline. This makes indent at the first position
|
||||
# possible. It's not valid Python, but still better than an INDENT in the
|
||||
# second line (and not in the first). This makes quite a few things in
|
||||
# Jedi's fast parser possible.
|
||||
new_line = True
|
||||
prefix = '' # Should never be required, but here for safety
|
||||
additional_prefix = ''
|
||||
lnum = start_pos[0] - 1
|
||||
fstring_stack = []
|
||||
for line in lines: # loop over lines in stream
|
||||
lnum += 1
|
||||
pos = 0
|
||||
max_ = len(line)
|
||||
if is_first_token:
|
||||
if line.startswith(BOM_UTF8_STRING):
|
||||
additional_prefix = BOM_UTF8_STRING
|
||||
line = line[1:]
|
||||
max_ = len(line)
|
||||
|
||||
# Fake that the part before was already parsed.
|
||||
line = '^' * start_pos[1] + line
|
||||
pos = start_pos[1]
|
||||
max_ += start_pos[1]
|
||||
|
||||
is_first_token = False
|
||||
|
||||
if contstr: # continued string
|
||||
endmatch = endprog.match(line)
|
||||
if endmatch:
|
||||
pos = endmatch.end(0)
|
||||
yield PythonToken(
|
||||
STRING, contstr + line[:pos],
|
||||
contstr_start, prefix)
|
||||
contstr = ''
|
||||
contline = None
|
||||
else:
|
||||
contstr = contstr + line
|
||||
contline = contline + line
|
||||
continue
|
||||
|
||||
while pos < max_:
|
||||
if fstring_stack:
|
||||
tos = fstring_stack[-1]
|
||||
if not tos.is_in_expr():
|
||||
string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
|
||||
if string:
|
||||
yield PythonToken(
|
||||
FSTRING_STRING, string,
|
||||
tos.last_string_start_pos,
|
||||
# Never has a prefix because it can start anywhere and
|
||||
# include whitespace.
|
||||
prefix=''
|
||||
)
|
||||
tos.previous_lines = ''
|
||||
continue
|
||||
if pos == max_:
|
||||
break
|
||||
|
||||
rest = line[pos:]
|
||||
fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary(
|
||||
fstring_stack,
|
||||
rest,
|
||||
lnum,
|
||||
pos,
|
||||
additional_prefix,
|
||||
)
|
||||
pos += quote_length
|
||||
if fstring_end_token is not None:
|
||||
yield fstring_end_token
|
||||
continue
|
||||
|
||||
# in an f-string, match until the end of the string
|
||||
if fstring_stack:
|
||||
string_line = line
|
||||
for fstring_stack_node in fstring_stack:
|
||||
quote = fstring_stack_node.quote
|
||||
end_match = endpats[quote].match(line, pos)
|
||||
if end_match is not None:
|
||||
end_match_string = end_match.group(0)
|
||||
if len(end_match_string) - len(quote) + pos < len(string_line):
|
||||
string_line = line[:pos] + end_match_string[:-len(quote)]
|
||||
pseudomatch = pseudo_token.match(string_line, pos)
|
||||
else:
|
||||
pseudomatch = pseudo_token.match(line, pos)
|
||||
|
||||
if pseudomatch:
|
||||
prefix = additional_prefix + pseudomatch.group(1)
|
||||
additional_prefix = ''
|
||||
start, pos = pseudomatch.span(2)
|
||||
spos = (lnum, start)
|
||||
token = pseudomatch.group(2)
|
||||
if token == '':
|
||||
assert prefix
|
||||
additional_prefix = prefix
|
||||
# This means that we have a line with whitespace/comments at
|
||||
# the end, which just results in an endmarker.
|
||||
break
|
||||
initial = token[0]
|
||||
else:
|
||||
match = whitespace.match(line, pos)
|
||||
initial = line[match.end()]
|
||||
start = match.end()
|
||||
spos = (lnum, start)
|
||||
|
||||
if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None):
|
||||
new_line = False
|
||||
if paren_level == 0 and not fstring_stack:
|
||||
indent_start = start
|
||||
if indent_start > indents[-1]:
|
||||
yield PythonToken(INDENT, '', spos, '')
|
||||
indents.append(indent_start)
|
||||
for t in dedent_if_necessary(indent_start):
|
||||
yield t
|
||||
|
||||
if not pseudomatch: # scan for tokens
|
||||
match = whitespace.match(line, pos)
|
||||
if new_line and paren_level == 0 and not fstring_stack:
|
||||
for t in dedent_if_necessary(match.end()):
|
||||
yield t
|
||||
pos = match.end()
|
||||
new_line = False
|
||||
yield PythonToken(
|
||||
ERRORTOKEN, line[pos], (lnum, pos),
|
||||
additional_prefix + match.group(0)
|
||||
)
|
||||
additional_prefix = ''
|
||||
pos += 1
|
||||
continue
|
||||
|
||||
if (initial in numchars # ordinary number
|
||||
or (initial == '.' and token != '.' and token != '...')):
|
||||
yield PythonToken(NUMBER, token, spos, prefix)
|
||||
elif pseudomatch.group(3) is not None: # ordinary name
|
||||
if token in always_break_tokens and (fstring_stack or paren_level):
|
||||
fstring_stack[:] = []
|
||||
paren_level = 0
|
||||
# We only want to dedent if the token is on a new line.
|
||||
m = re.match(r'[ \f\t]*$', line[:start])
|
||||
if m is not None:
|
||||
for t in dedent_if_necessary(m.end()):
|
||||
yield t
|
||||
if is_identifier(token):
|
||||
yield PythonToken(NAME, token, spos, prefix)
|
||||
else:
|
||||
for t in _split_illegal_unicode_name(token, spos, prefix):
|
||||
yield t # yield from Python 2
|
||||
elif initial in '\r\n':
|
||||
if any(not f.allow_multiline() for f in fstring_stack):
|
||||
# Would use fstring_stack.clear, but that's not available
|
||||
# in Python 2.
|
||||
fstring_stack[:] = []
|
||||
|
||||
if not new_line and paren_level == 0 and not fstring_stack:
|
||||
yield PythonToken(NEWLINE, token, spos, prefix)
|
||||
else:
|
||||
additional_prefix = prefix + token
|
||||
new_line = True
|
||||
elif initial == '#': # Comments
|
||||
assert not token.endswith("\n")
|
||||
if fstring_stack and fstring_stack[-1].is_in_expr():
|
||||
# `#` is not allowed in f-string expressions
|
||||
yield PythonToken(ERRORTOKEN, initial, spos, prefix)
|
||||
pos = start + 1
|
||||
else:
|
||||
additional_prefix = prefix + token
|
||||
elif token in triple_quoted:
|
||||
endprog = endpats[token]
|
||||
endmatch = endprog.match(line, pos)
|
||||
if endmatch: # all on one line
|
||||
pos = endmatch.end(0)
|
||||
token = line[start:pos]
|
||||
yield PythonToken(STRING, token, spos, prefix)
|
||||
else:
|
||||
contstr_start = spos # multiple lines
|
||||
contstr = line[start:]
|
||||
contline = line
|
||||
break
|
||||
|
||||
# Check up to the first 3 chars of the token to see if
|
||||
# they're in the single_quoted set. If so, they start
|
||||
# a string.
|
||||
# We're using the first 3, because we're looking for
|
||||
# "rb'" (for example) at the start of the token. If
|
||||
# we switch to longer prefixes, this needs to be
|
||||
# adjusted.
|
||||
# Note that initial == token[:1].
|
||||
# Also note that single quote checking must come after
|
||||
# triple quote checking (above).
|
||||
elif initial in single_quoted or \
|
||||
token[:2] in single_quoted or \
|
||||
token[:3] in single_quoted:
|
||||
if token[-1] in '\r\n': # continued string
|
||||
# This means that a single quoted string ends with a
|
||||
# backslash and is continued.
|
||||
contstr_start = lnum, start
|
||||
endprog = (endpats.get(initial) or endpats.get(token[1])
|
||||
or endpats.get(token[2]))
|
||||
contstr = line[start:]
|
||||
contline = line
|
||||
break
|
||||
else: # ordinary string
|
||||
yield PythonToken(STRING, token, spos, prefix)
|
||||
elif token in fstring_pattern_map: # The start of an fstring.
|
||||
fstring_stack.append(FStringNode(fstring_pattern_map[token]))
|
||||
yield PythonToken(FSTRING_START, token, spos, prefix)
|
||||
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'): # continued stmt
|
||||
additional_prefix += prefix + line[start:]
|
||||
break
|
||||
else:
|
||||
if token in '([{':
|
||||
if fstring_stack:
|
||||
fstring_stack[-1].open_parentheses(token)
|
||||
else:
|
||||
paren_level += 1
|
||||
elif token in ')]}':
|
||||
if fstring_stack:
|
||||
fstring_stack[-1].close_parentheses(token)
|
||||
else:
|
||||
if paren_level:
|
||||
paren_level -= 1
|
||||
elif token.startswith(':') and fstring_stack \
|
||||
and fstring_stack[-1].parentheses_count \
|
||||
- fstring_stack[-1].format_spec_count == 1:
|
||||
# `:` and `:=` both count
|
||||
fstring_stack[-1].format_spec_count += 1
|
||||
token = ':'
|
||||
pos = start + 1
|
||||
|
||||
yield PythonToken(OP, token, spos, prefix)
|
||||
|
||||
if contstr:
|
||||
yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
|
||||
if contstr.endswith('\n') or contstr.endswith('\r'):
|
||||
new_line = True
|
||||
|
||||
if fstring_stack:
|
||||
tos = fstring_stack[-1]
|
||||
if tos.previous_lines:
|
||||
yield PythonToken(
|
||||
FSTRING_STRING, tos.previous_lines,
|
||||
tos.last_string_start_pos,
|
||||
# Never has a prefix because it can start anywhere and
|
||||
# include whitespace.
|
||||
prefix=''
|
||||
)
|
||||
|
||||
end_pos = lnum, max_
|
||||
# As the last position we just take the maximally possible position. We
|
||||
# remove -1 for the last new line.
|
||||
for indent in indents[1:]:
|
||||
indents.pop()
|
||||
yield PythonToken(DEDENT, '', end_pos, '')
|
||||
yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
|
||||
|
||||
|
||||
def _split_illegal_unicode_name(token, start_pos, prefix):
|
||||
def create_token():
|
||||
return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix)
|
||||
|
||||
found = ''
|
||||
is_illegal = False
|
||||
pos = start_pos
|
||||
for i, char in enumerate(token):
|
||||
if is_illegal:
|
||||
if is_identifier(char):
|
||||
yield create_token()
|
||||
found = char
|
||||
is_illegal = False
|
||||
prefix = ''
|
||||
pos = start_pos[0], start_pos[1] + i
|
||||
else:
|
||||
found += char
|
||||
else:
|
||||
new_found = found + char
|
||||
if is_identifier(new_found):
|
||||
found = new_found
|
||||
else:
|
||||
if found:
|
||||
yield create_token()
|
||||
prefix = ''
|
||||
pos = start_pos[0], start_pos[1] + i
|
||||
found = char
|
||||
is_illegal = True
|
||||
|
||||
if found:
|
||||
yield create_token()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) >= 2:
|
||||
path = sys.argv[1]
|
||||
with open(path) as f:
|
||||
code = f.read()
|
||||
else:
|
||||
code = sys.stdin.read()
|
||||
|
||||
from parso.utils import python_bytes_to_unicode, parse_version_string
|
||||
|
||||
if isinstance(code, bytes):
|
||||
code = python_bytes_to_unicode(code)
|
||||
|
||||
for token in tokenize(code, parse_version_string()):
|
||||
print(token)
|
24
venv/Lib/site-packages/parso/python/tokenize.pyi
Normal file
24
venv/Lib/site-packages/parso/python/tokenize.pyi
Normal file
|
@ -0,0 +1,24 @@
|
|||
from typing import Generator, Iterable, NamedTuple, Tuple
|
||||
|
||||
from parso.python.token import TokenType
|
||||
from parso.utils import PythonVersionInfo
|
||||
|
||||
class Token(NamedTuple):
|
||||
type: TokenType
|
||||
string: str
|
||||
start_pos: Tuple[int, int]
|
||||
prefix: str
|
||||
@property
|
||||
def end_pos(self) -> Tuple[int, int]: ...
|
||||
|
||||
class PythonToken(Token):
|
||||
def __repr__(self) -> str: ...
|
||||
|
||||
def tokenize(
|
||||
code: str, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
|
||||
) -> Generator[PythonToken, None, None]: ...
|
||||
def tokenize_lines(
|
||||
lines: Iterable[str],
|
||||
version_info: PythonVersionInfo,
|
||||
start_pos: Tuple[int, int] = (1, 0),
|
||||
) -> Generator[PythonToken, None, None]: ...
|
1270
venv/Lib/site-packages/parso/python/tree.py
Normal file
1270
venv/Lib/site-packages/parso/python/tree.py
Normal file
File diff suppressed because it is too large
Load diff
381
venv/Lib/site-packages/parso/tree.py
Normal file
381
venv/Lib/site-packages/parso/tree.py
Normal file
|
@ -0,0 +1,381 @@
|
|||
import sys
|
||||
from abc import abstractmethod, abstractproperty
|
||||
|
||||
from parso._compatibility import utf8_repr, encoding
|
||||
from parso.utils import split_lines
|
||||
|
||||
|
||||
def search_ancestor(node, *node_types):
|
||||
"""
|
||||
Recursively looks at the parents of a node and returns the first found node
|
||||
that matches node_types. Returns ``None`` if no matching node is found.
|
||||
|
||||
:param node: The ancestors of this node will be checked.
|
||||
:param node_types: type names that are searched for.
|
||||
:type node_types: tuple of str
|
||||
"""
|
||||
while True:
|
||||
node = node.parent
|
||||
if node is None or node.type in node_types:
|
||||
return node
|
||||
|
||||
|
||||
class NodeOrLeaf(object):
|
||||
"""
|
||||
The base class for nodes and leaves.
|
||||
"""
|
||||
__slots__ = ()
|
||||
type = None
|
||||
'''
|
||||
The type is a string that typically matches the types of the grammar file.
|
||||
'''
|
||||
|
||||
def get_root_node(self):
|
||||
"""
|
||||
Returns the root node of a parser tree. The returned node doesn't have
|
||||
a parent node like all the other nodes/leaves.
|
||||
"""
|
||||
scope = self
|
||||
while scope.parent is not None:
|
||||
scope = scope.parent
|
||||
return scope
|
||||
|
||||
def get_next_sibling(self):
|
||||
"""
|
||||
Returns the node immediately following this node in this parent's
|
||||
children list. If this node does not have a next sibling, it is None
|
||||
"""
|
||||
parent = self.parent
|
||||
if parent is None:
|
||||
return None
|
||||
|
||||
# Can't use index(); we need to test by identity
|
||||
for i, child in enumerate(parent.children):
|
||||
if child is self:
|
||||
try:
|
||||
return self.parent.children[i + 1]
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
def get_previous_sibling(self):
|
||||
"""
|
||||
Returns the node immediately preceding this node in this parent's
|
||||
children list. If this node does not have a previous sibling, it is
|
||||
None.
|
||||
"""
|
||||
parent = self.parent
|
||||
if parent is None:
|
||||
return None
|
||||
|
||||
# Can't use index(); we need to test by identity
|
||||
for i, child in enumerate(parent.children):
|
||||
if child is self:
|
||||
if i == 0:
|
||||
return None
|
||||
return self.parent.children[i - 1]
|
||||
|
||||
def get_previous_leaf(self):
|
||||
"""
|
||||
Returns the previous leaf in the parser tree.
|
||||
Returns `None` if this is the first element in the parser tree.
|
||||
"""
|
||||
if self.parent is None:
|
||||
return None
|
||||
|
||||
node = self
|
||||
while True:
|
||||
c = node.parent.children
|
||||
i = c.index(node)
|
||||
if i == 0:
|
||||
node = node.parent
|
||||
if node.parent is None:
|
||||
return None
|
||||
else:
|
||||
node = c[i - 1]
|
||||
break
|
||||
|
||||
while True:
|
||||
try:
|
||||
node = node.children[-1]
|
||||
except AttributeError: # A Leaf doesn't have children.
|
||||
return node
|
||||
|
||||
def get_next_leaf(self):
|
||||
"""
|
||||
Returns the next leaf in the parser tree.
|
||||
Returns None if this is the last element in the parser tree.
|
||||
"""
|
||||
if self.parent is None:
|
||||
return None
|
||||
|
||||
node = self
|
||||
while True:
|
||||
c = node.parent.children
|
||||
i = c.index(node)
|
||||
if i == len(c) - 1:
|
||||
node = node.parent
|
||||
if node.parent is None:
|
||||
return None
|
||||
else:
|
||||
node = c[i + 1]
|
||||
break
|
||||
|
||||
while True:
|
||||
try:
|
||||
node = node.children[0]
|
||||
except AttributeError: # A Leaf doesn't have children.
|
||||
return node
|
||||
|
||||
@abstractproperty
|
||||
def start_pos(self):
|
||||
"""
|
||||
Returns the starting position of the prefix as a tuple, e.g. `(3, 4)`.
|
||||
|
||||
:return tuple of int: (line, column)
|
||||
"""
|
||||
|
||||
@abstractproperty
|
||||
def end_pos(self):
|
||||
"""
|
||||
Returns the end position of the prefix as a tuple, e.g. `(3, 4)`.
|
||||
|
||||
:return tuple of int: (line, column)
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_start_pos_of_prefix(self):
|
||||
"""
|
||||
Returns the start_pos of the prefix. This means basically it returns
|
||||
the end_pos of the last prefix. The `get_start_pos_of_prefix()` of the
|
||||
prefix `+` in `2 + 1` would be `(1, 1)`, while the start_pos is
|
||||
`(1, 2)`.
|
||||
|
||||
:return tuple of int: (line, column)
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_first_leaf(self):
|
||||
"""
|
||||
Returns the first leaf of a node or itself if this is a leaf.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_last_leaf(self):
|
||||
"""
|
||||
Returns the last leaf of a node or itself if this is a leaf.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_code(self, include_prefix=True):
|
||||
"""
|
||||
Returns the code that was the input for the parser for this node.
|
||||
|
||||
:param include_prefix: Removes the prefix (whitespace and comments) of
|
||||
e.g. a statement.
|
||||
"""
|
||||
|
||||
|
||||
class Leaf(NodeOrLeaf):
|
||||
'''
|
||||
Leafs are basically tokens with a better API. Leafs exactly know where they
|
||||
were defined and what text preceeds them.
|
||||
'''
|
||||
__slots__ = ('value', 'parent', 'line', 'column', 'prefix')
|
||||
|
||||
def __init__(self, value, start_pos, prefix=''):
|
||||
self.value = value
|
||||
'''
|
||||
:py:func:`str` The value of the current token.
|
||||
'''
|
||||
self.start_pos = start_pos
|
||||
self.prefix = prefix
|
||||
'''
|
||||
:py:func:`str` Typically a mixture of whitespace and comments. Stuff
|
||||
that is syntactically irrelevant for the syntax tree.
|
||||
'''
|
||||
self.parent = None
|
||||
'''
|
||||
The parent :class:`BaseNode` of this leaf.
|
||||
'''
|
||||
|
||||
@property
|
||||
def start_pos(self):
|
||||
return self.line, self.column
|
||||
|
||||
@start_pos.setter
|
||||
def start_pos(self, value):
|
||||
self.line = value[0]
|
||||
self.column = value[1]
|
||||
|
||||
def get_start_pos_of_prefix(self):
|
||||
previous_leaf = self.get_previous_leaf()
|
||||
if previous_leaf is None:
|
||||
lines = split_lines(self.prefix)
|
||||
# + 1 is needed because split_lines always returns at least [''].
|
||||
return self.line - len(lines) + 1, 0 # It's the first leaf.
|
||||
return previous_leaf.end_pos
|
||||
|
||||
def get_first_leaf(self):
|
||||
return self
|
||||
|
||||
def get_last_leaf(self):
|
||||
return self
|
||||
|
||||
def get_code(self, include_prefix=True):
|
||||
if include_prefix:
|
||||
return self.prefix + self.value
|
||||
else:
|
||||
return self.value
|
||||
|
||||
@property
|
||||
def end_pos(self):
|
||||
lines = split_lines(self.value)
|
||||
end_pos_line = self.line + len(lines) - 1
|
||||
# Check for multiline token
|
||||
if self.line == end_pos_line:
|
||||
end_pos_column = self.column + len(lines[-1])
|
||||
else:
|
||||
end_pos_column = len(lines[-1])
|
||||
return end_pos_line, end_pos_column
|
||||
|
||||
@utf8_repr
|
||||
def __repr__(self):
|
||||
value = self.value
|
||||
if not value:
|
||||
value = self.type
|
||||
return "<%s: %s>" % (type(self).__name__, value)
|
||||
|
||||
|
||||
class TypedLeaf(Leaf):
|
||||
__slots__ = ('type',)
|
||||
|
||||
def __init__(self, type, value, start_pos, prefix=''):
|
||||
super(TypedLeaf, self).__init__(value, start_pos, prefix)
|
||||
self.type = type
|
||||
|
||||
|
||||
class BaseNode(NodeOrLeaf):
|
||||
"""
|
||||
The super class for all nodes.
|
||||
A node has children, a type and possibly a parent node.
|
||||
"""
|
||||
__slots__ = ('children', 'parent')
|
||||
type = None
|
||||
|
||||
def __init__(self, children):
|
||||
self.children = children
|
||||
"""
|
||||
A list of :class:`NodeOrLeaf` child nodes.
|
||||
"""
|
||||
self.parent = None
|
||||
'''
|
||||
The parent :class:`BaseNode` of this leaf.
|
||||
None if this is the root node.
|
||||
'''
|
||||
|
||||
@property
|
||||
def start_pos(self):
|
||||
return self.children[0].start_pos
|
||||
|
||||
def get_start_pos_of_prefix(self):
|
||||
return self.children[0].get_start_pos_of_prefix()
|
||||
|
||||
@property
|
||||
def end_pos(self):
|
||||
return self.children[-1].end_pos
|
||||
|
||||
def _get_code_for_children(self, children, include_prefix):
|
||||
if include_prefix:
|
||||
return "".join(c.get_code() for c in children)
|
||||
else:
|
||||
first = children[0].get_code(include_prefix=False)
|
||||
return first + "".join(c.get_code() for c in children[1:])
|
||||
|
||||
def get_code(self, include_prefix=True):
|
||||
return self._get_code_for_children(self.children, include_prefix)
|
||||
|
||||
def get_leaf_for_position(self, position, include_prefixes=False):
|
||||
"""
|
||||
Get the :py:class:`parso.tree.Leaf` at ``position``
|
||||
|
||||
:param tuple position: A position tuple, row, column. Rows start from 1
|
||||
:param bool include_prefixes: If ``False``, ``None`` will be returned if ``position`` falls
|
||||
on whitespace or comments before a leaf
|
||||
:return: :py:class:`parso.tree.Leaf` at ``position``, or ``None``
|
||||
"""
|
||||
def binary_search(lower, upper):
|
||||
if lower == upper:
|
||||
element = self.children[lower]
|
||||
if not include_prefixes and position < element.start_pos:
|
||||
# We're on a prefix.
|
||||
return None
|
||||
# In case we have prefixes, a leaf always matches
|
||||
try:
|
||||
return element.get_leaf_for_position(position, include_prefixes)
|
||||
except AttributeError:
|
||||
return element
|
||||
|
||||
|
||||
index = int((lower + upper) / 2)
|
||||
element = self.children[index]
|
||||
if position <= element.end_pos:
|
||||
return binary_search(lower, index)
|
||||
else:
|
||||
return binary_search(index + 1, upper)
|
||||
|
||||
if not ((1, 0) <= position <= self.children[-1].end_pos):
|
||||
raise ValueError('Please provide a position that exists within this node.')
|
||||
return binary_search(0, len(self.children) - 1)
|
||||
|
||||
def get_first_leaf(self):
|
||||
return self.children[0].get_first_leaf()
|
||||
|
||||
def get_last_leaf(self):
|
||||
return self.children[-1].get_last_leaf()
|
||||
|
||||
@utf8_repr
|
||||
def __repr__(self):
|
||||
code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip()
|
||||
if not sys.version_info.major >= 3:
|
||||
code = code.encode(encoding, 'replace')
|
||||
return "<%s: %s@%s,%s>" % \
|
||||
(type(self).__name__, code, self.start_pos[0], self.start_pos[1])
|
||||
|
||||
|
||||
class Node(BaseNode):
|
||||
"""Concrete implementation for interior nodes."""
|
||||
__slots__ = ('type',)
|
||||
|
||||
def __init__(self, type, children):
|
||||
super(Node, self).__init__(children)
|
||||
self.type = type
|
||||
|
||||
def __repr__(self):
|
||||
return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children)
|
||||
|
||||
|
||||
class ErrorNode(BaseNode):
|
||||
"""
|
||||
A node that contains valid nodes/leaves that we're follow by a token that
|
||||
was invalid. This basically means that the leaf after this node is where
|
||||
Python would mark a syntax error.
|
||||
"""
|
||||
__slots__ = ()
|
||||
type = 'error_node'
|
||||
|
||||
|
||||
class ErrorLeaf(Leaf):
|
||||
"""
|
||||
A leaf that is either completely invalid in a language (like `$` in Python)
|
||||
or is invalid at that position. Like the star in `1 +* 1`.
|
||||
"""
|
||||
__slots__ = ('token_type',)
|
||||
type = 'error_leaf'
|
||||
|
||||
def __init__(self, token_type, value, start_pos, prefix=''):
|
||||
super(ErrorLeaf, self).__init__(value, start_pos, prefix)
|
||||
self.token_type = token_type
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s: %s:%s, %s>" % \
|
||||
(type(self).__name__, self.token_type, repr(self.value), self.start_pos)
|
185
venv/Lib/site-packages/parso/utils.py
Normal file
185
venv/Lib/site-packages/parso/utils.py
Normal file
|
@ -0,0 +1,185 @@
|
|||
from collections import namedtuple
|
||||
import re
|
||||
import sys
|
||||
from ast import literal_eval
|
||||
from functools import total_ordering
|
||||
|
||||
from parso._compatibility import unicode
|
||||
|
||||
# The following is a list in Python that are line breaks in str.splitlines, but
|
||||
# not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed,
|
||||
# 0xA) are allowed to split lines.
|
||||
_NON_LINE_BREAKS = (
|
||||
u'\v', # Vertical Tabulation 0xB
|
||||
u'\f', # Form Feed 0xC
|
||||
u'\x1C', # File Separator
|
||||
u'\x1D', # Group Separator
|
||||
u'\x1E', # Record Separator
|
||||
u'\x85', # Next Line (NEL - Equivalent to CR+LF.
|
||||
# Used to mark end-of-line on some IBM mainframes.)
|
||||
u'\u2028', # Line Separator
|
||||
u'\u2029', # Paragraph Separator
|
||||
)
|
||||
|
||||
Version = namedtuple('Version', 'major, minor, micro')
|
||||
|
||||
|
||||
def split_lines(string, keepends=False):
|
||||
r"""
|
||||
Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`,
|
||||
looks at form feeds and other special characters as normal text. Just
|
||||
splits ``\n`` and ``\r\n``.
|
||||
Also different: Returns ``[""]`` for an empty string input.
|
||||
|
||||
In Python 2.7 form feeds are used as normal characters when using
|
||||
str.splitlines. However in Python 3 somewhere there was a decision to split
|
||||
also on form feeds.
|
||||
"""
|
||||
if keepends:
|
||||
lst = string.splitlines(True)
|
||||
|
||||
# We have to merge lines that were broken by form feed characters.
|
||||
merge = []
|
||||
for i, line in enumerate(lst):
|
||||
try:
|
||||
last_chr = line[-1]
|
||||
except IndexError:
|
||||
pass
|
||||
else:
|
||||
if last_chr in _NON_LINE_BREAKS:
|
||||
merge.append(i)
|
||||
|
||||
for index in reversed(merge):
|
||||
try:
|
||||
lst[index] = lst[index] + lst[index + 1]
|
||||
del lst[index + 1]
|
||||
except IndexError:
|
||||
# index + 1 can be empty and therefore there's no need to
|
||||
# merge.
|
||||
pass
|
||||
|
||||
# The stdlib's implementation of the end is inconsistent when calling
|
||||
# it with/without keepends. One time there's an empty string in the
|
||||
# end, one time there's none.
|
||||
if string.endswith('\n') or string.endswith('\r') or string == '':
|
||||
lst.append('')
|
||||
return lst
|
||||
else:
|
||||
return re.split(r'\n|\r\n|\r', string)
|
||||
|
||||
|
||||
def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
|
||||
"""
|
||||
Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a
|
||||
unicode object like in :py:meth:`bytes.decode`.
|
||||
|
||||
:param encoding: See :py:meth:`bytes.decode` documentation.
|
||||
:param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
|
||||
``'strict'``, ``'replace'`` or ``'ignore'``.
|
||||
"""
|
||||
def detect_encoding():
|
||||
"""
|
||||
For the implementation of encoding definitions in Python, look at:
|
||||
- http://www.python.org/dev/peps/pep-0263/
|
||||
- http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
|
||||
"""
|
||||
byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
|
||||
if source.startswith(byte_mark):
|
||||
# UTF-8 byte-order mark
|
||||
return 'utf-8'
|
||||
|
||||
first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0)
|
||||
possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
|
||||
first_two_lines)
|
||||
if possible_encoding:
|
||||
return possible_encoding.group(1)
|
||||
else:
|
||||
# the default if nothing else has been set -> PEP 263
|
||||
return encoding
|
||||
|
||||
if isinstance(source, unicode):
|
||||
# only cast str/bytes
|
||||
return source
|
||||
|
||||
encoding = detect_encoding()
|
||||
if not isinstance(encoding, unicode):
|
||||
encoding = unicode(encoding, 'utf-8', 'replace')
|
||||
|
||||
try:
|
||||
# Cast to unicode
|
||||
return unicode(source, encoding, errors)
|
||||
except LookupError:
|
||||
if errors == 'replace':
|
||||
# This is a weird case that can happen if the given encoding is not
|
||||
# a valid encoding. This usually shouldn't happen with provided
|
||||
# encodings, but can happen if somebody uses encoding declarations
|
||||
# like `# coding: foo-8`.
|
||||
return unicode(source, 'utf-8', errors)
|
||||
raise
|
||||
|
||||
|
||||
def version_info():
|
||||
"""
|
||||
Returns a namedtuple of parso's version, similar to Python's
|
||||
``sys.version_info``.
|
||||
"""
|
||||
from parso import __version__
|
||||
tupl = re.findall(r'[a-z]+|\d+', __version__)
|
||||
return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)])
|
||||
|
||||
|
||||
def _parse_version(version):
|
||||
match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version)
|
||||
if match is None:
|
||||
raise ValueError('The given version is not in the right format. '
|
||||
'Use something like "3.8" or "3".')
|
||||
|
||||
major = int(match.group(1))
|
||||
minor = match.group(2)
|
||||
if minor is None:
|
||||
# Use the latest Python in case it's not exactly defined, because the
|
||||
# grammars are typically backwards compatible?
|
||||
if major == 2:
|
||||
minor = "7"
|
||||
elif major == 3:
|
||||
minor = "6"
|
||||
else:
|
||||
raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
|
||||
minor = int(minor)
|
||||
return PythonVersionInfo(major, minor)
|
||||
|
||||
|
||||
@total_ordering
|
||||
class PythonVersionInfo(namedtuple('Version', 'major, minor')):
|
||||
def __gt__(self, other):
|
||||
if isinstance(other, tuple):
|
||||
if len(other) != 2:
|
||||
raise ValueError("Can only compare to tuples of length 2.")
|
||||
return (self.major, self.minor) > other
|
||||
super(PythonVersionInfo, self).__gt__(other)
|
||||
|
||||
return (self.major, self.minor)
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, tuple):
|
||||
if len(other) != 2:
|
||||
raise ValueError("Can only compare to tuples of length 2.")
|
||||
return (self.major, self.minor) == other
|
||||
super(PythonVersionInfo, self).__eq__(other)
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
|
||||
def parse_version_string(version=None):
|
||||
"""
|
||||
Checks for a valid version number (e.g. `3.8` or `2.7.1` or `3`) and
|
||||
returns a corresponding version info that is always two characters long in
|
||||
decimal.
|
||||
"""
|
||||
if version is None:
|
||||
version = '%s.%s' % sys.version_info[:2]
|
||||
if not isinstance(version, (unicode, str)):
|
||||
raise TypeError('version must be a string like "3.8"')
|
||||
|
||||
return _parse_version(version)
|
29
venv/Lib/site-packages/parso/utils.pyi
Normal file
29
venv/Lib/site-packages/parso/utils.pyi
Normal file
|
@ -0,0 +1,29 @@
|
|||
from typing import NamedTuple, Optional, Sequence, Union
|
||||
|
||||
class Version(NamedTuple):
|
||||
major: int
|
||||
minor: int
|
||||
micro: int
|
||||
|
||||
def split_lines(string: str, keepends: bool = ...) -> Sequence[str]: ...
|
||||
def python_bytes_to_unicode(
|
||||
source: Union[str, bytes], encoding: str = ..., errors: str = ...
|
||||
) -> str: ...
|
||||
def version_info() -> Version:
|
||||
"""
|
||||
Returns a namedtuple of parso's version, similar to Python's
|
||||
``sys.version_info``.
|
||||
"""
|
||||
...
|
||||
|
||||
class PythonVersionInfo(NamedTuple):
|
||||
major: int
|
||||
minor: int
|
||||
|
||||
def parse_version_string(version: Optional[str]) -> PythonVersionInfo:
|
||||
"""
|
||||
Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
|
||||
returns a corresponding version info that is always two characters long in
|
||||
decimal.
|
||||
"""
|
||||
...
|
Loading…
Add table
Add a link
Reference in a new issue