# -*- coding: utf-8 -*- """ pygments.lexers.python ~~~~~~~~~~~~~~~~~~~~~~ Lexers for Python and related languages. :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ import re from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ default, words, combined, do_insertions from pygments.util import get_bool_opt, shebang_matches from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic, Other, Error from pygments import unistring as uni __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', 'Python2Lexer', 'Python2TracebackLexer', 'CythonLexer', 'DgLexer', 'NumPyLexer'] line_re = re.compile('.*?\n') class PythonLexer(RegexLexer): """ For `Python `_ source code (version 3.x). .. versionadded:: 0.10 .. versionchanged:: 2.5 This is now the default ``PythonLexer``. It is still available as the alias ``Python3Lexer``. """ name = 'Python' aliases = ['python', 'py', 'sage', 'python3', 'py3'] filenames = [ '*.py', '*.pyw', # Jython '*.jy', # Sage '*.sage', # SCons '*.sc', 'SConstruct', 'SConscript', # Skylark/Starlark (used by Bazel, Buck, and Pants) '*.bzl', 'BUCK', 'BUILD', 'BUILD.bazel', 'WORKSPACE', # Twisted Application infrastructure '*.tac', ] mimetypes = ['text/x-python', 'application/x-python', 'text/x-python3', 'application/x-python3'] flags = re.MULTILINE | re.UNICODE uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue) def innerstring_rules(ttype): return [ # the old style '%s' % (...) string formatting (still valid in Py3) (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[E-GXc-giorsaux%]', String.Interpol), # the new style '{}'.format(...) string formatting (r'\{' r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name r'(\![sra])?' # conversion r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' r'\}', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%{\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r'%|(\{{1,2})', ttype) # newlines are an error (use "nl" state) ] def fstring_rules(ttype): return [ # Assuming that a '}' is the closing brace after format specifier. # Sadly, this means that we won't detect syntax error. But it's # more important to parse correct syntax correctly, than to # highlight invalid syntax. (r'\}', String.Interpol), (r'\{', String.Interpol, 'expr-inside-fstring'), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"{}\n]+', ttype), (r'[\'"\\]', ttype), # newlines are an error (use "nl" state) ] tokens = { 'root': [ (r'\n', Text), (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)), (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)), (r'\A#!.+$', Comment.Hashbang), (r'#.*$', Comment.Single), (r'\\\n', Text), (r'\\', Text), include('keywords'), (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'fromimport'), (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'import'), include('expr'), ], 'expr': [ # raw f-strings ('(?i)(rf|fr)(""")', bygroups(String.Affix, String.Double), 'tdqf'), ("(?i)(rf|fr)(''')", bygroups(String.Affix, String.Single), 'tsqf'), ('(?i)(rf|fr)(")', bygroups(String.Affix, String.Double), 'dqf'), ("(?i)(rf|fr)(')", bygroups(String.Affix, String.Single), 'sqf'), # non-raw f-strings ('([fF])(""")', bygroups(String.Affix, String.Double), combined('fstringescape', 'tdqf')), ("([fF])(''')", bygroups(String.Affix, String.Single), combined('fstringescape', 'tsqf')), ('([fF])(")', bygroups(String.Affix, String.Double), combined('fstringescape', 'dqf')), ("([fF])(')", bygroups(String.Affix, String.Single), combined('fstringescape', 'sqf')), # raw strings ('(?i)(rb|br|r)(""")', bygroups(String.Affix, String.Double), 'tdqs'), ("(?i)(rb|br|r)(''')", bygroups(String.Affix, String.Single), 'tsqs'), ('(?i)(rb|br|r)(")', bygroups(String.Affix, String.Double), 'dqs'), ("(?i)(rb|br|r)(')", bygroups(String.Affix, String.Single), 'sqs'), # non-raw strings ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')), ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')), ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')), ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')), (r'[^\S\n]+', Text), (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator), (r'[]{}:(),;[]', Punctuation), (r'(in|is|and|or|not)\b', Operator.Word), include('expr-keywords'), include('builtins'), include('magicfuncs'), include('magicvars'), include('name'), include('numbers'), ], 'expr-inside-fstring': [ (r'[{([]', Punctuation, 'expr-inside-fstring-inner'), # without format specifier (r'(=\s*)?' # debug (https://bugs.python.org/issue36817) r'(\![sraf])?' # conversion r'\}', String.Interpol, '#pop'), # with format specifier # we'll catch the remaining '}' in the outer scope (r'(=\s*)?' # debug (https://bugs.python.org/issue36817) r'(\![sraf])?' # conversion r':', String.Interpol, '#pop'), (r'\s+', Text), # allow new lines include('expr'), ], 'expr-inside-fstring-inner': [ (r'[{([]', Punctuation, 'expr-inside-fstring-inner'), (r'[])}]', Punctuation, '#pop'), (r'\s+', Text), # allow new lines include('expr'), ], 'expr-keywords': [ # Based on https://docs.python.org/3/reference/expressions.html (words(( 'async for', 'await', 'else', 'for', 'if', 'lambda', 'yield', 'yield from'), suffix=r'\b'), Keyword), (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant), ], 'keywords': [ (words(( 'assert', 'async', 'await', 'break', 'continue', 'del', 'elif', 'else', 'except', 'finally', 'for', 'global', 'if', 'lambda', 'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', 'as', 'with'), suffix=r'\b'), Keyword), (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant), ], 'builtins': [ (words(( '__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray', 'bytes', 'chr', 'classmethod', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'filter', 'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass', 'iter', 'len', 'list', 'locals', 'map', 'max', 'memoryview', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'print', 'property', 'range', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', 'vars', 'zip'), prefix=r'(?`_ source code. .. versionchanged:: 2.5 This class has been renamed from ``PythonLexer``. ``PythonLexer`` now refers to the Python 3 variant. File name patterns like ``*.py`` have been moved to Python 3 as well. """ name = 'Python 2.x' aliases = ['python2', 'py2'] filenames = [] # now taken over by PythonLexer (3.x) mimetypes = ['text/x-python2', 'application/x-python2'] def innerstring_rules(ttype): return [ # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[E-GXc-giorsux%]', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r'%', ttype), # newlines are an error (use "nl" state) ] tokens = { 'root': [ (r'\n', Text), (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)), (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)), (r'[^\S\n]+', Text), (r'\A#!.+$', Comment.Hashbang), (r'#.*$', Comment.Single), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator), include('keywords'), (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'fromimport'), (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'import'), include('builtins'), include('magicfuncs'), include('magicvars'), include('backtick'), ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', bygroups(String.Affix, String.Double), 'tdqs'), ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", bygroups(String.Affix, String.Single), 'tsqs'), ('([rR]|[uUbB][rR]|[rR][uUbB])(")', bygroups(String.Affix, String.Double), 'dqs'), ("([rR]|[uUbB][rR]|[rR][uUbB])(')", bygroups(String.Affix, String.Single), 'sqs'), ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')), ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')), ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')), ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')), include('name'), include('numbers'), ], 'keywords': [ (words(( 'assert', 'break', 'continue', 'del', 'elif', 'else', 'except', 'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass', 'print', 'raise', 'return', 'try', 'while', 'yield', 'yield from', 'as', 'with'), suffix=r'\b'), Keyword), ], 'builtins': [ (words(( '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange', 'zip'), prefix=r'(?>> a = 'foo' >>> print a foo >>> 1 / 0 Traceback (most recent call last): File "", line 1, in ZeroDivisionError: integer division or modulo by zero Additional options: `python3` Use Python 3 lexer for code. Default is ``True``. .. versionadded:: 1.0 .. versionchanged:: 2.5 Now defaults to ``True``. """ name = 'Python console session' aliases = ['pycon'] mimetypes = ['text/x-python-doctest'] def __init__(self, **options): self.python3 = get_bool_opt(options, 'python3', True) Lexer.__init__(self, **options) def get_tokens_unprocessed(self, text): if self.python3: pylexer = PythonLexer(**self.options) tblexer = PythonTracebackLexer(**self.options) else: pylexer = Python2Lexer(**self.options) tblexer = Python2TracebackLexer(**self.options) curcode = '' insertions = [] curtb = '' tbindex = 0 tb = 0 for match in line_re.finditer(text): line = match.group() if line.startswith('>>> ') or line.startswith('... '): tb = 0 insertions.append((len(curcode), [(0, Generic.Prompt, line[:4])])) curcode += line[4:] elif line.rstrip() == '...' and not tb: # only a new >>> prompt can end an exception block # otherwise an ellipsis in place of the traceback frames # will be mishandled insertions.append((len(curcode), [(0, Generic.Prompt, '...')])) curcode += line[3:] else: if curcode: yield from do_insertions( insertions, pylexer.get_tokens_unprocessed(curcode)) curcode = '' insertions = [] if (line.startswith('Traceback (most recent call last):') or re.match(' File "[^"]+", line \\d+\\n$', line)): tb = 1 curtb = line tbindex = match.start() elif line == 'KeyboardInterrupt\n': yield match.start(), Name.Class, line elif tb: curtb += line if not (line.startswith(' ') or line.strip() == '...'): tb = 0 for i, t, v in tblexer.get_tokens_unprocessed(curtb): yield tbindex+i, t, v curtb = '' else: yield match.start(), Generic.Output, line if curcode: yield from do_insertions(insertions, pylexer.get_tokens_unprocessed(curcode)) if curtb: for i, t, v in tblexer.get_tokens_unprocessed(curtb): yield tbindex+i, t, v class PythonTracebackLexer(RegexLexer): """ For Python 3.x tracebacks, with support for chained exceptions. .. versionadded:: 1.0 .. versionchanged:: 2.5 This is now the default ``PythonTracebackLexer``. It is still available as the alias ``Python3TracebackLexer``. """ name = 'Python Traceback' aliases = ['pytb', 'py3tb'] filenames = ['*.pytb', '*.py3tb'] mimetypes = ['text/x-python-traceback', 'text/x-python3-traceback'] tokens = { 'root': [ (r'\n', Text), (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'), (r'^During handling of the above exception, another ' r'exception occurred:\n\n', Generic.Traceback), (r'^The above exception was the direct cause of the ' r'following exception:\n\n', Generic.Traceback), (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), (r'^.*\n', Other), ], 'intb': [ (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), (r'^( File )("[^"]+")(, line )(\d+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text)), (r'^( )(.+)(\n)', bygroups(Text, using(PythonLexer), Text)), (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment, Text)), # for doctests... (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), (r'^([a-zA-Z_][\w.]*)(:?\n)', bygroups(Generic.Error, Text), '#pop') ], } Python3TracebackLexer = PythonTracebackLexer class Python2TracebackLexer(RegexLexer): """ For Python tracebacks. .. versionadded:: 0.7 .. versionchanged:: 2.5 This class has been renamed from ``PythonTracebackLexer``. ``PythonTracebackLexer`` now refers to the Python 3 variant. """ name = 'Python 2.x Traceback' aliases = ['py2tb'] filenames = ['*.py2tb'] mimetypes = ['text/x-python2-traceback'] tokens = { 'root': [ # Cover both (most recent call last) and (innermost last) # The optional ^C allows us to catch keyboard interrupt signals. (r'^(\^C)?(Traceback.*\n)', bygroups(Text, Generic.Traceback), 'intb'), # SyntaxError starts with this. (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), (r'^.*\n', Other), ], 'intb': [ (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), (r'^( File )("[^"]+")(, line )(\d+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text)), (r'^( )(.+)(\n)', bygroups(Text, using(Python2Lexer), Text)), (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment, Text)), # for doctests... (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), (r'^([a-zA-Z_]\w*)(:?\n)', bygroups(Generic.Error, Text), '#pop') ], } class CythonLexer(RegexLexer): """ For Pyrex and `Cython `_ source code. .. versionadded:: 1.1 """ name = 'Cython' aliases = ['cython', 'pyx', 'pyrex'] filenames = ['*.pyx', '*.pxd', '*.pxi'] mimetypes = ['text/x-cython', 'application/x-cython'] tokens = { 'root': [ (r'\n', Text), (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)), (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)), (r'[^\S\n]+', Text), (r'#.*$', Comment), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'(<)([a-zA-Z0-9.?]+)(>)', bygroups(Punctuation, Keyword.Type, Punctuation)), (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator), (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)', bygroups(Keyword, Number.Integer, Operator, Name, Operator, Name, Punctuation)), include('keywords'), (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'), # (should actually start a block with only cdefs) (r'(cdef)(:)', bygroups(Keyword, Punctuation)), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'), (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'), include('builtins'), include('backtick'), ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'), ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'), ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'), ('[uU]?"""', String, combined('stringescape', 'tdqs')), ("[uU]?'''", String, combined('stringescape', 'tsqs')), ('[uU]?"', String, combined('stringescape', 'dqs')), ("[uU]?'", String, combined('stringescape', 'sqs')), include('name'), include('numbers'), ], 'keywords': [ (words(( 'assert', 'async', 'await', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif', 'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil', 'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print', 'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'), Keyword), (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc), ], 'builtins': [ (words(( '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'unsigned', 'vars', 'xrange', 'zip'), prefix=r'(?`_, a functional and object-oriented programming language running on the CPython 3 VM. .. versionadded:: 1.6 """ name = 'dg' aliases = ['dg'] filenames = ['*.dg'] mimetypes = ['text/x-dg'] tokens = { 'root': [ (r'\s+', Text), (r'#.*?$', Comment.Single), (r'(?i)0b[01]+', Number.Bin), (r'(?i)0o[0-7]+', Number.Oct), (r'(?i)0x[0-9a-f]+', Number.Hex), (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float), (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float), (r'(?i)[+-]?[0-9]+j?', Number.Integer), (r"(?i)(br|r?b?)'''", String, combined('stringescape', 'tsqs', 'string')), (r'(?i)(br|r?b?)"""', String, combined('stringescape', 'tdqs', 'string')), (r"(?i)(br|r?b?)'", String, combined('stringescape', 'sqs', 'string')), (r'(?i)(br|r?b?)"', String, combined('stringescape', 'dqs', 'string')), (r"`\w+'*`", Operator), (r'\b(and|in|is|or|where)\b', Operator.Word), (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator), (words(( 'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'', 'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object', 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', 'super', 'tuple', 'tuple\'', 'type'), prefix=r'(?