530 lines
18 KiB
Python
530 lines
18 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
pygments.lexers.erlang
|
|
~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Lexers for Erlang.
|
|
|
|
:copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
|
|
:license: BSD, see LICENSE for details.
|
|
"""
|
|
|
|
import re
|
|
|
|
from pygments.lexer import Lexer, RegexLexer, bygroups, words, do_insertions, \
|
|
include, default
|
|
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
|
|
Number, Punctuation, Generic
|
|
|
|
__all__ = ['ErlangLexer', 'ErlangShellLexer', 'ElixirConsoleLexer',
|
|
'ElixirLexer']
|
|
|
|
|
|
line_re = re.compile('.*?\n')
|
|
|
|
|
|
class ErlangLexer(RegexLexer):
|
|
"""
|
|
For the Erlang functional programming language.
|
|
|
|
Blame Jeremy Thurgood (http://jerith.za.net/).
|
|
|
|
.. versionadded:: 0.9
|
|
"""
|
|
|
|
name = 'Erlang'
|
|
aliases = ['erlang']
|
|
filenames = ['*.erl', '*.hrl', '*.es', '*.escript']
|
|
mimetypes = ['text/x-erlang']
|
|
|
|
keywords = (
|
|
'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
|
|
'let', 'of', 'query', 'receive', 'try', 'when',
|
|
)
|
|
|
|
builtins = ( # See erlang(3) man page
|
|
'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
|
|
'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
|
|
'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
|
|
'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
|
|
'float', 'float_to_list', 'fun_info', 'fun_to_list',
|
|
'function_exported', 'garbage_collect', 'get', 'get_keys',
|
|
'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
|
|
'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
|
|
'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
|
|
'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
|
|
'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
|
|
'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
|
|
'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
|
|
'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
|
|
'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
|
|
'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
|
|
'pid_to_list', 'port_close', 'port_command', 'port_connect',
|
|
'port_control', 'port_call', 'port_info', 'port_to_list',
|
|
'process_display', 'process_flag', 'process_info', 'purge_module',
|
|
'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
|
|
'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
|
|
'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
|
|
'spawn_opt', 'split_binary', 'start_timer', 'statistics',
|
|
'suspend_process', 'system_flag', 'system_info', 'system_monitor',
|
|
'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
|
|
'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
|
|
'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
|
|
)
|
|
|
|
operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)'
|
|
word_operators = (
|
|
'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
|
|
'div', 'not', 'or', 'orelse', 'rem', 'xor'
|
|
)
|
|
|
|
atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')"
|
|
|
|
variable_re = r'(?:[A-Z_]\w*)'
|
|
|
|
esc_char_re = r'[bdefnrstv\'"\\]'
|
|
esc_octal_re = r'[0-7][0-7]?[0-7]?'
|
|
esc_hex_re = r'(?:x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\})'
|
|
esc_ctrl_re = r'\^[a-zA-Z]'
|
|
escape_re = r'(?:\\(?:'+esc_char_re+r'|'+esc_octal_re+r'|'+esc_hex_re+r'|'+esc_ctrl_re+r'))'
|
|
|
|
macro_re = r'(?:'+variable_re+r'|'+atom_re+r')'
|
|
|
|
base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'
|
|
|
|
tokens = {
|
|
'root': [
|
|
(r'\s+', Text),
|
|
(r'%.*\n', Comment),
|
|
(words(keywords, suffix=r'\b'), Keyword),
|
|
(words(builtins, suffix=r'\b'), Name.Builtin),
|
|
(words(word_operators, suffix=r'\b'), Operator.Word),
|
|
(r'^-', Punctuation, 'directive'),
|
|
(operators, Operator),
|
|
(r'"', String, 'string'),
|
|
(r'<<', Name.Label),
|
|
(r'>>', Name.Label),
|
|
('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
|
|
('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()',
|
|
bygroups(Name.Function, Text, Punctuation)),
|
|
(r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer),
|
|
(r'[+-]?\d+', Number.Integer),
|
|
(r'[+-]?\d+.\d+', Number.Float),
|
|
(r'[]\[:_@\".{}()|;,]', Punctuation),
|
|
(variable_re, Name.Variable),
|
|
(atom_re, Name),
|
|
(r'\?'+macro_re, Name.Constant),
|
|
(r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
|
|
(r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label),
|
|
|
|
# Erlang script shebang
|
|
(r'\A#!.+\n', Comment.Hashbang),
|
|
|
|
# EEP 43: Maps
|
|
# http://www.erlang.org/eeps/eep-0043.html
|
|
(r'#\{', Punctuation, 'map_key'),
|
|
],
|
|
'string': [
|
|
(escape_re, String.Escape),
|
|
(r'"', String, '#pop'),
|
|
(r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol),
|
|
(r'[^"\\~]+', String),
|
|
(r'~', String),
|
|
],
|
|
'directive': [
|
|
(r'(define)(\s*)(\()('+macro_re+r')',
|
|
bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'),
|
|
(r'(record)(\s*)(\()('+macro_re+r')',
|
|
bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'),
|
|
(atom_re, Name.Entity, '#pop'),
|
|
],
|
|
'map_key': [
|
|
include('root'),
|
|
(r'=>', Punctuation, 'map_val'),
|
|
(r':=', Punctuation, 'map_val'),
|
|
(r'\}', Punctuation, '#pop'),
|
|
],
|
|
'map_val': [
|
|
include('root'),
|
|
(r',', Punctuation, '#pop'),
|
|
(r'(?=\})', Punctuation, '#pop'),
|
|
],
|
|
}
|
|
|
|
|
|
class ErlangShellLexer(Lexer):
|
|
"""
|
|
Shell sessions in erl (for Erlang code).
|
|
|
|
.. versionadded:: 1.1
|
|
"""
|
|
name = 'Erlang erl session'
|
|
aliases = ['erl']
|
|
filenames = ['*.erl-sh']
|
|
mimetypes = ['text/x-erl-shellsession']
|
|
|
|
_prompt_re = re.compile(r'(?:\([\w@_.]+\))?\d+>(?=\s|\Z)')
|
|
|
|
def get_tokens_unprocessed(self, text):
|
|
erlexer = ErlangLexer(**self.options)
|
|
|
|
curcode = ''
|
|
insertions = []
|
|
for match in line_re.finditer(text):
|
|
line = match.group()
|
|
m = self._prompt_re.match(line)
|
|
if m is not None:
|
|
end = m.end()
|
|
insertions.append((len(curcode),
|
|
[(0, Generic.Prompt, line[:end])]))
|
|
curcode += line[end:]
|
|
else:
|
|
if curcode:
|
|
yield from do_insertions(insertions,
|
|
erlexer.get_tokens_unprocessed(curcode))
|
|
curcode = ''
|
|
insertions = []
|
|
if line.startswith('*'):
|
|
yield match.start(), Generic.Traceback, line
|
|
else:
|
|
yield match.start(), Generic.Output, line
|
|
if curcode:
|
|
yield from do_insertions(insertions,
|
|
erlexer.get_tokens_unprocessed(curcode))
|
|
|
|
|
|
def gen_elixir_string_rules(name, symbol, token):
|
|
states = {}
|
|
states['string_' + name] = [
|
|
(r'[^#%s\\]+' % (symbol,), token),
|
|
include('escapes'),
|
|
(r'\\.', token),
|
|
(r'(%s)' % (symbol,), bygroups(token), "#pop"),
|
|
include('interpol')
|
|
]
|
|
return states
|
|
|
|
|
|
def gen_elixir_sigstr_rules(term, term_class, token, interpol=True):
|
|
if interpol:
|
|
return [
|
|
(r'[^#%s\\]+' % (term_class,), token),
|
|
include('escapes'),
|
|
(r'\\.', token),
|
|
(r'%s[a-zA-Z]*' % (term,), token, '#pop'),
|
|
include('interpol')
|
|
]
|
|
else:
|
|
return [
|
|
(r'[^%s\\]+' % (term_class,), token),
|
|
(r'\\.', token),
|
|
(r'%s[a-zA-Z]*' % (term,), token, '#pop'),
|
|
]
|
|
|
|
|
|
class ElixirLexer(RegexLexer):
|
|
"""
|
|
For the `Elixir language <http://elixir-lang.org>`_.
|
|
|
|
.. versionadded:: 1.5
|
|
"""
|
|
|
|
name = 'Elixir'
|
|
aliases = ['elixir', 'ex', 'exs']
|
|
filenames = ['*.ex', '*.eex', '*.exs']
|
|
mimetypes = ['text/x-elixir']
|
|
|
|
KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch')
|
|
KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in')
|
|
BUILTIN = (
|
|
'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise',
|
|
'quote', 'unquote', 'unquote_splicing', 'throw', 'super',
|
|
)
|
|
BUILTIN_DECLARATION = (
|
|
'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop',
|
|
'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback',
|
|
)
|
|
|
|
BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias')
|
|
CONSTANT = ('nil', 'true', 'false')
|
|
|
|
PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__')
|
|
|
|
OPERATORS3 = (
|
|
'<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==',
|
|
'~>>', '<~>', '|~>', '<|>',
|
|
)
|
|
OPERATORS2 = (
|
|
'==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~',
|
|
'->', '<-', '|', '.', '=', '~>', '<~',
|
|
)
|
|
OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&')
|
|
|
|
PUNCTUATION = (
|
|
'\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']',
|
|
)
|
|
|
|
def get_tokens_unprocessed(self, text):
|
|
for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
|
|
if token is Name:
|
|
if value in self.KEYWORD:
|
|
yield index, Keyword, value
|
|
elif value in self.KEYWORD_OPERATOR:
|
|
yield index, Operator.Word, value
|
|
elif value in self.BUILTIN:
|
|
yield index, Keyword, value
|
|
elif value in self.BUILTIN_DECLARATION:
|
|
yield index, Keyword.Declaration, value
|
|
elif value in self.BUILTIN_NAMESPACE:
|
|
yield index, Keyword.Namespace, value
|
|
elif value in self.CONSTANT:
|
|
yield index, Name.Constant, value
|
|
elif value in self.PSEUDO_VAR:
|
|
yield index, Name.Builtin.Pseudo, value
|
|
else:
|
|
yield index, token, value
|
|
else:
|
|
yield index, token, value
|
|
|
|
def gen_elixir_sigil_rules():
|
|
# all valid sigil terminators (excluding heredocs)
|
|
terminators = [
|
|
(r'\{', r'\}', '}', 'cb'),
|
|
(r'\[', r'\]', r'\]', 'sb'),
|
|
(r'\(', r'\)', ')', 'pa'),
|
|
('<', '>', '>', 'ab'),
|
|
('/', '/', '/', 'slas'),
|
|
(r'\|', r'\|', '|', 'pipe'),
|
|
('"', '"', '"', 'quot'),
|
|
("'", "'", "'", 'apos'),
|
|
]
|
|
|
|
# heredocs have slightly different rules
|
|
triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]
|
|
|
|
token = String.Other
|
|
states = {'sigils': []}
|
|
|
|
for term, name in triquotes:
|
|
states['sigils'] += [
|
|
(r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
|
|
(name + '-end', name + '-intp')),
|
|
(r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
|
|
(name + '-end', name + '-no-intp')),
|
|
]
|
|
|
|
states[name + '-end'] = [
|
|
(r'[a-zA-Z]+', token, '#pop'),
|
|
default('#pop'),
|
|
]
|
|
states[name + '-intp'] = [
|
|
(r'^\s*' + term, String.Heredoc, '#pop'),
|
|
include('heredoc_interpol'),
|
|
]
|
|
states[name + '-no-intp'] = [
|
|
(r'^\s*' + term, String.Heredoc, '#pop'),
|
|
include('heredoc_no_interpol'),
|
|
]
|
|
|
|
for lterm, rterm, rterm_class, name in terminators:
|
|
states['sigils'] += [
|
|
(r'~[a-z]' + lterm, token, name + '-intp'),
|
|
(r'~[A-Z]' + lterm, token, name + '-no-intp'),
|
|
]
|
|
states[name + '-intp'] = \
|
|
gen_elixir_sigstr_rules(rterm, rterm_class, token)
|
|
states[name + '-no-intp'] = \
|
|
gen_elixir_sigstr_rules(rterm, rterm_class, token, interpol=False)
|
|
|
|
return states
|
|
|
|
op3_re = "|".join(re.escape(s) for s in OPERATORS3)
|
|
op2_re = "|".join(re.escape(s) for s in OPERATORS2)
|
|
op1_re = "|".join(re.escape(s) for s in OPERATORS1)
|
|
ops_re = r'(?:%s|%s|%s)' % (op3_re, op2_re, op1_re)
|
|
punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION)
|
|
alnum = r'\w'
|
|
name_re = r'(?:\.\.\.|[a-z_]%s*[!?]?)' % alnum
|
|
modname_re = r'[A-Z]%(alnum)s*(?:\.[A-Z]%(alnum)s*)*' % {'alnum': alnum}
|
|
complex_name_re = r'(?:%s|%s|%s)' % (name_re, modname_re, ops_re)
|
|
special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})'
|
|
|
|
long_hex_char_re = r'(\\x\{)([\da-fA-F]+)(\})'
|
|
hex_char_re = r'(\\x[\da-fA-F]{1,2})'
|
|
escape_char_re = r'(\\[abdefnrstv])'
|
|
|
|
tokens = {
|
|
'root': [
|
|
(r'\s+', Text),
|
|
(r'#.*$', Comment.Single),
|
|
|
|
# Various kinds of characters
|
|
(r'(\?)' + long_hex_char_re,
|
|
bygroups(String.Char,
|
|
String.Escape, Number.Hex, String.Escape)),
|
|
(r'(\?)' + hex_char_re,
|
|
bygroups(String.Char, String.Escape)),
|
|
(r'(\?)' + escape_char_re,
|
|
bygroups(String.Char, String.Escape)),
|
|
(r'\?\\?.', String.Char),
|
|
|
|
# '::' has to go before atoms
|
|
(r':::', String.Symbol),
|
|
(r'::', Operator),
|
|
|
|
# atoms
|
|
(r':' + special_atom_re, String.Symbol),
|
|
(r':' + complex_name_re, String.Symbol),
|
|
(r':"', String.Symbol, 'string_double_atom'),
|
|
(r":'", String.Symbol, 'string_single_atom'),
|
|
|
|
# [keywords: ...]
|
|
(r'(%s|%s)(:)(?=\s|\n)' % (special_atom_re, complex_name_re),
|
|
bygroups(String.Symbol, Punctuation)),
|
|
|
|
# @attributes
|
|
(r'@' + name_re, Name.Attribute),
|
|
|
|
# identifiers
|
|
(name_re, Name),
|
|
(r'(%%?)(%s)' % (modname_re,), bygroups(Punctuation, Name.Class)),
|
|
|
|
# operators and punctuation
|
|
(op3_re, Operator),
|
|
(op2_re, Operator),
|
|
(punctuation_re, Punctuation),
|
|
(r'&\d', Name.Entity), # anon func arguments
|
|
(op1_re, Operator),
|
|
|
|
# numbers
|
|
(r'0b[01]+', Number.Bin),
|
|
(r'0o[0-7]+', Number.Oct),
|
|
(r'0x[\da-fA-F]+', Number.Hex),
|
|
(r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float),
|
|
(r'\d(_?\d)*', Number.Integer),
|
|
|
|
# strings and heredocs
|
|
(r'"""\s*', String.Heredoc, 'heredoc_double'),
|
|
(r"'''\s*$", String.Heredoc, 'heredoc_single'),
|
|
(r'"', String.Double, 'string_double'),
|
|
(r"'", String.Single, 'string_single'),
|
|
|
|
include('sigils'),
|
|
|
|
(r'%\{', Punctuation, 'map_key'),
|
|
(r'\{', Punctuation, 'tuple'),
|
|
],
|
|
'heredoc_double': [
|
|
(r'^\s*"""', String.Heredoc, '#pop'),
|
|
include('heredoc_interpol'),
|
|
],
|
|
'heredoc_single': [
|
|
(r"^\s*'''", String.Heredoc, '#pop'),
|
|
include('heredoc_interpol'),
|
|
],
|
|
'heredoc_interpol': [
|
|
(r'[^#\\\n]+', String.Heredoc),
|
|
include('escapes'),
|
|
(r'\\.', String.Heredoc),
|
|
(r'\n+', String.Heredoc),
|
|
include('interpol'),
|
|
],
|
|
'heredoc_no_interpol': [
|
|
(r'[^\\\n]+', String.Heredoc),
|
|
(r'\\.', String.Heredoc),
|
|
(r'\n+', String.Heredoc),
|
|
],
|
|
'escapes': [
|
|
(long_hex_char_re,
|
|
bygroups(String.Escape, Number.Hex, String.Escape)),
|
|
(hex_char_re, String.Escape),
|
|
(escape_char_re, String.Escape),
|
|
],
|
|
'interpol': [
|
|
(r'#\{', String.Interpol, 'interpol_string'),
|
|
],
|
|
'interpol_string': [
|
|
(r'\}', String.Interpol, "#pop"),
|
|
include('root')
|
|
],
|
|
'map_key': [
|
|
include('root'),
|
|
(r':', Punctuation, 'map_val'),
|
|
(r'=>', Punctuation, 'map_val'),
|
|
(r'\}', Punctuation, '#pop'),
|
|
],
|
|
'map_val': [
|
|
include('root'),
|
|
(r',', Punctuation, '#pop'),
|
|
(r'(?=\})', Punctuation, '#pop'),
|
|
],
|
|
'tuple': [
|
|
include('root'),
|
|
(r'\}', Punctuation, '#pop'),
|
|
],
|
|
}
|
|
tokens.update(gen_elixir_string_rules('double', '"', String.Double))
|
|
tokens.update(gen_elixir_string_rules('single', "'", String.Single))
|
|
tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol))
|
|
tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol))
|
|
tokens.update(gen_elixir_sigil_rules())
|
|
|
|
|
|
class ElixirConsoleLexer(Lexer):
|
|
"""
|
|
For Elixir interactive console (iex) output like:
|
|
|
|
.. sourcecode:: iex
|
|
|
|
iex> [head | tail] = [1,2,3]
|
|
[1,2,3]
|
|
iex> head
|
|
1
|
|
iex> tail
|
|
[2,3]
|
|
iex> [head | tail]
|
|
[1,2,3]
|
|
iex> length [head | tail]
|
|
3
|
|
|
|
.. versionadded:: 1.5
|
|
"""
|
|
|
|
name = 'Elixir iex session'
|
|
aliases = ['iex']
|
|
mimetypes = ['text/x-elixir-shellsession']
|
|
|
|
_prompt_re = re.compile(r'(iex|\.{3})((?:\([\w@_.]+\))?\d+|\(\d+\))?> ')
|
|
|
|
def get_tokens_unprocessed(self, text):
|
|
exlexer = ElixirLexer(**self.options)
|
|
|
|
curcode = ''
|
|
in_error = False
|
|
insertions = []
|
|
for match in line_re.finditer(text):
|
|
line = match.group()
|
|
if line.startswith('** '):
|
|
in_error = True
|
|
insertions.append((len(curcode),
|
|
[(0, Generic.Error, line[:-1])]))
|
|
curcode += line[-1:]
|
|
else:
|
|
m = self._prompt_re.match(line)
|
|
if m is not None:
|
|
in_error = False
|
|
end = m.end()
|
|
insertions.append((len(curcode),
|
|
[(0, Generic.Prompt, line[:end])]))
|
|
curcode += line[end:]
|
|
else:
|
|
if curcode:
|
|
yield from do_insertions(
|
|
insertions, exlexer.get_tokens_unprocessed(curcode))
|
|
curcode = ''
|
|
insertions = []
|
|
token = Generic.Error if in_error else Generic.Output
|
|
yield match.start(), token, line
|
|
if curcode:
|
|
yield from do_insertions(
|
|
insertions, exlexer.get_tokens_unprocessed(curcode))
|