# -*- coding: utf-8 -*-
"""
    pygments.lexers.ezhil
    ~~~~~~~~~~~~~~~~~~~~~

    Pygments lexers for Ezhil language.

    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, include, words
from pygments.token import Keyword, Text, Comment, Name
from pygments.token import String, Number, Punctuation, Operator

__all__ = ['EzhilLexer']


class EzhilLexer(RegexLexer):
    """
    Lexer for Ezhil, a Tamil script-based programming language.

    .. versionadded:: 2.1
    """
    name = 'Ezhil'
    aliases = ['ezhil']
    filenames = ['*.n']
    mimetypes = ['text/x-ezhil']
    flags = re.MULTILINE | re.UNICODE

    # Refer to tamil.utf8.tamil_letters from open-tamil for a stricter version of this.
    # This much simpler version is close enough, and includes combining marks.
    _TALETTERS = '[a-zA-Z_]|[\u0b80-\u0bff]'

    tokens = {
        'root': [
            # Keywords are tried first, so they win over every rule below.
            include('keywords'),
            # A comment runs from '#' up to and including the newline.
            (r'#.*\n', Comment.Single),
            (r'[@+/*,^\-%]|[!<>=]=?|&&?|\|\|?', Operator),
            # NOTE(review): 'இல்' is also an alternative of the 'keywords'
            # pattern, which is included earlier in this state, so this rule
            # is shadowed and the word is emitted as Keyword instead.
            ('இல்', Operator.Word),
            # Built-in functions; only a trailing word boundary is required.
            (words(('assert', 'max', 'min',
                    'நீளம்', 'சரம்_இடமாற்று', 'சரம்_கண்டுபிடி',
                    'பட்டியல்', 'பின்இணை', 'வரிசைப்படுத்து',
                    'எடு', 'தலைகீழ்', 'நீட்டிக்க', 'நுழைக்க', 'வை',
                    'கோப்பை_திற', 'கோப்பை_எழுது', 'கோப்பை_மூடு',
                    'pi', 'sin', 'cos', 'tan', 'sqrt', 'hypot', 'pow',
                    'exp', 'log', 'log10', 'exit',
                    ), suffix=r'\b'), Name.Builtin),
            (r'(True|False)\b', Keyword.Constant),
            # Any run of whitespace that does not contain a newline.
            (r'[^\S\n]+', Text),
            include('identifier'),
            include('literal'),
            (r'[(){}\[\]:;.]', Punctuation),
        ],
        'keywords': [
            # Alternatives are tried left to right, so longer words that share
            # a prefix with a shorter one (e.g. 'இல்லைஆனால்' / 'இல்லை' / 'இல்')
            # are listed before it.
            # NOTE(review): 'நிரல்பாகம்' occurs twice in this alternation; the
            # second occurrence is redundant.
            ('பதிப்பி|தேர்ந்தெடு|தேர்வு|ஏதேனில்|ஆனால்|இல்லைஆனால்|இல்லை|ஆக|ஒவ்வொன்றாக|இல்|வரை|செய்|முடியேனில்|பின்கொடு|முடி|நிரல்பாகம்|தொடர்|நிறுத்து|நிரல்பாகம்', Keyword),
        ],
        'identifier': [
            # One letter (ASCII letter, underscore or Tamil-block character)
            # followed by any number of letters or ASCII digits.
            ('(?:'+_TALETTERS+')(?:[0-9]|'+_TALETTERS+')*', Name),
        ],
        'literal': [
            # Double-quoted string, non-greedy, confined to a single line.
            (r'".*?"', String),
            # Floats are tried before integers so '1.5' is not split at '1'.
            (r'(?u)\d+((\.\d*)?[eE][+-]?\d+|\.\d*)', Number.Float),
            (r'(?u)\d+', Number.Integer),
        ]
    }

    def analyse_text(text):
        """Guess whether *text* is Ezhil from the amount of Tamil script in it.

        Returns 0.25 when *text* contains more than ten characters from the
        Tamil Unicode block (U+0B80-U+0BFF); otherwise falls through and
        returns None.

        The heuristic is deliberately crude: source code in another language
        that merely contains Tamil string literals will also be matched.
        """
        if len(re.findall(r'[\u0b80-\u0bff]', text)) > 10:
            return 0.25

    def __init__(self, **options):
        """Create the lexer, defaulting the input encoding to UTF-8.

        Accepts the same keyword options as the base lexer. The input
        encoding is taken from ``inencoding`` if given, otherwise from
        ``encoding``, otherwise it is ``'utf-8'``.
        """
        super().__init__(**options)
        # Pygments documents ``inencoding`` as overriding ``encoding``.
        # Assigning only from ``encoding`` here would discard an explicit
        # ``inencoding`` supplied by the caller, so check it first.
        self.encoding = (options.get('inencoding')
                         or options.get('encoding', 'utf-8'))