Source code for parce.lang.javascript

# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
Parse JavaScript.

"""

__all__ = ('JavaScript',)

import re

from parce import Language, lexicon, skip, default_action
from parce.rule import TEXT, MATCH, arg, bygroup, call, dselect, select, words
from parce.action import (
    Bracket, Comment, Delimiter, Keyword, Literal, Name, Number, Operator,
    Separator, String)

from parce.unicharclass import categories
from . import javascript_words as js


# Characters that may start an identifier: "$", "_" and the members of the
# listed Unicode general categories. The value is interpolated inside a
# regex character class ("[...]") below.
RE_JS_IDENT_STARTCHAR = r'$_' + ''.join(
    categories.get(category)
    for category in ('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'))

# Characters that may continue an identifier: every start character, the two
# zero-width (non-)joiners and the members of four more Unicode categories.
RE_JS_IDENT_CHAR = RE_JS_IDENT_STARTCHAR + '\u200c\u200d' + ''.join(
    categories.get(category)
    for category in ('Mn', 'Mc', 'Nd', 'Pc'))

# A "\uXXXX" escape (exactly four hex digits) as it can appear in identifiers.
RE_JS_ESCAPE_CHAR = r'\\u[0-9a-fA-F]{4}'

# A full identifier: one start character or escape, followed by any number
# of continuation characters or escapes. ``_I_`` is a short alias meant for
# use in the f-string patterns of the lexicons.
RE_JS_IDENT_TOKEN = _I_ = (
    fr'(?:[{RE_JS_IDENT_STARTCHAR}]|{RE_JS_ESCAPE_CHAR})'
    fr'(?:[{RE_JS_IDENT_CHAR}]+|{RE_JS_ESCAPE_CHAR})*'
)

# A decimal numeric literal. Digit groups may be separated by single
# underscores. Two alternatives:
#   * a BigInt: digits followed by "n";
#   * a number: ".digits", or digits with an optional "." and optional
#     fraction digits, followed by an optional exponent.
# The sign of the exponent is optional ("1e5" is as valid as "1e+5"); a
# mandatory sign would make "1e5" lex as the number "1" plus a name "e5".
RE_JS_DECIMAL_NUMBER = (
    r'\d+(?:_\d+)*n'
    r'|(?:\.\d+(?:_\d+)*|\d+(?:_\d+)*(?:\.(?:\d+(?:_\d+)*)?)?)'
    r'(?:[eE][-+]?\d+(?:_\d+)*)?'
)

# A regular expression literal: "/", a non-empty body made of escaped
# characters, ordinary characters and "[...]" classes (which may contain an
# unescaped "/"), a closing "/" and then any number of flag letters.
# Several flags may be combined ("/x/gi"), so the flag set is repeated
# rather than optional; "d" and "v" are accepted as well.
RE_JS_REGEXP = r'/(?:\\.|[^\\\n/\[]|\[(?:\\.|[^\\\n\]])*\])+/[dgimsuvy]*'

class JavaScript(Language):
    """Language definition for JavaScript source text.

    ``root`` holds the general rules. The other lexicons handle the nested
    constructs that ``root`` (or one of them) switches to: brace scopes,
    calls, indexing, object and array literals, parenthesized expressions,
    strings, template literals and comments. Most of them add their own
    closing rule and then reuse the ``root`` rules.

    """

    @lexicon
    def root(cls):
        """The general rules, also reused inside the nested lexicons."""
        yield r"'", String.Start, cls.string("'")
        yield r'"', String.Start, cls.string('"')
        yield r'`', String.Start, cls.template_literal
        yield '//', Comment, cls.singleline_comment
        yield r'/\*', Comment.Start, cls.multiline_comment

        # Keyword followed by a name. There is deliberately no trailing \b
        # after the identifier: the identifier pattern is greedy and already
        # consumes the whole name, while \b fails after names ending in "$"
        # (or another non-\w identifier character), which made the regex
        # backtrack and split the name or reject the rule altogether.
        yield fr'(const|let|var)\s+({_I_})', bygroup(Keyword, Name.Variable.Definition)
        yield fr'(function)\s+({_I_})', bygroup(Keyword, Name.Function.Definition)
        yield fr'(new)\s+({_I_})', bygroup(Keyword, Name.Class.Definition)

        # Word lists from the javascript_words module.
        yield words(js.JAVASCRIPT_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword
        yield words(js.JAVASCRIPT_DECLARATORS, prefix=r'\b', suffix=r'\b'), Keyword
        yield words(js.JAVASCRIPT_RESERVED_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword.Reserved
        yield words(js.JAVASCRIPT_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant
        yield words(js.JAVASCRIPT_BUILTINS, prefix=r'\b', suffix=r'\b'), Name.Builtin
        yield words(js.JAVASCRIPT_PROTOTYPES, prefix=r'\b', suffix=r'\b'), Name.Builtin

        # Member access ".name", optionally directly followed by "(" or "[".
        # Group 3 (the bracket, if any) selects both the action for the name
        # and the lexicon to switch to.
        yield (
            fr'(\.)\s*({_I_})(?:\s*([\(\[]))?',
            bygroup(
                Delimiter,
                dselect(MATCH[3], {'(': Name.Method}, Name.Attribute),
                Delimiter),
            dselect(MATCH[3], {'(': cls.call, '[': cls.index}),
        )

        # A plain name, optionally directly followed by "(" or "[".
        yield (
            fr'({_I_})(?:\s*([\(\[]))?',
            bygroup(
                dselect(MATCH[2], {'(': Name.Function}, Name.Variable),
                Delimiter),
            dselect(MATCH[2], {'(': cls.call, '[': cls.index}),
        )

        # NOTE(review): the rule above already matches every identifier, so
        # this rule looks unreachable. Kept unchanged; confirm the intended
        # classification before reordering or removing it.
        yield fr'{_I_}\b', select(call(str.isupper, TEXT), Name.Variable, Name.Class)

        ## numerical values (recently, underscore support inside numbers was added)
        yield '0[oO](?:_?[0-7])+n?', Number
        yield '0[bB](?:_?[01])+n?', Number
        yield '0[xX](?:_?[0-9a-fA-F])+n?', Number
        yield RE_JS_DECIMAL_NUMBER, Number

        yield r'\{', Bracket.Start, cls.scope
        yield r'\[', Bracket.Start, cls.array
        yield r'\(', Delimiter, cls.paren
        yield RE_JS_REGEXP, Literal.Regexp

        # Compound assignment, including "**=", "&&=", "||=" and "??=",
        # which would otherwise be split into an operator and a bare "=".
        yield r'(?:\*\*|<<|>>>?|&&|\|\||\?\?|[&|^*/%+-])=', Operator.Assignment
        yield r'&&?|\|\|?|<<|>>>?|[!=]==?|<=?|>=?|\*\*|[-+~!/*%^?:,]', Operator
        yield r'=', Operator.Assignment
        yield r';', Delimiter

    @lexicon
    def scope(cls):
        """A brace-delimited scope { ... } opened from ``root``."""
        yield r'\}', Bracket.End, -1
        yield from cls.root

    @lexicon
    def call(cls):
        """name(...) syntax."""
        yield r'\)', Delimiter, -1
        yield from cls.root

    @classmethod
    def expression(cls):
        """Yield the rules shared by the object, array and paren lexicons.

        These are the ``root`` rules, preceded by a rule that makes "{" start
        an object literal instead of a scope.

        """
        yield r'\{', Bracket.Start, cls.object
        yield from cls.root

    @lexicon
    def object(cls):
        """An object (dictionary) { ... }."""
        yield r'[:,]', Separator
        yield r'\}', Bracket.End, -1
        yield from cls.expression()

    @lexicon
    def array(cls):
        """An array [ ... ]."""
        yield r',', Separator
        yield r'\]', Bracket.End, -1
        yield from cls.expression()

    @lexicon
    def paren(cls):
        """An expression between ( ... )."""
        yield r',', Separator
        yield r'\)', Delimiter, -1
        yield from cls.expression()

    @lexicon
    def index(cls):
        """name[...] syntax."""
        yield r'\]', Delimiter, -1
        yield from cls.root

    @lexicon
    def string(cls):
        """A quoted string; the lexicon argument is the closing quote."""
        yield arg(), String.End, -1
        # Single-character escapes, \xHH, \uHHHH and \u{H...} (1-6 digits).
        yield (r'''\\(?:[0"'\\nrvtbf]'''
               r'|x[a-fA-F0-9]{2}'
               r'|u[a-fA-F0-9]{4}'
               r'|u\{[a-fA-F0-9]{1,6}\})'), String.Escape
        yield default_action, String

    @lexicon
    def template_literal(cls):
        """A backtick-quoted template literal, with ${ ... } substitutions."""
        # Reuse the string rules with the backtick as the closing quote.
        yield from cls.string('`')
        yield r'\\[$`]', String.Escape
        yield r'\$\{', Delimiter.Template, cls.template_literal_expression

    @lexicon
    def template_literal_expression(cls):
        """The expression inside ${ ... } in a template literal."""
        yield r'\}', Delimiter.Template, -1
        yield from cls.root

    #------------------ comments -------------------------
    @lexicon(re_flags=re.MULTILINE)
    def singleline_comment(cls):
        """A // comment, running to the end of the line."""
        # With re.MULTILINE, "$" also matches at the end of every line, so
        # the comment is left there; the (empty) match gets no action.
        yield '$', None, -1
        yield from cls.comment_common()

    @lexicon
    def multiline_comment(cls):
        """A /* ... */ comment."""
        yield r'\*/', Comment.End, -1
        yield from cls.comment_common()