# Source code for parce.lang.toml

# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
Tom's Obvious, Minimal Language.
https://github.com/toml-lang/toml
"""

# Names exported by ``from parce.lang.toml import *``.
__all__ = ('Toml',)

import re

from parce import Language, lexicon, skip, default_action, default_target
from parce.action import (
    Bracket, Comment, Delimiter, Invalid, Literal, Number, Name, Operator,
    Separator, String, Whitespace)
from parce.rule import TEXT, bygroup, call, select


# Date and time patterns, modelled after the RFC 3339 grammar:
# https://tools.ietf.org/html/rfc3339#section-5.6
# Four-digit year, two-digit month and two-digit day, separated by hyphens.
RE_FULL_DATE = r"\d{4}-\d\d-\d\d"
# hh:mm:ss with an optional fractional-seconds part.
RE_PARTIAL_TIME = r"\d\d:\d\d:\d\d(?:\.\d+)?"
# Signed numeric offset, e.g. +01:00 or -07:30.
RE_TIME_NUMOFFSET = r"[+-]\d\d:\d\d"
# Either the letter z/Z or a numeric offset.
RE_TIME_OFFSET = r"(?:[zZ]|" + RE_TIME_NUMOFFSET + ")"
RE_FULL_TIME = RE_PARTIAL_TIME + RE_TIME_OFFSET + '?' # offset may be omitted
# A date and a time, separated by a single space or the letter t/T.
RE_DATE_TIME = RE_FULL_DATE + "[ tT]" + RE_FULL_TIME

# Integer patterns with a radix prefix; an optional single underscore may
# precede each digit.
RE_HEX = r'0[xX](?:_?[0-9a-fA-F])+'
RE_OCT = r'0[oO](?:_?[0-7])+'
RE_BIN = r'0[bB](?:_?[01])+'
# Decimal integer or float: optional sign, digits (optionally separated by
# single underscores), optional fractional part and optional exponent.
RE_DEC = r'[-+]?\d(?:_?\d)*(?:\.(?:\d(?:_?\d)*)+)?(?:[eE][-+]?\d(?:_?\d)*)?'


class Toml(Language):
    """Language definition for TOML (Tom's Obvious, Minimal Language).

    The lexicons below tokenize TOML text: ``root`` dispatches to table
    headers, comments and key/value pairs; ``keys()`` and ``values()`` are
    shared rule generators that are reused by several lexicons.

    """

    @lexicon
    def root(cls):
        """Top level: comments, table headers and key/value pairs."""
        yield '#', Comment, cls.comment
        # ``[[`` must be tried before ``[``; a dot directly after the
        # opening bracket(s) is captured in its own group and marked Invalid.
        yield (r'(\[\[)(?:[ \t]*(\.))?',
               bygroup(Bracket.Start, Invalid), cls.array_table)
        yield (r'(\[)(?:[ \t]*(\.))?',
               bygroup(Bracket.Start, Invalid), cls.table)
        # A line starting with ``=`` or ``.`` has no key: flag it up to the
        # end of the line or the start of a comment.
        yield r'=[^\n#]*', Invalid
        yield r'\.[^\n#]*', Invalid
        yield r'\s+', skip
        # Anything else starts a key.
        yield default_target, cls.key

    @lexicon
    def table(cls):
        """The name inside a ``[table]`` header."""
        # Closing bracket, optionally preceded by a stray dot (Invalid).
        # The text after the bracket up to a newline or comment is passed to
        # str.isspace(); the result selects between Invalid and skip.
        yield (r'(?:(\.)[ \t]*)?(\])([^\n#]*)',
               bygroup(Invalid, Bracket.End,
                       select(call(str.isspace, TEXT), Invalid, skip)),
               -1)
        yield from cls.keys(Name.Namespace)

    @lexicon
    def array_table(cls):
        """The name inside a ``[[array-of-tables]]`` header."""
        # Same as ``table``, but closed by a double bracket.
        yield (r'(?:(\.)[ \t]*)?(\]\])([^\n#]*)',
               bygroup(Invalid, Bracket.End,
                       select(call(str.isspace, TEXT), Invalid, skip)),
               -1)
        yield from cls.keys(Name.Namespace)

    @lexicon(re_flags=re.MULTILINE)
    def key(cls):
        """A (possibly dotted) key, up to the ``=`` sign."""
        yield '#', Comment, -1, cls.comment
        # The assignment leaves the key lexicon and enters the value.
        yield r'=', Operator.Assignment, -1, cls.value
        yield from cls.keys()

    @lexicon(re_flags=re.MULTILINE)
    def value(cls):
        """The value after ``=``; ends at a comment or at the end of the line."""
        yield '#', Comment, -1, cls.comment
        # With re.MULTILINE, ``$`` matches at the end of the line; no token
        # is created (action None) and the value lexicon is left.
        yield r'$', None, -1
        yield from cls.values()

    @classmethod
    def keys(cls, action=Name.Variable):
        """Yield the rules for bare, quoted and dotted keys.

        ``action`` is the action used for bare keys (``Name.Variable`` by
        default; the table lexicons pass ``Name.Namespace``).

        """
        yield r'[A-Za-z0-9_-]+', action
        # A dot only counts as a delimiter when something key-like follows.
        yield r'''(\.)(?=[ \t]*[\}\],'"A-Za-z0-9_-])''', Delimiter.Dot
        yield r'"', String, cls.string_basic
        yield r"'", String, cls.string_literal
        yield r'[ \t]+', skip
        yield r'[^\s#=\]]+', Invalid

    @classmethod
    def values(cls):
        """Yield the rules for all TOML value types."""
        yield '#', Comment, cls.comment
        yield r'\[', Bracket, cls.array
        yield r'\{', Bracket, cls.inline_table
        # Triple-quoted strings must be tried before single-quoted ones.
        yield r'"""', String, cls.string_multiline_basic
        yield r'"', String, cls.string_basic
        yield (r"(''')(\n)?", bygroup(String, Whitespace),
               cls.string_multiline_literal)
        yield r"'", String, cls.string_literal
        # Date/time patterns come before numbers, so that their leading
        # digits are not matched as a decimal number.
        yield RE_DATE_TIME, Literal.Timestamp
        yield RE_FULL_DATE, Literal.Timestamp
        yield RE_FULL_TIME, Literal.Timestamp
        # Prefixed integers come before RE_DEC, which would otherwise match
        # just the leading ``0``.
        yield RE_OCT, Number
        yield RE_BIN, Number
        yield RE_HEX, Number
        yield RE_DEC, Number
        yield r"[-+]?\b(?:inf|nan)\b", Number
        yield r"\b(?:true|false)\b", Name.Constant
        yield r'\S+', Invalid

    @lexicon
    def array(cls):
        """The contents of an array, after the opening ``[``."""
        # Closing bracket; the text following it (up to a comma, brace,
        # comment, newline or bracket) is passed to str.isspace(), and the
        # result selects between Invalid and skip.
        yield (r'(\])([^,}#\n\]]*)',
               bygroup(Bracket,
                       select(call(str.isspace, TEXT), Invalid, skip)),
               -1)
        yield r',', Separator
        yield from cls.values()

    @lexicon
    def inline_table(cls):
        """The contents of an inline table, after the opening ``{``."""
        yield '#', Comment, cls.comment
        yield r'\}', Bracket, -1
        # An ``=`` here is not preceded by a key.
        yield r'=', Operator.Assignment.Invalid
        yield r'\s+', skip
        yield default_target, cls.inline_key

    @lexicon
    def inline_key(cls):
        """A key inside an inline table."""
        yield r'=', Operator.Assignment, -1, cls.inline_value
        # A closing brace directly after a key: the value is missing.
        yield r'\}', Bracket.Invalid, -1
        yield from cls.keys()

    @lexicon
    def inline_value(cls):
        """A value inside an inline table."""
        yield '#', Comment, cls.comment
        # The closing brace uses target -2, the comma target -1.
        yield r'\}', Bracket, -2
        yield r',', Separator, -1
        yield from cls.values()

    @lexicon
    def string_multiline_basic(cls):
        """A multi-line basic string, delimited by three double quotes."""
        # Closing quotes; non-blank text glued to them is Invalid.
        yield r'(""")([^\s,}#\]]*)', bygroup(String, Invalid), -1
        # A backslash followed by whitespace.
        yield r'\\\s+', Whitespace
        # TOML defines both \uXXXX and \UXXXXXXXX as Unicode escapes.
        yield (r'\\(?:["\\bfnrt]|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})',
               String.Escape)
        yield r'\\.', String.Invalid
        yield default_action, String

    @lexicon(re_flags=re.MULTILINE)
    def string_basic(cls):
        """A single-line basic string, delimited by double quotes."""
        # Closing quote; non-blank text glued to it is Invalid.
        yield r'(")([^\s,}#\]=]*)', bygroup(String, Invalid), -1
        # TOML defines both \uXXXX and \UXXXXXXXX as Unicode escapes.
        yield (r'\\(?:["\\bfnrt]|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})',
               String.Escape)
        yield r'\\.', String.Invalid
        # The end of the line was reached without a closing quote.
        yield r'[^"]*?$', String.Invalid, -1
        yield default_action, String

    @lexicon
    def string_multiline_literal(cls):
        """A multi-line literal string, delimited by three single quotes."""
        yield r"(''')([^\s,}#\]]*)", bygroup(String, Invalid), -1
        yield default_action, String

    @lexicon(re_flags=re.MULTILINE)
    def string_literal(cls):
        """A single-line literal string, delimited by single quotes."""
        yield r"(')([^\s,}#\]=]*)", bygroup(String, Invalid), -1
        # The end of the line was reached without a closing quote.
        yield r"[^']*?$", String.Invalid, -1
        yield default_action, String

    @lexicon(re_flags=re.MULTILINE)
    def comment(cls):
        """A comment, running to the end of the line."""
        yield from cls.comment_common()
        yield r'$', Comment, -1