# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Tom's Obvious, Minimal Language.
https://github.com/toml-lang/toml
"""
__all__ = ('Toml',)
import re
from parce import Language, lexicon, skip, default_action, default_target
from parce.action import (
Bracket, Comment, Delimiter, Invalid, Literal, Number, Name, Operator,
Separator, String, Whitespace)
from parce.rule import TEXT, bygroup, call, select
# Date and time patterns, modelled after RFC 3339:
# https://tools.ietf.org/html/rfc3339#section-5.6
RE_FULL_DATE = r"\d{4}-\d\d-\d\d"
RE_PARTIAL_TIME = r"\d\d:\d\d:\d\d(?:\.\d+)?"
RE_TIME_NUMOFFSET = r"[+-]\d\d:\d\d"
RE_TIME_OFFSET = r"(?:[zZ]|" + RE_TIME_NUMOFFSET + ")"
RE_FULL_TIME = RE_PARTIAL_TIME + RE_TIME_OFFSET + '?' # offset may be omitted
RE_DATE_TIME = RE_FULL_DATE + "[ tT]" + RE_FULL_TIME

# Number patterns; a single underscore is accepted before any digit that
# follows the prefix (hex/oct/bin) or the first digit (decimal).
RE_HEX = r'0[xX](?:_?[0-9a-fA-F])+'
RE_OCT = r'0[oO](?:_?[0-7])+'
RE_BIN = r'0[bB](?:_?[01])+'
# Optional sign, integer part, optional fraction, optional exponent. The
# fraction is written without a nested quantifier: it matches exactly the same
# strings as ``(?:\d(?:_?\d)*)+`` would, but cannot backtrack exponentially
# when the pattern is used in an anchored match that fails.
RE_DEC = r'[-+]?\d(?:_?\d)*(?:\.\d(?:_?\d)*)?(?:[eE][-+]?\d(?:_?\d)*)?'
class Toml(Language):
    """Lexicon definitions for TOML (Tom's Obvious, Minimal Language)."""

    @lexicon
    def root(cls):
        """Top level: comments, table headers and the start of key/value pairs."""
        yield '#', Comment, cls.comment
        # the optional second group catches a dot right after the opening
        # bracket(s) and marks it Invalid
        yield r'(\[\[)(?:[ \t]*(\.))?', bygroup(Bracket.Start, Invalid), cls.array_table
        yield r'(\[)(?:[ \t]*(\.))?', bygroup(Bracket.Start, Invalid), cls.table
        # a "=" or "." that is not preceded by a key is Invalid, up to the
        # end of the line or the start of a comment
        yield r'=[^\n#]*', Invalid
        yield r'\.[^\n#]*', Invalid
        yield r'\s+', skip
        # anything else starts a key
        yield default_target, cls.key

    @lexicon
    def table(cls):
        """Inside a ``[table]`` header, until the closing bracket."""
        # group 1: a dot right before the closing bracket (Invalid)
        # group 3: the remainder of the line; select() picks ``skip`` when
        # str.isspace is true for that text and ``Invalid`` otherwise
        yield r'(?:(\.)[ \t]*)?(\])([^\n#]*)', \
            bygroup(Invalid, Bracket.End, select(call(str.isspace, TEXT), Invalid, skip)), -1
        yield from cls.keys(Name.Namespace)

    @lexicon
    def array_table(cls):
        """Inside a ``[[array of tables]]`` header, until the closing brackets."""
        # same layout as in ``table``, but closed by a double bracket
        yield r'(?:(\.)[ \t]*)?(\]\])([^\n#]*)', \
            bygroup(Invalid, Bracket.End, select(call(str.isspace, TEXT), Invalid, skip)), -1
        yield from cls.keys(Name.Namespace)

    @lexicon(re_flags=re.MULTILINE)
    def key(cls):
        """A (possibly dotted) key, up to the ``=`` sign or a comment."""
        yield '#', Comment, -1, cls.comment
        # leave the key and continue with the value
        yield r'=', Operator.Assignment, -1, cls.value
        yield from cls.keys()

    @lexicon(re_flags=re.MULTILINE)
    def value(cls):
        """The value following a ``=`` sign, up to the end of the line."""
        yield '#', Comment, -1, cls.comment
        # with re.MULTILINE, "$" matches at the end of every line
        yield r'$', None, -1
        yield from cls.values()

    @classmethod
    def keys(cls, action=Name.Variable):
        """Yield the rules for key names.

        ``action`` is the action given to bare (unquoted) key parts; it
        defaults to ``Name.Variable``.

        """
        yield r'[A-Za-z0-9_-]+', action
        # a dot is only a delimiter when something that can continue or close
        # the key follows it
        yield r'''(\.)(?=[ \t]*[\}\],'"A-Za-z0-9_-])''', Delimiter.Dot
        yield r'"', String, cls.string_basic
        yield r"'", String, cls.string_literal
        yield r'[ \t]+', skip
        yield r'[^\s#=\]]+', Invalid

    @classmethod
    def values(cls):
        """Yield the rules for all value types."""
        yield '#', Comment, cls.comment
        yield r'\[', Bracket, cls.array
        yield r'\{', Bracket, cls.inline_table
        # the triple-quoted forms are listed before the single-quoted ones
        yield r'"""', String, cls.string_multiline_basic
        yield r'"', String, cls.string_basic
        yield r"(''')(\n)?", bygroup(String, Whitespace), cls.string_multiline_literal
        yield r"'", String, cls.string_literal
        # date/time patterns are listed before the plain number patterns
        yield RE_DATE_TIME, Literal.Timestamp
        yield RE_FULL_DATE, Literal.Timestamp
        yield RE_FULL_TIME, Literal.Timestamp
        # prefixed integers are listed before RE_DEC, which would otherwise
        # match their leading "0"
        yield RE_OCT, Number
        yield RE_BIN, Number
        yield RE_HEX, Number
        yield RE_DEC, Number
        yield r"[-+]?\b(?:inf|nan)\b", Number
        yield r"\b(?:true|false)\b", Name.Constant
        yield r'\S+', Invalid

    @lexicon
    def array(cls):
        """Inside an array ``[ ... ]``."""
        # text after the closing bracket is skipped when str.isspace is true
        # for it, and marked Invalid otherwise
        yield r'(\])([^,}#\n\]]*)', bygroup(Bracket, select(call(str.isspace, TEXT), Invalid, skip)), -1
        yield r',', Separator
        yield from cls.values()

    @lexicon
    def inline_table(cls):
        """Inside an inline table ``{ ... }``, at a position where a key may start."""
        yield '#', Comment, cls.comment
        yield r'\}', Bracket, -1
        # a "=" without a key before it
        yield r'=', Operator.Assignment.Invalid
        yield r'\s+', skip
        yield default_target, cls.inline_key

    @lexicon
    def inline_key(cls):
        """A key inside an inline table."""
        yield r'=', Operator.Assignment, -1, cls.inline_value
        # closing brace while a key is still waiting for its value
        yield r'\}', Bracket.Invalid, -1
        yield from cls.keys()

    @lexicon
    def inline_value(cls):
        """A value inside an inline table."""
        yield '#', Comment, cls.comment
        # -2: leave this lexicon and the enclosing inline_table as well
        yield r'\}', Bracket, -2
        yield r',', Separator, -1
        yield from cls.values()

    @lexicon
    def string_multiline_basic(cls):
        """Inside a multi-line basic string (three double quotes)."""
        # non-delimiter text directly after the closing quotes is Invalid
        yield r'(""")([^\s,}#\]]*)', bygroup(String, Invalid), -1
        # a backslash followed by whitespace
        yield r'\\\s+', Whitespace
        # TOML escapes, including both \uXXXX and \UXXXXXXXX
        yield r'\\(?:["\\bfnrt]|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})', String.Escape
        yield r'\\.', String.Invalid
        yield default_action, String

    @lexicon(re_flags=re.MULTILINE)
    def string_basic(cls):
        """Inside a single-line basic string (double quotes)."""
        # non-delimiter text directly after the closing quote is Invalid
        yield r'(")([^\s,}#\]=]*)', bygroup(String, Invalid), -1
        # TOML escapes, including both \uXXXX and \UXXXXXXXX
        yield r'\\(?:["\\bfnrt]|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})', String.Escape
        yield r'\\.', String.Invalid
        # the line ends without a closing quote
        yield r'[^"]*?$', String.Invalid, -1
        yield default_action, String

    @lexicon
    def string_multiline_literal(cls):
        """Inside a multi-line literal string (three single quotes)."""
        yield r"(''')([^\s,}#\]]*)", bygroup(String, Invalid), -1
        yield default_action, String

    @lexicon(re_flags=re.MULTILINE)
    def string_literal(cls):
        """Inside a single-line literal string (single quotes)."""
        yield r"(')([^\s,}#\]=]*)", bygroup(String, Invalid), -1
        # the line ends without a closing quote
        yield r"[^']*?$", String.Invalid, -1
        yield default_action, String

    @lexicon(re_flags=re.MULTILINE)
    def comment(cls):
        """A comment, running to the end of the line."""
        yield from cls.comment_common()
        yield r'$', Comment, -1