# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Scheme.
Tries to adhere to the official Scheme syntax, especially the complicated
number syntax. See for more information:
* https://www.gnu.org/software/guile/manual/r5rs.html#Formal-syntax
* https://www.scheme.com/tspl4/grammar.html
Besides the :class:`Scheme` language definition and a subclass
:class:`SchemeLily` that is used for Scheme code embedded into LilyPond, there
is a helper function :func:`scheme_number` that parses the Scheme number tokens
into a Python value. This function can be used when transforming/parsing the
Scheme tokens into some data model that accesses the Scheme values.
"""
__all__ = ('Scheme', 'SchemeLily', 'scheme_number', 'scheme_number_from_text')
import re
from parce import Language, lexicon, skip, default_action, default_target
from parce.action import (
Bracket, Character, Comment, Delimiter, Keyword, Name, Number, Operator,
Separator, String,
)
from parce.rule import (
ARG, MATCH, TEXT, bygroup, call, dselect, findmember, ifarg, ifmember,
pattern,
)
# Zero-width lookahead marking the end of a Scheme token: end of text/line,
# a parenthesis, whitespace, the start of a comment, or a vector opener.
RE_SCHEME_RIGHT_BOUND = r"(?=$|[()\s;]|#\()"

# The "special initial" characters that may start an identifier.
RE_SCHEME_ID_SPECIAL_INITIAL = r'!$%&*/:<=>?^_~'

# First character of an identifier: a letter-like word character (no digit)
# or one of the special initial characters.
RE_SCHEME_ID_INITIAL = r'(?:[^\W\d]|[{}])'.format(RE_SCHEME_ID_SPECIAL_INITIAL)

# Any following character of an identifier.
RE_SCHEME_ID_SUBSEQUENT = r'[\w{}@.+-]'.format(RE_SCHEME_ID_SPECIAL_INITIAL)

# The "peculiar" identifiers: a lone sign or the ellipsis.
RE_SCHEME_ID_PECULIAR = r'[-+]|\.{3}'

# A full identifier, which must be followed by a right token boundary.
RE_SCHEME_ID = ''.join((
    r'(?:',
    RE_SCHEME_ID_PECULIAR,
    '|',
    RE_SCHEME_ID_INITIAL,
    '(?:',
    RE_SCHEME_ID_SUBSEQUENT,
    ')*',
    ')',
    RE_SCHEME_RIGHT_BOUND,
))
class Scheme(Language):
    """Language definition for Scheme.

    The ``root`` lexicon and the ``list`` and ``vector`` lexicons all share
    the rules yielded by :meth:`common`. Numbers are parsed in the separate
    ``number`` lexicon, which is entered as soon as something that starts a
    number (a prefix, a sign, a digit, ``inf`` or ``nan``) is seen.

    """
    @lexicon
    def root(cls):
        yield from cls.common()

    @classmethod
    def common(cls, pop=0):
        """Yield common stuff. ``pop`` can be set to -1 for one-arg mode."""
        # Quote prefixes carry no ``pop``: the quoted datum still follows.
        yield r"['`]|,@?", Delimiter.Scheme.Quote
        yield r"\(", Delimiter.OpenParen, pop, cls.list
        yield r"#\(", Delimiter.OpenVector, pop, cls.vector
        yield r'"', String, pop, cls.string
        yield r';', Comment, pop, cls.singleline_comment
        yield r'#!', Comment, pop, cls.multiline_comment
        yield r"#[tTfF]\b", Number.Boolean, pop
        yield r"#\\([a-z]+|.)", Character, pop
        yield RE_SCHEME_ID, cls.get_word_action(), pop

        # Radix prefix, optionally surrounded by an exactness prefix. Group 3
        # holds the radix letter; it selects the token action for the prefix
        # and the argument that is given to the number lexicon.
        _g = lambda action: bygroup(Number.Prefix, action, skip, Number.Prefix)
        yield r'(#[eEiI])?(#([bBoOxXdD]))(#[eEiI])?', findmember(MATCH[3], (
            ('bB', (_g(Number.Prefix.Binary), pop, cls.number(2))),
            ('oO', (_g(Number.Prefix.Octal), pop, cls.number(8))),
            ('xX', (_g(Number.Prefix.Hexadecimal), pop, cls.number(16)))),
            (_g(Number.Prefix.Decimal), pop, cls.number))
        yield r'#[eEiI]', Number.Prefix, pop, cls.number
        # The dot is escaped so that only the literal spellings "+inf.0",
        # "-inf.0", "+nan.0" and "-nan.0" are recognized.
        yield r'[-+]inf\.0', Number.Infinity, pop, cls.number
        yield r'[-+]nan\.0', Number.NaN, pop, cls.number
        yield r'[-+]', Operator.Sign, pop, cls.number
        yield r'(\.?)(\d+)', bygroup(Number.Dot, Number.Decimal), pop, cls.number
        if pop == 0:
            # A lone dot (as in a dotted pair) is only yielded in normal mode.
            yield r"\.(?!\S)", Delimiter.Dot

    @lexicon(consume=True)
    def list(cls):
        yield r"\)", Delimiter.CloseParen, -1
        yield from cls.common()

    @lexicon(consume=True)
    def vector(cls):
        yield r"\)", Delimiter.CloseVector, -1
        yield from cls.common()

    @classmethod
    def get_word_action(cls):
        """Return a dynamic action that is chosen based on the text."""
        from . import scheme_words
        return ifmember(TEXT, scheme_words.keywords, Keyword, Name)

    # -------------- Number ---------------------
    @lexicon(consume=True, re_flags=re.I)
    def number(self):
        """Decimal numbers, derive with 2 for binary, 8 for octal, 16 for hexadecimal numbers."""
        # The right bound is a zero-width lookahead: reaching it ends the
        # number context.
        yield RE_SCHEME_RIGHT_BOUND, None, -1
        # Digits valid for the radix given as lexicon argument (10 without
        # argument); matching is case insensitive because of ``re.I``.
        _pat = lambda radix: '[{}]+'.format('0123456789abcdef'[:radix or 10])
        yield pattern(call(_pat, ARG)), \
            dselect(ARG, {2: Number.Binary, 8: Number.Octal, 16: Number.Hexadecimal}, Number.Decimal)
        # Escaped dot: see the corresponding rules in common().
        yield r'[-+]inf\.0', Number.Infinity
        yield r'[-+]nan\.0', Number.NaN
        yield r'[-+]', Operator.Sign
        yield 'i', Number.Imaginary
        # Exponent and decimal dot are only yielded when the lexicon has no
        # argument, i.e. in decimal mode.
        yield ifarg(None, '([esfdl])([-+])?'), bygroup(Number.Exponent, Operator.Sign)
        yield ifarg(None, r'\.'), Number.Dot
        yield '@', Separator.Polar
        yield '/', Separator.Fraction
        yield '#+', Number.Special.UnknownDigit
        yield default_action, Number.Invalid

    # -------------- String ---------------------
    @lexicon(consume=True)
    def string(cls):
        yield r'"', String, -1
        yield from cls.string_common()

    @classmethod
    def string_common(cls):
        """Yield the rules for the contents of a string."""
        yield r'\\[\\"|afnrtvb]', String.Escape
        yield default_action, String

    # -------------- Comment ---------------------
    @lexicon(consume=True)
    def multiline_comment(cls):
        yield r'!#', Comment, -1
        yield from cls.comment_common()

    @lexicon(re_flags=re.MULTILINE, consume=True)
    def singleline_comment(cls):
        yield from cls.comment_common()
        # With re.MULTILINE, ``$`` matches at every line end.
        yield r'$', Comment, -1
class SchemeLily(Scheme):
    """Scheme used with LilyPond."""

    @lexicon(consume=True)
    def scheme(cls):
        """Pick one thing and pop back."""
        yield r'\s+', skip
        # All common rules get the ``argument`` lexicon in place of ``pop``.
        for rule in cls.common(cls.argument):
            yield rule
        yield default_target, -1

    @lexicon(consume=True)
    def argument(cls):
        """One Scheme expression."""
        # No rules of its own: the default target pops two levels right away
        # (presumably this context and the ``scheme`` context that led here).
        yield default_target, -2

    @classmethod
    def common(cls, pop=0):
        """Yield the common Scheme rules, preceded by a rule for ``#{``.

        ``#{`` switches to the ``schemelily`` lexicon of the LilyPond
        language definition.

        """
        # Imported here, not at module level (presumably to avoid a circular
        # import between the two language modules).
        from . import lilypond
        yield r"#{", Bracket.LilyPond.Start, pop, lilypond.LilyPond.schemelily
        yield from super().common(pop)
def scheme_number(tokens):
    """Return the Python value of the Scheme number in the specified tokens
    iterable.

    All ``tokens`` that can be in the :meth:`Scheme.number` context are
    supported. Supports all features: nan, +/- inf, fractions, exactness,
    complex numbers and polar coordinates.

    The result is an ``int``, ``float``, ``fractions.Fraction`` or
    ``complex`` value.

    Raises ValueError or ZeroDivisionError on faulty input.

    Usage example::

        >>> text = '123'
        >>> from parce import root
        >>> for n in root(Scheme.root, text):
        ...     if n == Scheme.number:
        ...         value = scheme_number(n)
        ...

    """
    import cmath
    import fractions
    import math
    from parce.util import split_list

    # radix prefix letter -> (radix, action of the digit tokens)
    _radix_map = {
        'b': (2, Number.Binary),
        'o': (8, Number.Octal),
        'd': (10, Number.Decimal),
        'x': (16, Number.Hexadecimal),
    }

    # Defaults, used when no prefix is present. ``exact`` is None when no
    # exactness prefix was given, True for ``#e`` and False for ``#i``.
    mantisse_action, radix = Number.Decimal, 10
    exact = None

    def get_uint(tokens):
        """Get an unsigned integer from the tokens.

        Returns a float when there were unknown digits (``#``) and there was
        no exact prefix (``#e``)

        """
        if not tokens:
            raise ValueError("expecting unsigned integer (radix: {})".format(radix))
        v = 0
        for t in tokens:
            if t.action is mantisse_action:
                v = int(t.text, radix)
            elif t.action is Number.Special.UnknownDigit:
                # Every "#" stands for one (zero) digit, so the value shifts
                # one position in the current radix per "#".
                v *= radix ** len(t.text)
                return float(v) if not exact else v
            else:
                raise ValueError("unknown token in radix {}: {}".format(radix, repr(t.text)))
        return v

    def get_decimal10(tokens):
        """Get a decimal10 value from the tokens. Only called in decimal mode."""
        v = []      # the pieces of the textual representation
        e = True    # True as long as no dot or exponent has been seen
        i, z = 0, len(tokens)
        while i < z:
            t = tokens[i]
            if t.action is Number.Decimal:
                v.append(t.text)
            elif t.action is Number.Special.UnknownDigit:
                v.append('0' * len(t.text))
            elif e and t.action is Number.Dot:
                v.append('.')
                e = False
            elif t.action is Number.Exponent:
                # Any exponent marker is written as 'e'; after it only signs
                # and one run of digits are accepted.
                e = False
                v.append('e')
                i += 1
                while i < z:
                    t = tokens[i]
                    if t.action is Operator.Sign:
                        v.append(t.text)
                    elif t.action is Number.Decimal:
                        v.append(t.text)
                        break
                    else:
                        raise ValueError("unknown token in exponent: {}".format(repr(t.text)))
                    i += 1
                else:
                    raise ValueError("missing exponent")
                # tokens after the exponent digits are ignored
                break
            else:
                raise ValueError("unknown token in decimal 10: {}".format(repr(t.text)))
            i += 1
        s = ''.join(v)
        if s:
            if e:
                # integer notation
                return float(s) if exact is False else int(s)
            return fractions.Fraction(s) if exact else float(s)
        raise ValueError("expecting decimal value")

    def get_real(tokens):
        """Return a real value from the tokens (can be int, float or Fraction.)."""
        # get a sign, inf or nan
        i, z = 0, len(tokens)
        sign = 1
        while i < z:
            t = tokens[i]
            if t.action is Operator.Sign:
                if t == '-':
                    sign *= -1
            elif t.action is Number.Infinity:
                return math.inf if t.text[0] == '+' else -math.inf
            elif t.action is Number.NaN:
                return math.nan
            else:
                break
            i += 1
        # now, get either uint, uint/uint or decimal10
        tokens, *fract = split_list(tokens[i:], '/')
        if fract:
            numerator = get_uint(tokens)
            denominator = get_uint(fract[0])
            if isinstance(numerator, float) or isinstance(denominator, float) or exact is False:
                v = numerator / denominator
            else:
                v = fractions.Fraction(numerator, denominator)
        elif radix == 10:
            v = get_decimal10(tokens)
        else:
            v = get_uint(tokens)
        return sign * v

    def get_complex(tokens):
        """Return a complex value from the tokens.

        The last token is expected to be the imaginary unit ``i``.

        """
        # find the imaginary part, searching backwards from the token just
        # before the final "i"
        i = len(tokens) - 2
        while i >= 0:
            t = tokens[i]
            if t.action in (Number.Infinity, Number.NaN):
                imag = get_real(tokens[i:-1])
                break
            elif t.action is Operator.Sign and t.group is None:
                # (for a -/+ sign after an exponent, t.group is -1)
                if i == len(tokens) - 2:
                    imag = 1 if t == '+' else -1    # the +i or -i case
                else:
                    imag = get_real(tokens[i:-1])
                break
            i -= 1
        else:
            raise ValueError("invalid complex number")
        # everything before the imaginary part is the real part
        real = get_real(tokens[:i]) if i else 0
        return complex(real, imag)

    ### main function body
    tokens = list(tokens)

    # get the prefixes
    i, z = 0, len(tokens)
    while i < z:
        t = tokens[i]
        if t.action in Number.Prefix:
            # the prefix text is "#" followed by one letter
            p = t.text[1].lower()
            if p == 'i':
                exact = False
            elif p == 'e':
                exact = True
            else:
                radix, mantisse_action = _radix_map[p]
        else:
            break
        i += 1

    tokens, *polar = split_list(tokens[i:], '@')
    if polar:
        # magnitude@angle
        return cmath.rect(get_real(tokens), get_real(polar[0]))
    elif tokens and tokens[-1].text.lower() == 'i':
        return get_complex(tokens)
    return get_real(tokens)
def scheme_number_from_text(text):
    """Proof-of-concept/test function parsing Scheme/Guile number syntax.

    The ``text`` is parsed with the :class:`Scheme` root lexicon, and the
    value of the first number found at the top level is returned.

    Usage::

        >>> from parce.lang.scheme import scheme_number_from_text
        >>> scheme_number_from_text('123')
        123
        >>> scheme_number_from_text('123+3i')
        (123+3j)
        >>> scheme_number_from_text('#x123')
        291
        >>> scheme_number_from_text('#o13')
        11
        >>> scheme_number_from_text('1/3')
        Fraction(1, 3)
        >>> scheme_number_from_text('#i1/3')
        0.3333333333333333
        >>> scheme_number_from_text('#xdead/beef')
        Fraction(57005, 48879)

    Raises ValueError or ZeroDivisionError on invalid input.

    """
    from parce import root

    tree = root(Scheme.root, text)
    for node in tree:
        # only a node that compares equal to the number lexicon is a number
        if node == Scheme.number:
            return scheme_number(node)
    # no number was found in the text
    raise ValueError("invalid number: {}".format(repr(text)))
def scheme_is_indenting_keyword(text):
    """Return True if the keyword ``text`` should cause the next line to indent
    normally, instead of aligning with previous line.

    The words in the list below and those that start with "def", like "define",
    do not follow the standard Scheme indentation patterns.

    The list below and the "def" rule are from GNU Emacs source code,
    which sets the standard for GNU Guile Scheme indentation.

    See: https://git.savannah.gnu.org/cgit/emacs.git/tree/lisp/progmodes/scheme.el
    See also: http://community.schemewiki.org/?scheme-style

    """
    # A set literal of constants gives a hash lookup instead of a linear scan.
    return text.startswith('def') or text in {
        'begin',
        'call-with-input-file',
        'call-with-output-file',
        'call-with-values',
        'case',
        'delay',
        'do',
        'dynamic-wind',
        'lambda',
        'let',
        'let*',
        'letrec',
        'letrec*',
        'letrec-syntax',
        'let-syntax',
        'let*-values',
        'let-values',
        'library',
        'parameterize',
        'receive',
        'sequence',
        'syntax-case',
        'syntax-rules',
        'unless',
        'when',
        'with-input-from-file',
        'with-input-from-port',
        'with-output-to-file',
        'with-output-to-port',
    }