Source code for parce.ruleitem

# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.


"""
This module contains the implementation of the replacable rule item objects,
and some functions to query and manipulate rules that are used by the Lexicon.

Normally, you don't need this module directly; use the :mod:`~parce.rule`
module for your language definitions.

"""

import operator

from . import util


# pre_evaluate flags
_CHANGED = 0
_COMPLETE = 1
_UNCHANGED = 2


class _EvaluationError(RuntimeError):
    """Raised when an attribute is missing in the evaluation namespace."""
    pass


[docs]class Item: """Base class for any replacable rule item. An Item is considered to be immutable; you should never alter the attributes after instantiation. When an Item can be partly pre-evaluated a copy must be returned by calling ``type(self)(*args)``. In some cases you can also return a different Item type when pre-evaluating partly succeeds. For example, the :class:`select` type simply returns the chosen item if the index already can be evaluated, without evaluating the other items. """ __slots__ = () _getitem_func = operator.getitem def __getitem__(self, n): """Return a new Item that performs item[n]. n is evaluated as well.""" return call(self._getitem_func, self, n)
[docs] def evaluate(self, ns): """Evaluate item in namespace dict ``ns``.""" raise NotImplementedError
[docs] def pre_evaluate(self, ns): """Try to evaluate item in namespace dict ``ns``. Return a two-tuple(obj, success). Success is a two-bit value indicating whether the result is completely evaluated and whether something has changed. Bit 0 is set when the item is completely evaluated, and bit 1 is set when there was no modification. So all possible return values are: | 0: the object has changed but it is not yet completely evaluated | 1: the object has changed and now it is fully evaluated | 2: the object needs evaluation but is not changed Specific items may reimplement this method to return partially evaluated items. """ try: return self.evaluate(ns), _COMPLETE except _EvaluationError: return self, _UNCHANGED
[docs] def variations(self): """Yield the possible results for this item. This is used to build a decision tree for a rule, to see which actions and targets it could bring. The default implementation raises a RuntimeError; only RuleItem objects can yield variations. """ raise RuntimeError("Item '{}' can't be used directly in a rule".format(repr(self)))
def __repr__(self): return "{}({})".format( self.__class__.__name__, ', '.join(map(repr, self._repr_args()))) def _repr_args(self): return ()
[docs]class VariableItem(Item): """A named variable that's accessed in the namespace. If a getitem_func is given, it is called when this item is accessed using the item syntax ``[n]``, with this item as first argument, and then the specified index. """ __slots__ = ('_name', '_getitem_func') def __init__(self, name, getitem_func=operator.getitem): self._name = name self._getitem_func = getitem_func
[docs] def evaluate(self, ns): """Get the variable from the namespace dict.""" try: return ns[self._name] except KeyError as e: raise _EvaluationError("Can't find variable '{}'".format(self._name)) from e
def __repr__(self): return self._name.upper()
[docs]class call(Item): """Call predicate with arguments.""" __slots__ = ('_predicate', '_arguments') def __init__(self, predicate, *arguments): self._predicate = predicate self._arguments = arguments
[docs] def evaluate(self, ns): """Call predicate with the arguments.""" predicate = evaluate(self._predicate, ns) arguments = evaluate(self._arguments, ns) return predicate(*arguments)
[docs] def pre_evaluate(self, ns): """Optimize by pre-evaluating what can be pre-evaluated.""" predicate, pred_ok = pre_evaluate(self._predicate, ns) arguments, arg_ok = pre_evaluate(self._arguments, ns) ok = pred_ok & arg_ok if ok & _COMPLETE: return predicate(*arguments), _COMPLETE if ok & _UNCHANGED: return self, _UNCHANGED return type(self)(predicate, *arguments), _CHANGED
def _repr_args(self): return (self._predicate, *self._arguments)
[docs]class RuleItem(Item): """Classes inheriting RuleItem are allowed in toplevel in rules. They are evaluated by the lexicon when a rule matches. """ __slots__ = ()
[docs]class select(RuleItem): """Chooses one of the items. If an item is a list, it is unrolled when replacing the item in a rule. """ __slots__ = ('_index', '_items') def __init__(self, index, *items): self._index = index self._items = items
[docs] def evaluate(self, ns): """Return items[index].""" index = evaluate(self._index, ns) item = evaluate(self._items[index], ns) return item
[docs] def pre_evaluate(self, ns): """Optimize by pre-evaluating what can be pre-evaluated.""" index, ok = pre_evaluate(self._index, ns) if ok & _COMPLETE: item, ok = pre_evaluate(self._items[index], ns) return item, ok & _COMPLETE # mask unchanged state items, items_ok = pre_evaluate(self._items, ns) ok &= items_ok if ok & _UNCHANGED: return self, _UNCHANGED return type(self)(index, *items), _CHANGED
[docs] def variations(self): """Yield all the items that could be chosen (unevaluated).""" yield from self._items
def _repr_args(self): return (self._index, *self._items)
[docs]class target(RuleItem): """target(value, *lexicons) Has a special handling: if the value is an integer, it is used as the result value (to push/pop contexts). If it is a two-tuple(index, argument): The index points to the lexicon and the argument is used as lexicon argument. """ __slots__ = ('_value', '_lexicons') def __init__(self, value, *lexicons): self._value = value self._lexicons = lexicons
[docs] def evaluate(self, ns): """Return value if integer, otherwise lexicons[value[0]](value[1]).""" value = evaluate(self._value, ns) if isinstance(value, int): return value index, arg = value lexicon = evaluate(self._lexicons[index], ns) return lexicon if arg is None else lexicon(arg)
[docs] def pre_evaluate(self, ns): """Optimize by pre-evaluating what can be pre-evaluated.""" value, ok = pre_evaluate(self._value, ns) if ok & _COMPLETE: if isinstance(value, int): return value, _COMPLETE index, arg = value lexicon, ok = pre_evaluate(self._lexicons[index], ns) if ok & _COMPLETE: return (lexicon if arg is None else lexicon(arg), _COMPLETE) return type(self)((0, arg), lexicon), _CHANGED # pre-evaluate the lexicons lexicons, l_ok = pre_evaluate(self._lexicons, ns) ok &= l_ok if ok & _UNCHANGED: return self, ok return type(self)(value, *lexicons), ok
[docs] def variations(self): """Yield our possible variations. If the value is evaluated, yield either the value or the chosen lexicon. If not, yields ``a_number`` and all lexicons items. """ value = self._value if isinstance(value, Item): yield a_number yield from self._lexicons elif isinstance(value, int): yield value else: index, arg = value yield self._lexicons[index]
def _repr_args(self): return (self._value, *self._lexicons)
[docs]class PostponedItem(RuleItem): """Mixin base class for items that keep alive after the Lexicon. When inheriting from this class, implement the :meth:`evaluate_items` method, which lists all values as they were given to the __init__ method. If this method returns values, those are evaluated, and a new PostponedItem is returned with the contents evaluated. """ __slots__ = ()
[docs] def evaluate(self, ns): """Evaluate all values returned by the evaluate_items() method. If any value changes, a copy of the Item is returned, otherwise the Item ifself. If the evaluate_items() method does not yield any value, this Item is always returned unchanged. """ items, ok = pre_evaluate(self.evaluate_items(), ns) return self if ok & _UNCHANGED else type(self)(*items)
[docs] def pre_evaluate(self, ns): """Pre-evaluate all values returned by the evaluate_items() method. If any value changes, a copy of the Item is returned, otherwise the Item ifself. """ items, ok = pre_evaluate(self.evaluate_items(), ns) return self if ok & _UNCHANGED else type(self)(*items), ok
[docs] def evaluate_items(self): """Return a tuple of the values as given to the __init__ method, when they need to be evaluated inside this PostponedItem. This method should either yield *all* values that were given to the __init__ method, or nothing. The default implementation yields nothing, so nothing is evaluated or pre-evaluated. """ return ()
[docs]class pattern(PostponedItem): """Represents a pattern. This evaluates its value, but remains alive after building the rule. """ __slots__ = ('_value',) def __init__(self, value): self._value = value @property def value(self): """Get the pattern value.""" return self._value
[docs] def evaluate_items(self): """Yield the pattern value.""" return self._value,
[docs] def variations(self): """If the value is evaluated, yield it, otherwise yields ``None`` and ``a_string``.""" if isinstance(self._value, Item): yield None yield a_string else: yield self._value
def _repr_args(self): return self._value,
[docs]class ActionItem(PostponedItem): """Mixin base class for dynamic actions.""" __slots__ = ()
[docs]class SubgroupAction(ActionItem): """Yield actions from subgroups in a match. A SubgroupAction looks at subgroups in the regular expression match and returns the same amount of tokens as there are subgroups, using the specified action for every subgroup. For example, the rule:: "(0x)([0-9a-f]+)", SubgroupAction(Number.Prefix, Number.Hexadecimal) yields two tokens in case of a match, one for "0x" and the other for the other group of the match. There should be the same number of subgroups in the regular expression as there are action attributes given to __init__(). """ __slots__ = ('_actions',) def __init__(self, *actions): self._actions = actions
[docs] def replace(self, lexer, pos, text, match): for i, action in enumerate(self._actions, match.lastindex + 1): yield from lexer.filter_actions(action, match.start(i), match.group(i), match)
[docs] def variations(self): """Yield the possible actions.""" yield from self._actions
def _repr_args(self): return self._actions
[docs] def pre_evaluate(self, ns): """Reimplemented to only pre-evaluate subgroup actions (evaluating happens in the lexer).""" items, ok = pre_evaluate(self._actions, ns) return self if ok & _UNCHANGED else type(self)(*items), ok
[docs]class DelegateAction(ActionItem): """This action uses a lexicon to parse the text. All tokens are yielded as one group, flattened, ignoring the tree structure, so this is not efficient for large portions of text, as the whole region is parsed again on every modification. But it can be useful when you want to match a not too large text blob first that's difficult to capture otherwise, and then lex it with a lexicon that does (almost) not enter other lexicons. """ __slots__ = ('_lexicon',) def __init__(self, lexicon): self._lexicon = lexicon
[docs] def replace(self, lexer, pos, text, match): """Use our lexicon to parse the matched text.""" sublexer = type(lexer)([self._lexicon]) for e in sublexer.events(text): for p, txt, action in e.lexemes: yield pos + p, txt, action
[docs] def evaluate_items(self): """Return the lexicon specified on init, used by evaluate() and pre_evaluate().""" return self._lexicon,
[docs] def variations(self): """Yield our lexicon.""" yield self._lexicon
def _repr_args(self): return self._lexicon,
[docs]class SkipAction(ActionItem): """A DynamicAction that yields nothing. A SkipAction() is stored in the module variable ``skip`` and causes the rule to silently ignore the matched text. """
[docs] def replace(self, lexer, pos, text, match): yield from ()
[docs] def variations(self): """Yield no variations.""" return yield
[docs]def evaluate(obj, ns): """Evaluate an object, that may or may not be an Item. The namespace `ns` is a dictionary containing text, match and/or arg variables. A list or a tuple of items is also evaluated and always becomes a tuple. """ if isinstance(obj, Item): return obj.evaluate(ns) if type(obj) in (list, tuple): return tuple(evaluate(o, ns) for o in obj) return obj
[docs]def evaluate_rule(rule, match): """Evaluate all RuleItem objects in the rule. The specified match object provides the value for the TEXT and MATCH variables. Lists and tuples are unrolled. """ ns = {'text': match.group(), 'match': match} def eval_rule_items(objs): for obj in objs: if isinstance(obj, RuleItem): yield from util.unroll(obj.evaluate(ns)) elif type(obj) in (list, tuple): yield from eval_rule_items(obj) else: yield obj yield from eval_rule_items(rule)
[docs]def pre_evaluate(obj, ns): """Pre-evaluate any object, that may or may not be an Item. Returns a two-tuple(result, success). The namespace `ns` is a dictionary containing text, match and/or arg variables. * If the object is an Item, returns ``object.pre_evaluate(ns)``. * If the object is a list or tuple, evaluates the contents and returns a tuple. * If the object is none of the above, simply returns the object unchanged. The ``success`` value can be one of the values described in :meth:`Item.pre_evaluate`, or it is 3, meaning that the object is returned unchanged and needs no evaluation. """ if isinstance(obj, Item): return obj.pre_evaluate(ns) if type(obj) in (list, tuple): objs, success = [], 3 for o in obj: res, ok = pre_evaluate(o, ns) objs.append(res) success &= ok return tuple(objs), success return obj, 3
[docs]def pre_evaluate_rule(rule, arg): """Evaluate all RuleItem objects that can be evaluated in the rule. The specified ``arg`` provides the value for the ARG variable. Rule items that depend on the match object are not yet evaluated. Lists and tuples are unrolled. Returns the rule as a tuple. """ ns = {'arg': arg} def pre_eval_rule_items(objs): for obj in objs: if isinstance(obj, RuleItem): yield from util.unroll(obj.pre_evaluate(ns)[0]) elif type(obj) in (list, tuple): yield from pre_eval_rule_items(obj) else: yield obj result = pre_eval_rule_items(rule) # the first item may be a pattern instance; it should be evaluated by now for item in result: if isinstance(item, pattern): item = item.value return (item,) + tuple(result) return ()
[docs]def needs_evaluation(rule): """Return True if there are items in the rule that need evaluating.""" for item in rule: if isinstance(item, PostponedItem): if needs_evaluation(item.evaluate_items()): return True elif isinstance(item, RuleItem) or \ (type(item) in (tuple, list) and needs_evaluation(item)): return True return False
[docs]def variations_tree(rule): """Return a tuple with the tree structure of all possible variations. Branches (choices) are indicated by a frozenset, which contains zero or more tuples. """ items = tuple(rule) for i, item in enumerate(items): if isinstance(item, Item): branch = [variations_tree(util.unroll(v)) for v in item.variations()] if branch: branch = branch[0] if len(branch) == 1 else frozenset(branch) return (*items[:i], branch, *variations_tree(items[i+1:])) else: return items
[docs]def variations(rule): """Yield all possible variations of the rule.""" items = tuple(rule) for i, item in enumerate(items): if isinstance(item, Item): prefix = items[:i] for suffix in variations(items[i+1:]): for v in item.variations(): for l in variations(util.unroll(v)): yield prefix + l + suffix break else: yield items
#: sentinel denoting that a variation is any integer a_number = util.Symbol("a_number") #: sentinel denoting that a variation is any string a_string = util.Symbol("a_string")