Source code for parce.validate

# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.


import collections
import re
import reprlib

import parce
from . import introspect
from .lexicon import Lexicon
from .ruleitem import variations, a_number


class LexiconValidator:
    """Check the rules of a single lexicon for errors and questionable usage.

    After calling :meth:`validate`, the collected messages can be found in
    the ``errors`` and ``warnings`` attributes (both sets of strings).

    """
    def __init__(self, lexicon):
        """Initialize with the ``lexicon`` whose rules are to be checked."""
        self.lexicon = lexicon
        self.errors = set()
        self.warnings = set()

    def error(self, msg, lexicon=None):
        """Add message to errors.

        The message is prefixed with ``lexicon``, or with our own lexicon
        if none is given.

        """
        msg = "{}: error: {}".format(lexicon or self.lexicon, msg)
        self.errors.add(msg)

    def warning(self, msg, lexicon=None):
        """Add message to warnings.

        The message is prefixed with ``lexicon``, or with our own lexicon
        if none is given.

        """
        msg = "{}: warning: {}".format(lexicon or self.lexicon, msg)
        self.warnings.add(msg)

    def validate(self):
        """Validate a lexicon.

        Errors and warnings are left in the ``errors`` and ``warnings``
        attributes, respectively. Both sets are cleared first.

        Returns True when no errors were found, False otherwise.

        """
        self.errors.clear()
        self.warnings.clear()

        default_act, default_tg = None, None
        patterns = set()
        for n, rule in enumerate(self.lexicon.rules, 1):
            if not isinstance(rule, (tuple, list)):
                self.error("rule #{}: invalid rule; should be tuple or list".format(n))
                continue
            elif len(rule) < 2:
                self.error("rule #{}: invalid rule; pattern and action should be there".format(n))
                continue
            pattern, *rule = rule
            if pattern is parce.default_action:
                if default_act:
                    self.error("rule #{}: conflicting default actions".format(n))
                else:
                    default_act = rule[0]
            elif pattern is parce.default_target:
                if default_tg:
                    self.error("rule #{}: conflicting default targets".format(n))
                else:
                    default_tg = rule
                    self.check_default_target(default_tg)
            else:
                if pattern is None:
                    self.warning("rule #{}: pattern is None; rule will be skipped".format(n))
                else:
                    self.validate_pattern(pattern, n)
                    try:
                        if pattern in patterns:
                            self.warning("rule #{0}: repeated pattern {1}; will be skipped".format(n, reprlib.repr(pattern)))
                        patterns.add(pattern)
                    except TypeError:
                        # an unhashable pattern can't be tracked in the set;
                        # report it instead of aborting the whole validation
                        self.error("rule #{0}: pattern {1} is not hashable".format(n, reprlib.repr(pattern)))
                self.validate_rule(rule, n)

        if default_act and default_tg:
            self.error("can't have both default_action and default_target")
        return not self.errors

    def validate_pattern(self, pattern, n):
        """Validate a regular expression pattern.

        ``n`` is the rule number, used in the messages. An error is added
        when the pattern can't be compiled with the lexicon's ``re_flags``
        or can't be matched against a string; a warning is added when the
        pattern matches the empty string.

        """
        try:
            rx = re.compile(pattern, self.lexicon.re_flags)
            # probe inside the try: a bytes pattern compiles fine but
            # raises TypeError when matched against a str
            matches_empty = rx.match('') is not None
        except (TypeError, re.error) as e:
            self.error("rule #{0}: regular expression {1} error:\n {2}".format(n, repr(pattern), e))
        else:
            if matches_empty:
                self.warning("rule #{0}: pattern {1} matches the empty string".format(n, repr(pattern)))

    def validate_rule(self, rule, n):
        """Validate a rule, which should be action, target[, target, ...].

        Does not look at the action, but checks whether all the targets are
        valid (either an integer or a Lexicon). ``n`` is the rule number,
        used in the messages.

        """
        # all possible rule paths
        for path in variations(rule):
            for target in path[1:]:     # the first item always is the action
                if target is not a_number and not isinstance(target, (int, Lexicon)):
                    self.error("rule #{0}: invalid target: {1}".format(n, target))

    def check_default_target(self, target):
        """Check whether this default target could lead to circular references.

        This could hang the parser, and we wouldn't like to have that :-)

        The default target is followed from lexicon to lexicon on a simulated
        state stack, until a lexicon without default target is reached, the
        stack would be popped below its root, or more than 100 distinct
        contexts have been visited (which is reported as circular).

        """
        # a unique object for every entered context, mimicking tree builder behaviour
        class Context:
            def __init__(self, lexicon):
                self.lexicon = lexicon

        lexicon = self.lexicon
        lexicons = collections.Counter()    # count them to find the circular culprits
        state = [Context(lexicon)]
        circular = set()        # track circular (existing) contexts
        warn = False
        context = state[-1]
        while True:
            circular.add(context)
            lexicons[lexicon] += 1      # count
            for t in target:
                if isinstance(t, int):
                    if t < 0:
                        if len(state) + t < 1:
                            # pops past the root context: the chain ends here
                            return
                        del state[t:]
                    else:
                        for _ in range(t):
                            state.append(Context(lexicon))
                elif not isinstance(t, Lexicon):
                    self.error("in default target only integer or lexicon allowed", lexicon)
                    return
                else:
                    state.append(Context(t))
            newcontext = state[-1]
            lexicon = newcontext.lexicon
            if newcontext is context:
                # the target did not move us to another context at all
                self.error("invalid default target", lexicon)
                return
            context = newcontext
            if newcontext in circular:
                # a circular default state that lands in an existing context
                # is handled gracefully by the tree builder
                warn = True
            if len(circular) > 100:
                culprits = " <-> ".join(str(lx) for lx, count in lexicons.items() if count > 1)
                if warn:
                    # this type of circular default state is handled
                    self.warning("handled circular default target: {}".format(culprits), lexicon)
                else:
                    # run away default states creating new contexts all the time
                    self.error("circular default target: {}".format(culprits), lexicon)
                return
            # continue with the default target of the lexicon we arrived in, if any
            for pattern, *target in lexicon.rules:
                if pattern is parce.default_target:
                    break
            else:
                break
def validate_language(lang):
    """Validate all lexicons in this language.

    Errors and warnings are printed to stdout, warnings first, each group
    in sorted order so that the output is the same on every run. If there
    are errors, this function returns False, otherwise True.

    """
    errors = set()
    warnings = set()
    for lexicon in introspect.lexicons(lang):
        validator = LexiconValidator(lexicon)
        validator.validate()
        errors.update(validator.errors)
        warnings.update(validator.warnings)
    # sets have no stable iteration order; sort to get reproducible output
    for message in sorted(warnings):
        print(message)
    for message in sorted(errors):
        print(message)
    return not errors
def validate_transform(transform, language):
    """Check whether the Transform has a method for every lexicon.

    Returns False when method names are not defined. Prints the missing names
    to the console. Prints a notice to the console when lexicons are ignored
    (i.e. the method name has been set to None in the Transform class) or
    when a transform method accepts untransformed contexts.

    """
    ok = True
    sentinel = object()     # distinguishes a missing attribute from one set to None
    tf = "{}.{}".format(transform.__class__.__module__, transform.__class__.__name__)
    for lexicon in introspect.lexicons(language):
        meth = getattr(transform, lexicon.name, sentinel)
        if meth is sentinel:
            print("Missing transform method in {tf} for lexicon: {name}".format(tf=tf, name=lexicon.fullname))
            ok = False
        elif not meth:
            print("Transform {tf} ignores output of lexicon: {name}".format(tf=tf, name=lexicon.fullname))
        else:
            # a bound method keeps the flag on its underlying function; other
            # callables (plain functions, callable objects) have no __func__
            func = getattr(meth, "__func__", meth)
            if getattr(func, "add_untransformed", False):
                print("Method {tf}.{name} accepts untransformed child contexts".format(tf=tf, name=lexicon.name))
    return ok