# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Helper functions and classes for the :mod:`~parce.treebuilder` module.
"""
import collections
import itertools
from . import util
from .lexer import Event, Lexer
from .tree import Context, Range
from .target import TargetFactory
#: encapsulates the return values of :meth:`TreeBuilder.build_new_tree`
BuildResult = collections.namedtuple("BuildResult", "tree start end offset lexicons")
#: encapsulates the return values of :meth:`TreeBuilder.replace_tree`
ReplaceResult = collections.namedtuple("ReplaceResult", "start end lexicons")
[docs]class Changes:
"""Store changes that have to be made to a tree.
This object is used by
:meth:`~parce.treebuilder.TreeBuilder.get_changes()`. Calling
:meth:`add()` merges new changes with the existing changes.
"""
__slots__ = "text", "root_lexicon", "start", "removed", "added"
def __init__(self):
self.text = ""
self.root_lexicon = False # meaning no change is requested
self.start = -1 # meaning no text is altered
self.removed = 0
self.added = 0
def __repr__(self):
changes = []
if self.root_lexicon != False:
changes.append("root_lexicon: {}".format(self.root_lexicon))
if self.start != -1:
changes.append("text: {} -{} +{}".format(self.start, self.removed, self.added))
if not changes:
changes.append("(no changes)")
return "<Changes {}>".format(', '.join(changes))
[docs] def add(self, text, root_lexicon=False, start=0, removed=None, added=None):
"""Merge new change with existing changes.
If added and removed are not given, all text after start is
considered to be replaced.
"""
if root_lexicon != False:
self.root_lexicon = root_lexicon
if removed is None:
removed = len(self.text) - start
if added is None:
added = len(text) - start
self.text = text
if self.start == -1:
# there were no previous changes
self.start = start
self.removed = removed
self.added = added
return
# determine the offset for removed and added
if start + removed < self.start:
offset = self.start - start - removed
elif start > self.start + self.added:
offset = start - self.start - self.added
else:
offset = 0
# determine which part of removed falls inside existing changes
start = max(start, self.start)
end = min(start + removed, self.start + self.added)
offset -= max(0, end - start)
# set the new values
self.start = min(self.start, start)
self.removed += removed + offset
self.added += added + offset
[docs] def has_changes(self):
"""Return True when there are actually changes."""
return self.start != -1 or self.root_lexicon != False
[docs] def new_position(self, pos):
"""Return how the current changes would affect an older start."""
if pos < self.start:
return pos
elif pos < self.start + self.removed:
return self.start + self.added
return pos - self.removed + self.added
[docs]def get_prepared_lexer(tree, text, start, new_tree=False):
"""Get a prepared lexer reading from text, positioned at (or before) start.
Returns the three-tuple (lexer, events, tokens). The events stream is
returned seperately because the last Event can be pushed back, so it is
yielded again. The tokens are the last tokens group that remained the same.
Returns None when no position to start can be found, just start from the
beginning in this case.
If new_tree is True, does not find a start position if there would no
tokens remain left of it. This is useful when restarting a tree build; it
avoids leaving empty contexts in the build tree that should not be there.
"""
last_token = start_token = find_token_before(tree, start)
if not last_token:
return
go_back = backward(last_token)
if last_token.group is not None and last_token.group >= 0:
# we are in the middle of a group
for last_token in go_back:
if last_token.group is None or last_token.group < 0:
break
else:
return
while start:
# go back at least 10 tokens, to the beginning of a group; and don't
# stop at the first token(group) of a context whose lexicon has
# consume == True
start = 0
count = 10
for start_token in go_back:
for next_token in go_back:
if start_token.group is None and not (start_token.is_first() and start_token.parent.lexicon.consume):
count -= 1
if count == 0:
start = start_token.pos
break
start_token = next_token
break
if start:
lexer = get_lexer(start_token)
elif new_tree:
return
else:
lexer = Lexer([tree.lexicon])
events = lexer.events(text, start)
# compare the new events with the old tokens; at least one
# should be the same; if not, go back further if possible
old_events = events_with_tokens(start_token, last_token)
prev = None
for (old, tokens), new in zip(old_events, events):
if not same_events(old, new):
if prev:
return lexer, itertools.chain((new,), events), prev
break
prev = tokens
else:
if prev:
return lexer, events, prev
[docs]def events_with_tokens(start_token, last_token):
r"""Yield (Event, tokens) tuples for start_token until and including last_token.
Events are yielded together with token groups (or single tokens in a
1-length tuple).
This function is used by :func:`get_prepared_lexer` to compare an existing
token structure with events originating from a lexer.
The start_token must be the first of a group, if it is a GroupToken.
Additionally, it should not be the first token in a context whose lexicon
has the ``consume`` flag set. But if the start_token is the very first
token in a tree, it does not matter if it is not in the root context, this
case is handled gracefully.
"""
context, start_trail, end_trail = common_ancestor_with_trail(start_token, last_token)
if context:
r = Range(context, start_trail, end_trail)
target = TargetFactory()
get, push, pop = target.get, target.push, target.pop
def events(nodes):
stack = []
i = 0
n = nodes
while True:
z = len(n)
while i < z:
m = n[i]
if m.is_context:
push(m.lexicon)
stack.append(i)
i = 0
n = m
break
else:
group = m,
if m.group is not None:
# find the last token in this group
j = i + 1
while j < z and n[j].group > 0:
j += 1
group = n[i:j+1]
lexemes = tuple((t.pos, t.text, t.action) for t in group)
yield Event(get(), lexemes), group
i += len(group)
else:
if stack:
pop()
i = stack.pop() + 1
n = n.parent if stack else nodes # slice we started with
else:
break
if start_token.is_first() and not start_token.parent.is_root() \
and not any(backward(start_token)):
# start token is the very first token, but it is not in the root
# context. So it is a child of a lexicon with consume, or a context
# that was jumped to via a default target. Build a target from root.
lexicons = [p.lexicon for p in start_token.ancestors()]
push(*lexicons[-2::-1]) # reversed, not root
for context, slice_ in r.slices(target):
yield from events(context[slice_])
[docs]def get_lexer(token):
"""Get a Lexer initialized at the token's ancestry."""
lexicons = [p.lexicon for p in token.ancestors()]
lexicons.reverse()
return Lexer(lexicons)
[docs]def new_tree(token):
"""Return an empty context (and its root) with the same ancestry as the token's."""
c = n = context = Context(token.parent.lexicon, None)
for p in token.parent.ancestors():
n = Context(p.lexicon, None)
c.parent = n
n.append(c)
c = n
return context, n
[docs]def find_token_before(node, pos):
"""A version of :meth:`Context.find_token_before()
<parce.tree.Context.find_token_before>` that can handle empty contexts.
The new tree built inside :meth:`TreeBuilder.build_new_tree()
<parce.treebuilder.TreeBuilder.build_new_tree>` can have an empty context
at the beginning and/or the end. Returns None if there is no token left
from pos.
"""
while True:
i = 0
hi = len(node)
while i < hi:
mid = (i + hi) // 2
n = node[mid]
if n.is_context:
n = n.first_token() or n # if no first token, just n itself
if pos < n.end:
hi = mid
else:
i = mid + 1
if i == 0:
return
node = node[i-1]
if node.is_token:
return node
[docs]def ancestors_with_index(node):
"""A version of :meth:`Node.ancestors_with_index()
<parce.tree.Node.ancestors_with_index>` that can handle empty contexts.
"""
while node.parent:
index = node.parent.index(node)
node = node.parent
yield node, index
[docs]def backward(node):
"""A version of :meth:`Node.backward() <parce.tree.Node.backward>` that can
handle empty contexts.
"""
for node, index in ancestors_with_index(node):
if index:
yield from util.tokens(node[:index], True)
[docs]def common_ancestor_with_trail(node, other):
"""A version of :meth:`Token.common_ancestor_with_trail()
<parce.tree.Token.common_ancestor_with_trail>` that can handle empty
contexts.
"""
if other is node:
i = node.parent.index(node)
return node.parent, (i,), (i,)
if other.pos > node.pos:
s_ancestors, s_indices = zip(*ancestors_with_index(node))
o_indices = []
for n, i in ancestors_with_index(other):
o_indices.append(i)
try:
s_i = s_ancestors.index(n)
except ValueError:
continue
return n, s_indices[s_i::-1], o_indices[::-1]
return None, None, None
[docs]def same_events(e1, e2):
"""Compare Event tuples in a robust way.
Returns True if the events are completely the same. The lexicon in a target
is compared on identity instead of equality. This prevents different
derived lexicons from comparing the same, which is needed when rebuilding
a tree.
"""
if e1 != e2:
return False
elif e1.target is None or not e1.target.push:
return True
# both targets compare equal, and have a non-empty push value that compares
# equal, so now we only need to compare the lexicons on identity.
return all(l1 is l2 for l1, l2 in zip(e1.target.push, e2.target.push))