# Source code for parce.tree

# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.


"""
This module defines the tree structure a text is parsed into.

A tree consists of Context and Token objects. (Both inherit from the base
class Node, which defines the shared methods and properties.)

A :class:`Context` is a list containing Tokens and other Contexts. A Context is
created when a lexicon becomes active. A Context knows its parent Context and
its lexicon.

A :class:`Token` represents one parsed piece of text. A Token is created when a
rule in the lexicon matches. A Token knows its parent Context, its position in
the text and the action that was specified in the rule.

A Context is always non-empty, except for the root Context, which represents
the root lexicon and can be empty if the document did not generate a single
token.

The tree structure is easy to navigate, no special objects or iterators are
necessary for that. To find a token at a certain position in a context, use
:meth:`Context.find_token` and its relatives. From every node you can iterate
:meth:`~Node.forward` and :meth:`~Node.backward`. Use the methods like
:meth:`~Node.left_siblings` and :meth:`~Node.right_siblings` to traverse the
current context.

"""


import itertools
import reprlib
import weakref

from parce import util
from parce import query
from parce.lexicon import Lexicon


# Line-drawing prefixes used by Node.dump() and Node.pwd, keyed by style name.
# Every value is a 4-tuple that is indexed as follows:
#   [0] filler below an ancestor that is NOT the last child (vertical line)
#   [1] filler below an ancestor that IS the last child (blank)
#   [2] connector in front of a node that is NOT the last child
#   [3] connector in front of a node that IS the last child
DUMP_STYLES = {
    "ascii":   (" | ", "   ", " |-", " `-"),
    "round":   (" │ ", "   ", " ├╴", " ╰╴"),
    "square":  (" │ ", "   ", " ├╴", " └╴"),
    "double":  (" ║ ", "   ", " ╠═", " ╚═"),
    "thick":   (" ┃ ", "   ", " ┣╸", " ┗╸"),
    "flat":    ("│", " ", "├", "╰"),
}

# Key in DUMP_STYLES that is used when no explicit style is requested.
DUMP_STYLE_DEFAULT = "round"


class Node:
    """Methods that are shared by Token and Context.

    Concrete subclasses are expected to provide a ``_parent`` slot (managed
    through the :attr:`parent` property) and a ``pos`` attribute, which is
    used by :meth:`parent_index`.

    """
    __slots__ = ('__weakref__',)

    is_token = False
    is_context = False

    @property
    def parent(self):
        """The parent Context (or None; uses a weak reference)."""
        return self._parent()

    @parent.setter
    def parent(self, parent):
        """Set the parent (to a Context or None)."""
        # store a callable in both cases, so the getter can simply call it
        self._parent = weakref.ref(parent) if parent is not None else lambda: None

    @parent.deleter
    def parent(self):
        """Set the parent to None."""
        self._parent = lambda: None

    def copy(self, parent=None):
        """Return a copy of the Node, but with the specified parent."""
        raise NotImplementedError

    def dump(self, file=None, style=None, depth=0):
        """Display a graphical representation of the node and its contents.

        The file object defaults to stdout, and the style to "round". You can
        choose any style that's in the ``DUMP_STYLES`` dictionary.

        The ``depth`` is the number of ancestor levels for which a prefix is
        drawn; it is incremented for every nested level.

        """
        # the node itself gets a connector (index 2 or 3), its ancestors get
        # a continuation line or blank filler (index 0 or 1)
        i = 2
        d = DUMP_STYLES[style or DUMP_STYLE_DEFAULT]
        prefix = []
        node = self
        for _ in range(depth):
            prefix.append(d[i + int(node.is_last())])
            node = node.parent
            i = 0
        print("".join(reversed(prefix)) + repr(self), file=file)
        if self.is_context:
            for n in self:
                n.dump(file, style, depth + 1)

    @property
    def pwd(self):
        """Show the ancestry, for debugging purposes.

        Prints one line per node, from the root down to this node, together
        with the index of each node in its parent.

        """
        nodes = [self]
        nodes.extend(self.ancestors())
        nodes.reverse()
        d = DUMP_STYLES[DUMP_STYLE_DEFAULT]
        for n, node in enumerate(nodes):
            print(''.join((
                d[1] * max(0, n-1),
                d[3] if n else '',
                repr(node),
                " [{}]".format(nodes[n-1].index(node)) if n else '',
            )))

    def parent_index(self):
        """Return our index in the parent.

        This is recommended above using parent.index(self), because this method
        finds our index using a binary search on position, while the latter
        is a linear search, which is certainly slower with a large number of
        children.

        Fails if called on a node without parent.

        """
        p = self.parent
        pos = self.pos
        lo = 0
        hi = len(p)
        while lo < hi:
            mid = (lo + hi) // 2
            n = p[mid]
            if n.pos < pos:
                lo = mid + 1
            elif n is self:
                return mid
            else:
                hi = mid
        return lo

    def root(self):
        """Return the root node (the node itself if it has no parent)."""
        root = self
        for root in self.ancestors():
            pass
        return root

    def is_last(self):
        """Return True if this Node is the last child of its parent.

        Fails if called on the root element.

        """
        return self.parent[-1] is self

    def is_first(self):
        """Return True if this Node is the first child of its parent.

        Fails if called on the root element.

        """
        return self.parent[0] is self

    def is_ancestor_of(self, node):
        """Return True if this Node is an ancestor of the other Node."""
        return any(n is self for n in node.ancestors())

    def ancestors(self, upto=None):
        """Climb the tree up over the parents.

        If upto is given and it is one of the ancestors, stop after yielding
        that ancestor. Otherwise iteration stops at the root node.

        """
        # Compare with None explicitly: a Context is a list and a Token
        # defines __len__, so their truth value says nothing about presence.
        stop = upto.parent if upto is not None else None
        node = self.parent
        while node is not None and node is not stop:
            yield node
            node = node.parent

    def ancestors_with_index(self, upto=None):
        """Yield the ancestors(upto), and the index of each node in the parent.

        The index is computed lazily, at the moment each pair is requested.

        """
        n = self
        for p in self.ancestors(upto):
            yield p, n.parent_index()
            n = p

    def common_ancestor(self, other):
        """Return the common ancestor with the Context or Token.

        Returns None if there is no common ancestor.

        """
        ancestors = []
        if self.is_context:
            ancestors.append(self)
        ancestors.extend(self.ancestors())
        # nodes are compared by identity
        if other.is_context and any(a is other for a in ancestors):
            return other
        for n in other.ancestors():
            if any(a is n for a in ancestors):
                return n

    def depth(self):
        """Return the number of ancestors."""
        return sum(1 for n in self.ancestors())

    def left_sibling(self):
        """Return the left sibling of this node, if any.

        Does not descend in child nodes or ascend upto the parent.
        Fails if called on the root node.

        """
        if self.parent[0] is not self:
            i = self.parent_index()
            return self.parent[i-1]

    def right_sibling(self):
        """Return the right sibling of this node, if any.

        Does not descend in child nodes or ascend upto the parent.
        Fails if called on the root node.

        """
        if self.parent[-1] is not self:
            i = self.parent_index()
            return self.parent[i+1]

    def left_siblings(self):
        """Yield the left siblings of this node in reverse order, if any.

        Does not descend in child nodes or ascend upto the parent.
        Fails if called on the root node.

        """
        if self.parent[0] is not self:
            i = self.parent_index()
            yield from self.parent[i-1::-1]

    def right_siblings(self):
        """Yield the right siblings of this node, if any.

        Does not descend in child nodes or ascend upto the parent.
        Fails if called on the root node.

        """
        if self.parent[-1] is not self:
            i = self.parent_index()
            yield from self.parent[i+1:]

    def next_token(self):
        """Return the following Token, if any."""
        for t in self.forward():
            return t

    def previous_token(self):
        """Return the preceding Token, if any."""
        for t in self.backward():
            return t

    def forward(self, upto=None):
        """Yield all Tokens in forward direction, starting at the right sibling.

        Descends into child Contexts, and ascends into parent Contexts.
        If upto is given, does not ascend above that context.

        """
        for parent, index in self.ancestors_with_index(upto):
            yield from util.tokens(parent[index+1:])

    def backward(self, upto=None):
        """Yield all Tokens in backward direction, starting at the left sibling.

        Descends into child Contexts, and ascends into parent Contexts.
        If upto is given, does not ascend above that context.

        """
        for parent, index in self.ancestors_with_index(upto):
            if index:
                yield from util.tokens(parent[:index], True)

    @property
    def query(self):
        """Query this node in different ways; see the :mod:`~parce.query` module."""
        def gen():
            yield self
        return query.Query(gen)

    def delete(self):
        """Remove this node from its parent.

        If the parent would become empty, it is removed too.
        Returns the first non-empty ancestor, or None if there is no such
        ancestor (e.g. when the node has no parent).

        """
        # Determine all indices *before* modifying the tree. An emptied
        # context reports position 0, so the binary search in parent_index()
        # could not reliably find it anymore afterwards.
        trail = list(self.ancestors_with_index())
        for parent, index in trail:
            del parent[index]
            if len(parent):
                return parent


class Token(Node):
    """A Token instance represents a lexed piece of text.

    When a pattern rule in a lexicon matches the text, a Token is created. When
    that rule would create more than one Token from a single regular expression
    match, GroupToken objects are created instead, carrying the index of the
    token in the group in the `group` attribute. The `group` attribute is
    readonly None for normal tokens.

    GroupTokens are thus always adjacent in the same context. If you want to
    retokenize text starting at some position, be sure you are at the start of
    a grouped token, e.g.::

        t = ctx.find_token(45)
        if t.group:
            for t in t.left_siblings():
                if not t.group:
                    break
        pos = t.pos

    Alternatively, you can use the `GroupToken.get_group_*` methods.

    (A GroupToken is just a normal Token otherwise, the reason a subclass was
    created is that the group attribute is unused in by far the most tokens, so
    it does not use any memory. You never need to reference the GroupToken
    class; just test the group attribute if you want to know if a token belongs
    to a group that originated from a single match.)

    When iterating over the children of a Context (which may be Context or
    Token instances), you can use the `is_token` attribute to determine whether
    the node child is a token, which is easier than to call `isinstance(t,
    Token)` each time.

    From a token, you can iterate `forward()` or `backward()` to find adjacent
    tokens. If you only want to stay in the current context, use the various
    sibling methods, such as `right_sibling()`.

    By traversing the `ancestors()` of a token or context, you can find which
    lexicons created the tokens.

    You can compare a Token instance with a string. Instead of::

        if token.text == "bla":
            do_something()

    you can do::

        if token == "bla":
            do_something()

    You can call `len()` on a token, which returns the length of the token's
    text attribute, and you can use the string format method to embed the
    token's text in another string::

        s = "blabla {}".format(token)

    A token always has a parent, and that parent is always a Context instance.

    """

    __slots__ = "_parent", "pos", "text", "action"

    is_token = True     #: Always True for Token

    def __init__(self, parent, pos, text, action):
        self.parent = parent    #: The Context node to which the token was added
        self.pos = pos          #: The position in the original text
        self.text = text        #: The text of this token
        self.action = action    #: The action specified by the lexicon rule that created the token

    @property
    def end(self):
        """The end position of this token in the original text."""
        return self.pos + len(self.text)

    group = None        #: Always None for Token, an integer for :class:`GroupToken`

    def copy(self, parent=None):
        """Return a copy of the Token, but with the specified parent."""
        return type(self)(parent, self.pos, self.text, self.action)

    def equals(self, other):
        """Return True if the other Token has the same ``text`` and ``action``
        attributes and the same context ancestry (see also
        :meth:`state_matches`).

        Note that the ``pos`` attribute is not compared.

        """
        return (self.text == other.text
                and self.action == other.action
                and self.state_matches(other))

    def state_matches(self, other):
        """Return True if the other Token has the same lexicons in the ancestors.

        Both ancestries are climbed in parallel. The result is True as soon as
        a shared ancestor is encountered, False as soon as two lexicons differ
        or when one ancestry is longer than the other.

        """
        if other is self:
            return True
        c1 = c2 = None
        for c1, c2 in zip(self.ancestors(), other.ancestors()):
            if c1 is c2:
                return True
            elif c1.lexicon is not c2.lexicon:
                return False
        if c1 is None:
            # at least one of the tokens has no ancestors at all; the states
            # only match if that is the case for both
            return self.parent is None and other.parent is None
        # both ancestries must have reached their root at the same depth
        return c1.parent is None and c2.parent is None

    def __repr__(self):
        text = reprlib.repr(self.text)
        return "<Token {} at {}:{} ({})>".format(text, self.pos, self.end, self.action)

    def __hash__(self):
        # __eq__ is overridden, so the hash must be restored explicitly
        return Node.__hash__(self)

    def __eq__(self, other):
        if isinstance(other, str):
            return other == self.text
        return other is self

    def __ne__(self, other):
        if isinstance(other, str):
            return other != self.text
        return other is not self

    def __format__(self, formatstr):
        return self.text.__format__(formatstr)

    def __len__(self):
        return len(self.text)

    def forward_including(self, upto=None):
        """Yield all tokens in forward direction, including self."""
        yield self
        yield from self.forward(upto)

    def backward_including(self, upto=None):
        """Yield all tokens in backward direction, including self."""
        yield self
        yield from self.backward(upto)

    def forward_until_including(self, other):
        """Yield all tokens starting with us and upto and including the other."""
        r = self.range(other)
        if r:
            yield from r.tokens()

    def common_ancestor_with_trail(self, other):
        """Return a three-tuple(context, trail_self, trail_other).

        The context is the common ancestor such as returned by common_ancestor,
        if any. trail_self is a tuple of indices from the common ancestor upto
        self, and trail_other is a list of indices from the same ancestor upto
        the other Token.

        If there is no common ancestor, all three are None. But normally,
        all nodes share the root context, so that will normally be the upmost
        common ancestor. All three are also None when the other token is not
        positioned after this token (and is not this token itself).

        """
        if other is self:
            i = self.parent_index()
            return self.parent, (i,), (i,)
        if other.pos > self.pos:
            own_trail = list(self.ancestors_with_index())
            if own_trail:   # a token without parent has no common ancestor
                s_ancestors, s_indices = zip(*own_trail)
                o_indices = []
                for n, i in other.ancestors_with_index():
                    o_indices.append(i)
                    try:
                        s_i = s_ancestors.index(n)
                    except ValueError:
                        continue
                    # both trails were collected bottom-up; reverse them
                    return n, s_indices[s_i::-1], o_indices[::-1]
        return None, None, None

    def range(self, other):
        """Return a :class:`Range` from this token upto and including the other.

        Returns None if the other :class:`Token` does not belong to the same
        tree.

        """
        context, start_trail, end_trail = self.common_ancestor_with_trail(other)
        if context:
            return Range(context, start_trail, end_trail)


class GroupToken(Token):
    """A Token class that allows setting the `group` attribute.

    For normal Token instances, `group` is a class attribute that is always
    None. For Tokens that belong to a group, i.e. originated from a single
    regular expression match, the `group` attribute is the index of the token
    in the group of tokens that were created together.

    The last token in the group has a negative value, so it can be recognized
    as the last. For example, tokens of a three-group have the indices 0, 1 and
    -2.

    The methods :meth:`get_group`, :meth:`get_group_start` and
    :meth:`get_group_end` can only be reliably used when there are no tokens
    deleted from the tree, and when the tokens really have a parent.

    """
    __slots__ = "group",

    def __init__(self, group, parent, pos, text, action):
        self.group = group  #: The index of this token in a group (negated for the last token in a group)
        super().__init__(parent, pos, text, action)

    def copy(self, parent=None):
        """Return a copy of the Token, but with the specified parent."""
        return type(self)(self.group, parent, self.pos, self.text, self.action)

    @classmethod
    def make_group(cls, parent, lexemes):
        """Create a tuple of GroupTokens for the lexemes.

        Every lexeme is unpacked into the ``pos``, ``text`` and ``action``
        arguments of the constructor. The group index of the last token is
        negated. An empty tuple is returned if there are no lexemes.

        """
        group = tuple(cls(n, parent, *t) for n, t in enumerate(lexemes))
        if group:
            group[-1].group *= -1
        return group

    def get_group(self):
        """Return the whole group this token belongs to as a list."""
        p = self.parent
        i = j = self.parent_index()
        z = len(p) - 1
        if self.group < 0:
            # we are at the last; the (negative) index leads to the first
            i += self.group
        else:
            i -= self.group
            # walk right until the token with the negative index (the last)
            j += 1
            while j < z and p[j].group > 0:
                j += 1
        return p[i:j+1]

    def get_group_start(self):
        """Return the first token of the group this token belongs to."""
        i = self.parent_index()
        if self.group < 0:
            i += self.group
        else:
            i -= self.group
        return self.parent[i]

    def get_group_end(self):
        """Return the last token of the group this token belongs to."""
        p = self.parent
        i = self.parent_index()
        z = len(p) - 1
        if self.group >= 0:
            # not the last one: walk right until the negative index
            i += 1
            while i < z and p[i].group > 0:
                i += 1
        return p[i]


class Context(list, Node):
    """A Context represents a list of tokens and contexts.

    The lexicon that created the tokens is in the `lexicon` attribute.

    If a pattern rule jumps to another lexicon, a sub-Context is created and
    tokens are added there. If that lexicon pops back to the current one, new
    tokens can appear after the sub-context. (So the token that caused the jump
    to the sub-context normally precedes the context it created.)

    A context has a `parent` attribute, which can point to an enclosing
    context. The root context has `parent` None.

    When iterating over the children of a Context (which may be Context or
    Token instances), you can use the `is_context` attribute to determine
    whether the node child is a context, which is easier than to call
    `isinstance(node, Context)` each time.

    You can quickly find tokens in a context, based on text::

        if "bla" in context:
            # etc

    Or child contexts, based on lexicon::

        if MyLanguage.lexicon in context:
            # etc

    And if you want to know which token is on a certain position in the text,
    use e.g.::

        context.find_token(45)

    which, using a bisection algorithm, quickly returns the token, which
    might be in any sub-context of the current context.

    """
    __slots__ = "lexicon", "_parent"

    is_context = True   #: Always True for Context

    def __new__(cls, lexicon, parent):
        # the constructor arguments are not meant for list
        return list.__new__(cls)

    def __init__(self, lexicon, parent):
        self.lexicon = lexicon  #: The lexicon this context was instantiated with.
        self.parent = parent

    def __repr__(self):
        pos, end = self.pos, self.end
        if pos == end:
            pos = end = "?" # both are 0 in this case: empty Context
        name = self.lexicon and repr(self.lexicon)
        children = "child" if len(self) == 1 else "children"
        return "<Context {} at {}-{} ({} {})>".format(
            name, pos, end, len(self), children)

    def __hash__(self):
        # list is unhashable and __eq__ is overridden: restore identity hash
        return Node.__hash__(self)

    def __eq__(self, other):
        if isinstance(other, Lexicon):
            return self.lexicon == other
        return other is self

    def __ne__(self, other):
        if isinstance(other, Lexicon):
            return self.lexicon != other
        return other is not self

    @property
    def ls(self):
        """List the contents of this Context, for debugging purposes."""
        for i, n in enumerate(self):
            print("[{}] {}".format(i, repr(n)))

    def copy(self, parent=None):
        """Return a copy of the context, but with the specified parent.

        All child tokens and contexts are copied as well.

        """
        # a non-recursive implementation due to Python's recursion limits
        copy = copy_root = type(self)(self.lexicon, parent)
        n = self
        i = 0
        while True:
            z = len(n)
            while i < z:
                m = n[i]
                if m.is_context:
                    # descend: continue in the child context and its copy
                    copy.append(type(m)(m.lexicon, copy))
                    copy = copy[-1]
                    i = 0
                    n = m
                    break
                else:
                    copy.append(m.copy(copy))
                    i += 1
            else:
                # this context is done
                if copy is copy_root:
                    break
                # ascend: the number of children copied so far is the index
                # of the next child to handle in the parent
                n = n.parent
                copy = copy.parent
                i = len(copy)
        return copy_root

    @property
    def pos(self):
        """Return the position of our first token. Returns 0 if empty."""
        try:
            node = self[0]
            while node.is_context:
                node = node[0]
            return node.pos
        except IndexError:
            return 0

    @property
    def end(self):
        """Return the end position of our last token. Returns 0 if empty."""
        try:
            node = self[-1]
            while node.is_context:
                node = node[-1]
            return node.end
        except IndexError:
            return 0

    def is_root(self):
        """Return True if this Context has no parent node."""
        return self.parent is None

    def height(self):
        """Return the height of the tree (the longest distance to a descendant).

        Returns 0 for an empty context.

        """
        if not self:
            return 0
        # non-recursive; the stack holds the child index at every level
        stack = []
        height = 0
        i = 0
        n = self
        while True:
            for i in range(i, len(n)):
                m = n[i]
                if m.is_context:
                    stack.append(i)
                    height = max(height, len(stack))
                    i = 0
                    n = m
                    break
            else:
                if stack:
                    n = n.parent
                    i = stack.pop() + 1
                else:
                    return height + 1

    def tokens(self, reverse=False):
        """Yield all Tokens, descending into nested Contexts.

        If ``reverse`` is set to True, yield all tokens in backward direction.

        """
        children = reversed if reverse else iter
        # non-recursive; suspended iterators of the parents are on the stack
        stack = []
        gen = children(self)
        while True:
            for n in gen:
                if n.is_token:
                    yield n
                else:
                    stack.append(gen)
                    gen = children(n)
                    break
            else:
                if stack:
                    gen = stack.pop()
                else:
                    break

    def first_token(self):
        """Return our first Token, or None if there is no such token."""
        try:
            node = self[0]
            while node.is_context:
                node = node[0]
            return node
        except IndexError:
            pass

    def last_token(self):
        """Return our last token, or None if there is no such token."""
        try:
            node = self[-1]
            while node.is_context:
                node = node[-1]
            return node
        except IndexError:
            pass

    def find(self, pos):
        """Return the index of our child at (or to the right of) pos.

        Returns -1 if there is no such child.

        """
        # bisect for the first child that ends after pos
        i = 0
        hi = length = len(self)
        while i < hi:
            mid = (i + hi) // 2
            n = self[mid]
            if n.end <= pos:
                i = mid + 1
            else:
                hi = mid
        return -1 if i == length else i

    def find_context(self, pos):
        """Return the youngest Context at position (or self)."""
        node = self
        i = self.find(pos)
        if i != -1:
            n = node[i]
            while n.is_context and n.pos <= pos:
                node = n
                n = n[n.find(pos)]
        return node

    def find_token(self, pos):
        """Return the Token at or to the right of position.

        Returns None if there is no such token.

        """
        i = self.find(pos)
        if i != -1:
            n = self[i]
            while n.is_context:
                n = n[n.find(pos)]
            return n

    def find_token_with_trail(self, pos):
        """Return the Token at or to the right of position, and the trail of indices.

        The trail is the list of indices where the token was found. Returns
        (None, None) if there is no such token. Here is an example::

            >>> import parce
            >>> tree = parce.root(parce.find('css'), open('parce/themes/default.css').read())
            >>> tree.find_token_with_trail(600)
            (<Token ' Selected te...ow has focus ' at 566:607 (Comment)>, [21, 0])
            >>> tree[21][0]
            <Token ' Selected te...ow has focus ' at 566:607 (Comment)>

        """
        i = self.find(pos)
        if i != -1:
            n = self[i]
            trail = [i]
            while n.is_context:
                i = n.find(pos)
                n = n[i]
                trail.append(i)
            return n, trail
        return None, None

    def find_left(self, pos):
        """Return the index of our child at or to the left of pos.

        Returns -1 if there is no such child.

        """
        # bisect for the first child that does not start before pos; the
        # child we want is the one just before it
        i = 0
        hi = len(self)
        while i < hi:
            mid = (i + hi) // 2
            n = self[mid]
            if n.pos < pos:
                i = mid + 1
            else:
                hi = mid
        return i - 1

    def find_token_left(self, pos):
        """Return the Token at or to the left of position.

        Returns None if there is no such token.

        """
        i = self.find_left(pos)
        if i != -1:
            n = self[i]
            while n.is_context:
                n = n[n.find_left(pos)]
            return n

    def find_token_left_with_trail(self, pos):
        """Return the Token at or to the left of position, and the trail of indices.

        Returns (None, None) if there is no such token.

        """
        i = self.find_left(pos)
        if i != -1:
            n = self[i]
            trail = [i]
            while n.is_context:
                i = n.find_left(pos)
                n = n[i]
                trail.append(i)
            return n, trail
        return None, None

    def find_token_after(self, pos):
        """Return the first token completely right from pos.

        Returns None if there is no token right from pos.

        """
        node = self
        while True:
            i = 0
            hi = length = len(node)
            while i < hi:
                mid = (i + hi) // 2
                n = node[mid]
                if n.is_context:
                    # a context qualifies if its last token does
                    n = n.last_token()
                if n.pos < pos:
                    i = mid + 1
                else:
                    hi = mid
            if i >= length:
                return
            node = node[i]
            if node.is_token:
                return node

    def find_token_before(self, pos):
        """Return the last token completely left from pos.

        Returns None if there is no token left from pos.

        """
        node = self
        while True:
            i = 0
            hi = len(node)
            while i < hi:
                mid = (i + hi) // 2
                n = node[mid]
                if n.is_context:
                    # a context qualifies if its first token does
                    n = n.first_token()
                if pos < n.end:
                    hi = mid
                else:
                    i = mid + 1
            if i == 0:
                return
            node = node[i-1]
            if node.is_token:
                return node

    def range(self, start=0, end=None):
        """Return a :class:`Range`.

        The ancestor of the range is the common ancestor of the tokens found at
        start and end (or the context itself if start or end fall outside this
        context). If start is 0 and end is None, the range encompasses the full
        context.

        Returns None if this context is empty.

        """
        return Range.from_tree(self, start, end)


class Range:
    """A Range denotes a range of a tree structure.

    A range is defined by an ancestor context and possibly empty lists pointing
    to the start and end token, if specified. If both trails are not specified,
    the range encompasses the full context.

    """
    def __init__(self, ancestor, start_trail=None, end_trail=None):
        self.ancestor = ancestor                #: The specified ancestor
        self.start_trail = start_trail or []    #: The specified start trail (empty list by default)
        self.end_trail = end_trail or []        #: The specified end trail (empty list by default)

    def __repr__(self):
        return "<{} {} [{}:{}]>".format(type(self).__name__, self.ancestor.lexicon, self.pos, self.end)

    @property
    def pos(self):
        """The position of the first token in our range."""
        n = self.ancestor
        for i in self.start_trail:
            n = n[i]
        return n.pos

    @property
    def end(self):
        """The end position of the last token in our range."""
        n = self.ancestor
        for i in self.end_trail:
            n = n[i]
        return n.end

    @classmethod
    def from_tree(cls, tree, start=0, end=None):
        """Create a Range.

        The ancestor is the common ancestor of the tokens found at start and
        end (or the tree itself if start or end fall outside the range of the
        tree). If start is 0 and end is None, the range encompasses the full
        tree.

        Returns None if the tree is empty, if end is not larger than start, or
        if no token could be found for start or end.

        """
        if not tree:
            return # empty
        context = tree
        if end is not None and end < tree.end:
            if end <= start:
                return
            end_trail = tree.find_token_left_with_trail(end)[1]
            if not end_trail:
                return
        else:
            end_trail = []
        if start > 0:
            start_trail = tree.find_token_with_trail(start)[1]
            if not start_trail:
                return
            if end_trail:
                # find the youngest common ancestor
                for n, (i, j) in enumerate(zip(start_trail, end_trail)):
                    if i != j or context[i].is_token:
                        break
                    context = context[i]
                if n:
                    # make both trails relative to the common ancestor
                    del start_trail[:n]
                    del end_trail[:n]
        else:
            start_trail = []
        return cls(context, start_trail, end_trail)

    def slices(self, target_factory=None):
        """Yield (context, slice) tuples.

        The yielded slices include the tokens at the end of start and end
        trail.

        If you specify a ``target_factory``, it should be a
        :class:`~.target.TargetFactory` object, and it will be updated along
        with the yielded slices.

        """
        if self.start_trail:
            start = self.start_trail[0]
            if len(self.start_trail) > 1:
                # collect the contexts along the start trail, then yield
                # their tails from the deepest context upwards
                ancestors = []
                n = self.ancestor[start]
                for i in self.start_trail[1:]:
                    ancestors.append((n, i))
                    n = n[i]
                yield ancestors[-1][0], slice(i, None) # include start token
                for p, i in ancestors[-2::-1]:
                    if target_factory:
                        target_factory.pop()
                    yield p, slice(i + 1, None)
                if target_factory:
                    target_factory.pop()
                # the child at start has been handled by the slices above
                start += 1
        else:
            start = 0
        if self.end_trail:
            end = self.end_trail[0]
            if len(self.end_trail) == 1:
                yield self.ancestor, slice(start, end + 1)    # include end token
            else:
                yield self.ancestor, slice(start, end)
                # descend along the end trail, yielding the head of each context
                n = self.ancestor[end]
                for end in self.end_trail[1:-1]:
                    if target_factory:
                        target_factory.push(n.lexicon)
                    yield n, slice(end)
                    n = n[end]
                if target_factory:
                    target_factory.push(n.lexicon)
                yield n, slice(self.end_trail[-1] + 1)   # include end token
        else:
            yield self.ancestor, slice(start, None)

    def tokens(self):
        """Yield all tokens in this range.

        The first and last tokens may overlap with the start and end positions.

        """
        for context, slice_ in self.slices():
            yield from util.tokens(context[slice_])



def make_tokens(lexemes, parent=None):
    """Factory returning a tuple of one or more :class:`Token` instances for
    the lexemes.

    The ``lexemes`` argument is an iterable of three-tuples like the
    ``lexemes`` in an :class:`~parce.lexer.Event` namedtuple defined in the
    :mod:`~parce.lexer` module. If there is more than one lexeme,
    :class:`GroupToken` instances are created.

    The specified ``parent`` context is set as parent, if given.

    Raises IndexError if there are no lexemes.

    """
    # materialize, so that any iterable (not only a sequence) is accepted
    lexemes = tuple(lexemes)
    if len(lexemes) > 1:
        return GroupToken.make_group(parent, lexemes)
    return (Token(parent, *lexemes[0]),)