# Source code for parce.lang.bash

# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2021-2021 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
Bash and other UNIX shell (sh) syntax.

"""

__all__ = ('Bash',)

import re

from parce import Language, lexicon, skip, default_action, default_target
from parce.action import *
from parce.rule import *


# Main source of information: man bash :-)


# A run (possibly empty) of characters that may appear literally inside a
# brace expansion word: anything except shell metacharacters, whitespace,
# quotes, ``!`` and the braces themselves.
_BRACE_TEXT = r'''[^|&;()<>\s"'`!{}]*'''

# A name: starts with a letter or underscore, continues with word characters,
# and may contain ``-``, ``.`` or ``+`` between word-character segments.
RE_NAME = r'[^\W\d]\w*(?:[-.+]\w+)*'

# A command word: like an argument word, but it may not start with ``#``.
RE_COMMAND = r'''[^\s|&;()<>$"'`#][^\s|&;()<>$"'`]*'''

# A brace expansion. Four capturing groups: preamble, ``{``, ``}`` and
# postscript; the expression between the braces is matched but not captured.
RE_BRACE = ''.join((
    '(' + _BRACE_TEXT + ')',                            # preamble
    r'(\{)',                                            # brace {
    '(?:',
    r'(?:\w\.\.\w|\d+\.\.\d+)(?:\.\.\d+)?',             # sequence expr
    '|' + _BRACE_TEXT + '(?:,' + _BRACE_TEXT + ')+',    # comma-separated strings
    ')',                                                # end of expand expr
    r'(\})',                                            # brace }
    '(' + _BRACE_TEXT + ')',                            # postscript
))

# A plain word: one or more characters that are not shell metacharacters,
# whitespace, ``$``, quotes, ``!`` or a backslash.
RE_WORD = r'''[^|&;()<>\s$"'`!\\]+'''


def _is_glob_pattern(text):
    """Return True if ``text`` contains shell pattern-matching characters.

    A word counts as a pattern when it contains ``*`` or ``?``, or when it
    contains both an opening ``[`` and a closing ``]``.

    """
    # The bracket test is spelled out in full: the expression
    # ``'[' and ']' in text`` would only test for ``]``, because the
    # constant ``'['`` is always true.
    return '*' in text or '?' in text or ('[' in text and ']' in text)


class Bash(Language):
    """Bash and other shell syntax."""

    @lexicon(re_flags=re.MULTILINE)
    def root(cls):
        """Find one or more command lines.

        This lexicon is derived with the special character ````` if called
        from the :meth:`backtick` lexicon, or with ```)``` from the
        :meth:`subshell` lexicon.

        """
        yield r'\A#!.*?$', Comment.Special
        yield '#', Comment, cls.comment
        yield '\n', skip
        yield default_target, derive(cls.command, ARG)

    @lexicon(re_flags=re.MULTILINE)
    def command(cls):
        """Find commands and arguments, pops back on line end or when ARG is ahead."""
        arguments = derive(cls.arguments, ARG)  # pass on the ` to arguments lexicon
        yield r'\\\n', Whitespace.Escape
        yield r'[ \t]+', skip
        yield '$', None, -1
        yield r';', Delimiter, -1
        yield arg(prefix='(?=', suffix=')'), None, -1
        yield '#', Comment, cls.comment
        yield (r'(\w+)(=)',
               bygroup(Name.Variable.Definition, Operator.Assignment),
               cls.assignment)
        yield r'let\b', Name.Builtin, cls.let_expr
        yield r'\.(?=$|\s)', Keyword, arguments
        yield r'exec\b', Name.Builtin
        yield (r'(local|export)[ \t]+(\w+)(=)?',
               bygroup(Name.Builtin, Name.Variable.Definition, Operator.Assignment),
               ifgroup(3, cls.assignment))
        # A name, optionally followed by ``()``. With the parentheses it is a
        # function definition (keywords and builtins are marked invalid as
        # function names); without them it is looked up as keyword, builtin
        # or well-known UNIX command.
        yield r'({})(\(\))?(?=\s|$)'.format(RE_NAME), ifgroup(
            2,
            bygroup(
                findmember(TEXT, (
                    (BASH_KEYWORDS, Keyword.Invalid),
                    (BASH_BUILTINS, Name.Builtin.Invalid),
                ), Name.Function.Definition),
                Bracket),
            findmember(TEXT, (
                (BASH_KEYWORDS, Keyword),
                (BASH_BUILTINS, (Name.Builtin, arguments)),
                (UNIX_COMMANDS, (Name.Command.Definition, arguments)),
            ), (Name.Command, arguments)))
        yield r'\{(?=$|\s)', Bracket.Start, cls.group_command
        yield r'\[\[(?=$|\s)', Bracket.Start, cls.cond_expr
        yield r'\[(?=$|\s)', Bracket.Start, cls.test_expr
        yield RE_COMMAND, Name.Command, arguments
        yield r'\(', Delimiter.Start, cls.subshell
        yield r'\|\|?|\&\&?', Delimiter.Connection, -1
        yield default_target, arguments

    @lexicon(re_flags=re.MULTILINE)
    def arguments(cls):
        """Arguments after a command, called from root."""
        yield arg(prefix='(?=', suffix=')'), None, -2
        yield r'\\\n', Whitespace.Escape
        yield r'[ \t]+', skip
        yield from cls.common()
        yield default_target, -1

    @classmethod
    def common(cls):
        """Yield common stuff: comment, expression, expansions, etc."""
        yield '#', Comment, cls.comment
        yield RE_BRACE, using(cls.brace_expansion)
        yield r'\(\(', Delimiter.Start, cls.arith_expr
        yield (r'(\{\w+\}|\d+)?(<<<)[ \t]*',
               bygroup(Name.Identifier, Delimiter.Direction),
               cls.here_string)
        # Here document: the delimiter word is only looked at (lookahead), in
        # group 3 (bare), 4 (double-quoted) or 5 (single-quoted). A bare
        # delimiter selects the expanding here_document lexicon, a quoted one
        # the non-expanding lexicon; the terminating regex is built from the
        # match by make_heredoc_regex().
        yield (r'(\{\w+\}|\d+)?(<<-?)[ \t]*(?=(\w+)|"([^"\n]+)"|' r"'([^'\n]+)')",
               bygroup(Name.Identifier, Delimiter.Direction),
               derive(ifgroup(3, cls.here_document, cls.here_document_quoted),
                      call(cls.make_heredoc_regex, MATCH)),
               cls.command, cls.arguments)
        yield (r'(\{\w+\}|\d+)?(&>>?|[<>][&>]?)(\d?-?)',
               bygroup(Name.Identifier, Delimiter.Direction, Name.Identifier))
        yield from cls.substitution()
        yield from cls.quoting()
        yield r'-[\w-]+', Name.Property   # option
        yield RE_WORD, select(call(_is_glob_pattern, TEXT), Text, Text.Template)

    @classmethod
    def numeric_common(cls):
        """Numeric values."""
        yield r'0\d+', Number.Octal
        yield r'0[xX][0-9a-fA-F]+', Number.Hexadecimal
        yield r'\d+#[0-9a-zA-Z@_]+', Number
        yield r'\d+', Number

    @classmethod
    def expression_common(cls):
        """Common things in expressions."""
        yield from cls.numeric_common()
        yield r'(\w+)[ \t]*(=)', bygroup(Name.Variable.Definition, Operator.Assignment)
        yield r'\w+', Name.Variable
        yield r',', Delimiter.Separator
        yield r'(?:[*/%+&\-|]|<<|>>)=', Operator.Assignment
        yield r'\+\+?|--?|\*\*?|<[=<]?|>[=>]?|&&?|\|\|?|[=!]=|[~!/%^?:]', Operator
        yield r'=', Operator.Assignment

    @classmethod
    def substitution(cls):
        """Variable expansion with ``$``."""
        yield r'(\$)(\(\()', bygroup(Name.Variable, Delimiter.Start), cls.arith_expr
        yield r'(\$)(\()', bygroup(Name.Variable, Delimiter.Start), cls.subshell
        yield r'\$[*@#?\$!0-9-]', Name.Variable.Special
        yield r'\$\w+', Name.Variable
        yield (r'(\$\{)([!#@*])?(\w*)',
               bygroup(Name.Variable, Delimiter.ModeChange, Name.Variable),
               cls.parameter)
        yield r'`', Delimiter.Quote, cls.backtick

    @classmethod
    def quoting(cls):
        """Escape, single and double quotes."""
        yield r'\\.', Escape
        yield r'"', String.Start, cls.dqstring
        yield r"'", String.Start, cls.sqstring
        yield r"\$'", String.Start, cls.escape_string
        yield r'\$"', String.Start, cls.dqstring    # translated string

    @lexicon
    def brace_expansion(cls):
        """Used to parse a brace expansion."""
        yield from cls.substitution()
        yield from cls.quoting()
        yield default_action, Text.Preprocessed

    @classmethod
    def make_heredoc_regex(cls, m):
        """Make a regular expression to terminate the here document with.

        ``m`` is the match object of the here document rule in
        :meth:`common`. The returned pattern is used as the argument of both
        here document lexicons (:meth:`here_document` and
        :meth:`here_document_quoted`).

        """
        # Groups are addressed relative to m.lastindex: +2 is the ``<<`` or
        # ``<<-`` operator, +3, +4 and +5 are the bare, double-quoted and
        # single-quoted delimiter word.
        pat = m.group(m.lastindex + 5) or m.group(m.lastindex + 4) or m.group(m.lastindex + 3)
        if m.group(m.lastindex + 2) == "<<-":
            # allow stripping tabs from doc and delimiter
            return r'^\t*(' + re.escape(pat) + r')[\t ]*$'
        else:
            return r'^(' + re.escape(pat) + r')[\t ]*$'

    @lexicon(re_flags=re.MULTILINE)
    def here_document(cls):
        """A here document that is expanded, terminated by ARG."""
        yield arg(escape=False), bygroup(Name.Identifier), -1
        yield from cls.substitution()
        yield default_action, Verbatim

    @lexicon(re_flags=re.MULTILINE)
    def here_document_quoted(cls):
        """A here document that's not expanded, terminated by ARG."""
        yield arg(escape=False), bygroup(Name.Identifier), -1
        yield default_action, Verbatim

    @lexicon(re_flags=re.MULTILINE)
    def here_string(cls):
        """A here-string, the text after ``<<<``."""
        yield from cls.substitution()
        yield from cls.quoting()
        yield RE_WORD, Verbatim
        yield default_target, -1

    @lexicon(re_flags=re.MULTILINE)
    def assignment(cls):
        """An assignment, the text after ``=``."""
        yield from cls.substitution()
        yield from cls.quoting()
        yield from cls.numeric_common()
        yield RE_WORD, Verbatim
        yield default_target, -1

    @lexicon
    def dqstring(cls):
        """A double-quoted string."""
        yield r'"', String.End, -1
        yield r'\\[\\$`"\n]', String.Escape
        yield from cls.substitution()
        yield default_action, String

    @lexicon
    def sqstring(cls):
        """A single-quoted string."""
        yield r"'", String.End, -1
        yield default_action, String

    @lexicon
    def escape_string(cls):
        """A ``$'...'`` string, in which backslash escapes are recognized."""
        yield r"'", String.End, -1
        yield r'\\(?:[abeEfnrtv\\\"\'?]|\d{3}|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|c.)', String.Escape
        yield default_action, String

    @lexicon
    def backtick(cls):
        r"""Stuff between ````` ... `````."""
        yield r'`', Delimiter.Quote, -1
        yield from cls.root('`')

    @lexicon
    def parameter(cls):
        """Contents of ``${`` ... ``}``."""
        yield r'\}', Name.Variable, -1
        yield r'\d+', Number
        yield from cls.substitution()
        yield r'\[', Delimiter.Bracket.Start, cls.subscript
        yield r':[-=?+]?|##?|%%?|\^\^?|,,?|@', Delimiter.ModeChange
        yield r'[\w*\.?\[\]]+', select(call(_is_glob_pattern, TEXT), Name.Variable, Text.Template)

    @lexicon
    def subscript(cls):
        """Contents of ``[`` ... ``]`` in an array reference."""
        yield r'\]', Delimiter.Bracket.End, -1
        yield '[@*]', Character.Special     # makes sense in ${bla[@]}
        yield from cls.expression_common()
        yield from cls.substitution()
        yield from cls.quoting()

    @lexicon
    def subshell(cls):
        """A subshell ``(`` ... ``)``."""
        yield r'\)', Delimiter.End, -1
        yield from cls.root(')')

    @lexicon
    def group_command(cls):
        """A group command ``{ ...; }``."""
        yield r'\}', Bracket.End, -1
        yield from cls.root

    # expressions
    @lexicon(re_flags=re.MULTILINE)
    def let_expr(cls):
        """An expression after ``let``."""
        yield r'$', None, -1
        yield r';', Delimiter, -1
        yield from cls.expression_common()
        yield from cls.common()

    @lexicon
    def arith_expr(cls):
        """An arithmetic expression ``((`` ... ``))``."""
        yield r'\)\)', Delimiter.End, -1
        yield from cls.expression_common()
        yield from cls.common()

    @lexicon
    def cond_expr(cls):
        """A conditional expression ``[[`` ... ``]]``."""
        yield r'\]\]', Bracket.End, -1
        yield from cls.expression_common()
        yield from cls.common()

    @lexicon
    def test_expr(cls):
        """A test expression ``[`` ... ``]``."""
        yield r'\]', Bracket.End, -1
        yield r'-[\w-]+', Name.Property   # option
        yield from cls.expression_common()
        yield from cls.common()

    @lexicon(re_flags=re.MULTILINE)
    def comment(cls):
        """A comment."""
        yield r'$', None, -1
        # NOTE(review): comment_common() is not defined in this class;
        # presumably it is inherited from Language -- confirm.
        yield from cls.comment_common()
# Reserved words of the shell.
BASH_KEYWORDS = (
    "case", "coproc", "do", "done", "elif", "else", "esac", "fi", "for",
    "function", "if", "in", "select", "then", "until", "while", "time",
)

# Names of well-known UNIX commands.
UNIX_COMMANDS = (
    "alias", "ar", "at", "awk", "basename", "bc", "bg", "cal", "cat", "cd",
    "chgrp", "chmod", "chown", "cksum", "cmp", "comm", "cp", "crontab",
    "csplit", "ctags", "cut", "dd", "df", "diff", "dirname", "du", "echo",
    "ed", "egrep", "env", "ex", "exit", "expr", "false", "fg", "file",
    "find", "fold", "fuser", "grep", "head", "iconv", "join", "kill", "lex",
    "ln", "logname", "lp", "ls", "m4", "make", "man", "mesg", "mkdir",
    "more", "mv", "nice", "nl", "nm", "od", "paste", "patch", "pax",
    "printf", "ps", "pwd", "rm", "rmdir", "sed", "sleep", "sort", "split",
    "strings", "strip", "tail", "talk", "tee", "test", "time", "touch",
    "tput", "tr", "true", "type", "umask", "uname", "uniq", "unset", "vi",
    "wait", "wc", "who", "write", "xargs", "yacc", "zip",
)

# Commands that are built into the shell.
BASH_BUILTINS = (
    "source", "alias", "bg", "bind", "break", "builtin", "caller", "cd",
    "command", "compgen", "complete", "compopt", "continue", "declare",
    "typeset", "dirs", "disown", "echo", "enable", "exec", "exit", "export",
    "fc", "fg", "getopts", "hash", "help", "history", "jobs", "kill", "let",
    "local", "logout", "mapfile", "readarray", "popd", "printf", "pushd",
    "pwd", "read", "readonly", "return", "set", "shift", "shopt", "suspend",
    "test", "times", "trap", "type", "ulimit", "umask", "unalias", "unset",
    "wait",
)