Source code for parce.lang.c

# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
Parse C.

"""

__all__ = ('C',)

import re

from parce import Language, lexicon, default_action, default_target, skip
from parce.action import *
from parce.rule import *

# support C/C++ UCN
RE_C_IDENT_ESCAPE = _E_ = r'\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}'
RE_C_IDENT_START  = fr'[^\W\d]|\$|{_E_}'
RE_C_IDENT_CONT   = fr'[\w$]|{_E_}'
RE_C_IDENT = r'(?:{})(?:{})*'.format(RE_C_IDENT_START, RE_C_IDENT_CONT)

RE_C_NUMBER = (r'[-+]?(?:'
    r'0(?:([oO]?[0-7]+)'                            # 1 octal
        r'|([bB][01]+)'                             # 2 binary
        r'|([xX][0-9a-fA-F]+))'                     # 3 hexadecimal
    r'|((?:\d+(?:\.\d+)?|\.\d+)(?:[eE][-+]?\d+)?)'  # 4 decimal
    r')'
)


[docs]class C(Language): @lexicon def root(cls): """All C language constructs.""" yield r'(struct|union|enum)\b', Keyword, cls.class_name yield words(C_TYPES, suffix=r'\b'), Name.Type yield words(C89_WORDS + C99_WORDS + C11_WORDS, suffix=r'\b'), Keyword yield '"', String.Start, cls.string yield fr'({RE_C_IDENT})\s*(\()?', ifgroup(2, (bygroup(using(cls._func_name), Delimiter), cls.arguments), bygroup(using(cls._variable_name))) yield '//', Comment, cls.singleline_comment yield r'/\*', Comment.Start, cls.multiline_comment yield r'\{', Bracket.Start, cls.compound yield '#', Delimiter.Preprocessed, cls.macro yield r'\(', Delimiter.Start, cls.paren yield r';', Delimiter yield RE_C_NUMBER, gselect(Number.Octal, Number.Binary, Number.Hexadecimal, Number.Decimal) yield r',', Delimiter.Separator yield r'(?:[*/%+&\-|]|<<|>>)=', Operator.Assignment yield r'\+\+?|--?|\*\*?|<[=<]?|>[=>]?|&&?|\|\|?|[=!]=|[~!/%^?:]', Operator yield r'=', Operator.Assignment @lexicon def compound(cls): """Stuff between ``{`` ... ``}``.""" yield r'\}', Bracket.End, -1 yield from cls.root @lexicon def paren(cls): """Stuff between ``(`` ... ``)``.""" yield r'\)', Delimiter, -1 yield from cls.root @lexicon def arguments(cls): """Stuff between ``name(`` ... ``)``.""" yield r'\)', Delimiter, -1 yield from cls.root @lexicon def class_name(cls): """The class name after struct, union or enum.""" yield RE_C_IDENT, using(cls._class_name), -1 yield r'\s+', skip yield default_target, -1 @lexicon(re_flags=re.MULTILINE) def string(cls): """A double-quoted string.""" yield r'"', String.End, -1 yield r'\\["\\nrbtfav?]', String.Escape yield RE_C_IDENT_ESCAPE, String.Escape yield r'\\.', String.Invalid yield r'[^"]*?$', String.Invalid, -1 yield default_action, String @lexicon def macro(cls): """Stuff after ``#``.""" yield r'include\b', Keyword.Preprocessed yield '"', String.Start, cls.string yield r'<.*?>', String.Template yield r'[ \t]', skip yield default_target, -1 # these lexicons are used to split escaped parts out of long # var/class/funcnames @lexicon def _class_name(cls): yield RE_C_IDENT_ESCAPE, Escape yield default_action, Name.Class @lexicon def _func_name(cls): yield RE_C_IDENT_ESCAPE, Escape yield default_action, Name.Function @lexicon def _variable_name(cls): yield RE_C_IDENT_ESCAPE, Escape yield default_action, Name.Variable #------------------ comments ------------------------- @lexicon(re_flags=re.MULTILINE) def singleline_comment(cls): yield '$', None, -1 yield from cls.comment_common() @lexicon def multiline_comment(cls): yield r'\*/', Comment.End, -1 yield from cls.comment_common()
# source: https://en.wikipedia.org/wiki/C_(programming_language) C_TYPES = ( "int", "bool", "char", "long", "double", "float", "signed", "unsigned", "short", ) C89_WORDS = ( "auto", "break", "case", "const", "continue", "default", "do", "double", "else", "enum", "extern", "for", "goto", "if", "register", "return", "sizeof", "static", "struct", "switch", "typedef", "union", "void", "volatile", "while", ) C99_WORDS = ( "_Bool", "_Complex", "_Imaginary", "inline", ) C11_WORDS = ( "_Alignas", "_Alignof", "_Atomic", "_Generic", "_Noreturn", "_Static_assert", "_Thread_local", )