# -*- coding: utf-8 -*-
#
# This file is part of the parce Python package.
#
# Copyright © 2021-2021 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
RFC-4180 compliant CSV format
"""
# Public API of this module: the names exported by a star-import.
__all__ = ('Csv', 'CsvTransform')
import re
from parce import Language, lexicon, skip, default_action, default_target
from parce.rule import bygroup
from parce.transform import Transform
from parce.util import split_list
import parce.action as a
class Csv(Language):
    """RFC-4180 compliant CSV format.

    The language consists of three lexicons:

    ``root``
        has no patterns of its own; it only hands off to ``record``.
    ``record``
        tokenizes one line: unquoted fields, separators and the start of
        quoted fields.
    ``string``
        tokenizes the contents of a quoted field, including doubled
        (escaped) quotes.

    """

    @lexicon
    def root(cls):
        """Split a file in records."""
        # No rules here: the default target switches to ``record``.
        yield default_target, cls.record

    @lexicon(re_flags=re.MULTILINE)
    def record(cls):
        """Split a record in escaped (string) and non-escaped fields."""
        # End of line (``$`` relies on re.MULTILINE), optionally consuming
        # the newline; the match is skipped and the target -1 leaves this
        # lexicon.
        yield r'$\n?', skip, -1
        # Unquoted field: a run without comma, double quote or newline that
        # extends up to the next comma or the end of the line.
        yield r'[^,"\n]+(?=$|,|\n)', a.Name
        # Start of a quoted field. Optional blanks are allowed before the
        # quote; any other text in front of the quote (group 1) is marked
        # Invalid, the quote itself (group 2) starts the string.
        yield (r'[ \t]*((?:[^,"\s]+[ \t]*)+)?(")',
               bygroup(a.Invalid, a.String.Start), cls.string)
        yield ',', a.Separator

    # NOTE(review): ``consume=True`` is a parce lexicon option; presumably it
    # places the token(s) that entered this lexicon (the opening quote) in
    # this lexicon's context -- confirm against the parce documentation.
    @lexicon(consume=True)
    def string(cls):
        """Handle a quoted string, escaping doubled quotes inside."""
        # A doubled quote is an escaped quote; this rule must come before
        # the closing-quote rule so that ``""`` is not seen as the end.
        yield r'""', a.String.Escape
        # Closing quote (group 1); optional blanks may follow, and any other
        # text directly after it (group 2) is marked Invalid. The target -1
        # leaves this lexicon.
        yield r'(")[ \t]*([^,"\s]+)?', bygroup(a.String.End, a.Invalid), -1
        # Everything else inside the quotes is string content.
        yield default_action, a.String