numbers#

Parse numerals in different languages.

The Language definitions in this module do not make much sense for syntax highlighting, but they can be used to read numbers from text in various languages. They also serve as a proof of concept that nested Contexts can give meaning to the text they contain, showing how parce can go beyond basic tokenizing.

Here is an example showing the nesting of the matched tokens (all of which have the standard Number action):

>>> from parce.lang.numbers import Deutsch, English, Francais, Nederlands
>>> from parce import root
>>> root(Nederlands.root, "eenentwintig").dump()
<Context Nederlands.root at 0-12 (1 child)>
 ╰╴<Context Nederlands.number at 0-12 (1 child)>
    ╰╴<Context Nederlands.p6 at 0-12 (1 child)>
       ╰╴<Context Nederlands.p3 at 0-12 (1 child)>
          ╰╴<Context Nederlands.p2 at 0-12 (1 child)>
             ╰╴<Context Nederlands.n99 at 0-12 (2 children)>
                ├╴<Token 'een' at 0:3 (Literal.Number)>
                ╰╴<Token 'twintig' at 5:12 (Literal.Number)>
>>> root(Nederlands.root, "twaalfhonderdeenentwintig").dump()
<Context Nederlands.root at 0-25 (1 child)>
 ╰╴<Context Nederlands.number at 0-25 (1 child)>
    ╰╴<Context Nederlands.p6 at 0-25 (1 child)>
       ╰╴<Context Nederlands.p3 at 0-25 (2 children)>
          ├╴<Context Nederlands.p2 at 0-13 (2 children)>
          │  ├╴<Context Nederlands.n99 at 0-6 (1 child)>
          │  │  ╰╴<Token 'twaalf' at 0:6 (Literal.Number)>
          │  ╰╴<Token 'honderd' at 6:13 (Literal.Number)>
          ╰╴<Context Nederlands.n99 at 13-25 (2 children)>
             ├╴<Token 'een' at 13:16 (Literal.Number)>
             ╰╴<Token 'twintig' at 18:25 (Literal.Number)>
>>> root(English.root, "FiftySixThousandSevenHundredEightyNine").dump()
<Context English.root at 0-38 (1 child)>
 ╰╴<Context English.number at 0-38 (1 child)>
    ╰╴<Context English.p6 at 0-38 (4 children)>
       ├╴<Context English.p3 at 0-16 (2 children)>
       │  ├╴<Context English.p2 at 0-8 (2 children)>
       │  │  ├╴<Context English.n99 at 0-5 (1 child)>
       │  │  │  ╰╴<Token 'Fifty' at 0:5 (Literal.Number)>
       │  │  ╰╴<Context English.p1 at 5-8 (1 child)>
       │  │     ╰╴<Token 'Six' at 5:8 (Literal.Number)>
       │  ╰╴<Token 'Thousand' at 8:16 (Literal.Number)>
       ├╴<Context English.p2 at 16-28 (2 children)>
       │  ├╴<Context English.n99 at 16-21 (1 child)>
       │  │  ╰╴<Token 'Seven' at 16:21 (Literal.Number)>
       │  ╰╴<Token 'Hundred' at 21:28 (Literal.Number)>
       ├╴<Context English.n99 at 28-34 (1 child)>
       │  ╰╴<Token 'Eighty' at 28:34 (Literal.Number)>
       ╰╴<Context English.p1 at 34-38 (1 child)>
          ╰╴<Token 'Nine' at 34:38 (Literal.Number)>

The accompanying Transform classes are used to get the parsed results. Multiple values are automatically detected (the result is always a list), and case does not matter. For example:

>>> from parce.transform import transform_text
>>> transform_text(English.root, "one two THREE")
[1, 2, 3]
>>> transform_text(Nederlands.root, "eenentwintig")
[21]

In this module:#

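| Language   | Name (Aliases) | Description     | Filename(s) | Mime Type(s) |
|------------|----------------|-----------------|-------------|--------------|
| Deutsch    | Deutsch        | German Numbers  |             |              |
| English    | English        | English Numbers |             |              |
| Français   | Français       | French Numbers  |             |              |
| Nederlands | Nederlands     | Dutch Numbers   |             |              |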

class English[source]#

Bases: Numbers

Parse English numbers.

n99#

Numerical value below 100.

p1#

Numerical value after a tenfold (e.g. ‘three’ after ‘eighty’).

class EnglishTransform[source]#

Bases: NumbersTransform

Compute the value for English numbers.

The result is a list of the numbers that were found. Whitespace and hyphens are skipped; multiple values are automatically detected. Case does not matter.

For example:

>>> from parce.transform import transform_text
>>> from parce.lang.numbers import English
>>> transform_text(English.root, "one two THREE")
[1, 2, 3]
>>> transform_text(English.root, "fiftysix")
[56]
>>> transform_text(English.root, "FiftySixThousandSevenHundredEightyNine")
[56789]
>>> transform_text(English.root, "twelve hundred thirty four")
[1234]
>>> transform_text(English.root, "twelve hundred thirty four five")
[1234, 5]
>>> transform_text(English.root, "Twelve Hundred Thirty Four Twenty Five")
[1234, 25]
p1(items)#

The numerical value (below 100) of a text string.

ENGLISH_TENS = ('twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety')#

English tens from 20 up to and including 90

ENGLISH_TO19 = ('zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen')#

English numerals from 0 to 19

class Nederlands[source]#

Bases: Numbers

Parse Dutch numbers.

n99#

Numerical value below 100.

class NederlandsTransform[source]#

Bases: NumbersTransform

Compute the value for Dutch numbers.

The result is a list of the numbers that were found. Whitespace and hyphens are skipped; multiple values are automatically detected. Case does not matter.

For example:

>>> from parce.transform import transform_text
>>> from parce.lang.numbers import Nederlands
>>> transform_text(Nederlands.root, "een twee DRIE")
[1, 2, 3]
>>> transform_text(Nederlands.root, "zesenvijftig")
[56]
>>> transform_text(Nederlands.root, "ZesenVijftigDuizendZevenhonderdNegenenTachtig")
[56789]
>>> transform_text(Nederlands.root, "twaalfhonderd vier en dertig")
[1234]
>>> transform_text(Nederlands.root, "twaalfhonderd vier en dertig vijf")
[1234, 5]
>>> transform_text(Nederlands.root, "twaalfhonderd vier en dertig vijf en twintig")
[1234, 25]
NEDERLANDS_TENS = ('twintig', 'dertig', 'veertig', 'vijftig', 'zestig', 'zeventig', 'tachtig', 'negentig')#

Dutch tens from 20 up to and including 90

NEDERLANDS_TO19 = ('nul', 'een', 'twee', 'drie', 'vier', 'vijf', 'zes', 'zeven', 'acht', 'negen', 'tien', 'elf', 'twaalf', 'dertien', 'veertien', 'vijftien', 'zestien', 'zeventien', 'achttien', 'negentien')#

Dutch numerals from 0 to 19

class Deutsch[source]#

Bases: Numbers

Parse German numbers.

Both 'ein' and 'eins' are allowed, and 'dreissig' is supported in addition to 'dreißig'.

n99#

Numerical value below 100.

class DeutschTransform[source]#

Bases: NumbersTransform

Compute the value for German numbers.

Both 'ein' and 'eins' are allowed, and 'dreissig' is supported in addition to 'dreißig'.

The result is a list of the numbers that were found. Whitespace and hyphens are skipped; multiple values are automatically detected. Case does not matter.

For example:

>>> from parce.transform import transform_text
>>> from parce.lang.numbers import Deutsch
>>> transform_text(Deutsch.root, "ein zwei DREI")
[1, 2, 3]
>>> transform_text(Deutsch.root, "eins zwei DREI")
[1, 2, 3]
>>> transform_text(Deutsch.root, "Sechsundfünfzig")
[56]
>>> transform_text(Deutsch.root, "Sechsundfünfzig Tausend Siebenhundert NeunundAchtzig")
[56789]
>>> transform_text(Deutsch.root, "Zwölfhundert Vierunddreißig")
[1234]
>>> transform_text(Deutsch.root, "Zwölfhundert Vierunddreissig Fünf")
[1234, 5]
>>> transform_text(Deutsch.root, "Zwölfhundert Vierunddreißig Fünf und Zwanzig")
[1234, 25]
DEUTSCH_TENS = ('zwanzig', 'dreißig', 'vierzig', 'fünfzig', 'sechzig', 'siebzig', 'achtzig', 'neunzig')#

German tens from 20 up to and including 90

DEUTSCH_TO19 = ('null', 'ein', 'zwei', 'drei', 'vier', 'fünf', 'sechs', 'sieben', 'acht', 'neun', 'zehn', 'elf', 'zwölf', 'dreizehn', 'vierzehn', 'fünfzehn', 'sechzehn', 'siebzehn', 'achtzehn', 'neunzehn')#

German numerals from 0 to 19

class Français[source]#

Bases: Numbers

Parse French numbers.

Supports both 'zéro' and 'zero', and allows for the 's' after "quatre-vingt", "cent", "million".

n99#

Numerical value below 100.

class FrançaisTransform[source]#

Bases: NumbersTransform

Compute the value for French numbers.

Supports both 'zéro' and 'zero', and allows for the 's' after "quatre-vingt", "cent", "million".

The result is a list of the numbers that were found. Whitespace and hyphens are skipped; multiple values are automatically detected. Case does not matter.

For example:

>>> from parce.transform import transform_text
>>> from parce.lang.numbers import Francais
>>> transform_text(Francais.root, 'un deux TROIS')
[1, 2, 3]
>>> transform_text(Francais.root, 'cinquante-six')
[56]
>>> transform_text(Francais.root, 'cinquante-six mille sept-cents quatre-vingt neuf')
[56789]
>>> transform_text(Francais.root, 'mille deux cent trente-quatre')
[1234]
>>> transform_text(Francais.root, 'mille deux cent trente-quatre cinq')
[1234, 5]
>>> transform_text(Francais.root, 'mille deux cent trente-quatre vingt-cinq')
[1234, 25]
FRANCAIS_TENS = ('vingt', 'trente', 'quarante', 'cinquante', 'soixante', 'soixante-dix', 'quatre-vingt', 'quatre-vingt-dix')#

French tens from 20 up to and including 90

FRANCAIS_TO19 = ('zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix', 'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf')#

French numerals from 0 to 19