from __future__ import annotations
__all__ = [
"RULE_ORDINAL_ONES",
"RULE_ORDINAL_TENS",
"RULE_ORDINAL_HUNDREDS",
"RULE_ORDINAL_THOUSANDS",
"RULE_ORDINAL_MILLIONS",
"RULE_ORDINAL_BILLIONS",
"RULE_ORDINAL_TRILLIONS",
"RULE_ORDINAL",
"parse_ordinal",
]
from maha.parsers.templates import FunctionValue
from maha.rexy import (
Expression,
ExpressionGroup,
named_group,
non_capturing_group,
optional_non_capturing_group,
)
from ..common import (
AFTER,
WAW_CONNECTOR,
combine_patterns,
spaced_patterns,
wrap_pattern,
)
from .values import *
def match_tens(match):
    """Return the value of the tens expression found by ``match``.

    Args:
        match: A regex match object; its whole matched text (group 0) is
            handed to :func:`parse_tens`.

    Returns:
        Whatever :func:`parse_tens` returns for the matched text.
    """
    return parse_tens(match.group(0))
def parse_tens(matched_text):
    """Convert text matched by the ``tens`` pattern into its value.

    Two shapes are handled:

    * Text without a ``WAW_CONNECTOR``: looked up in ``perfect_tens`` first and,
      if nothing is found there, in ``eleven_to_nineteen``.
    * Text containing a ``WAW_CONNECTOR``: split around the connector into a
      ones part and a tens part, whose values are added together.

    Args:
        matched_text: The text captured for a tens expression.

    Returns:
        The ``value`` of the matched expression, or the sum of the ones and
        tens values for the connected form.

    Raises:
        ValueError: If splitting on the connector text does not yield exactly
            two parts.
        AttributeError: If no expression matches (the lookup result has no
            ``value``).
    """
    connector = WAW_CONNECTOR.search(matched_text)

    if not connector:
        # Plain form: a perfect ten, otherwise one of eleven..nineteen.
        expression = perfect_tens.get_matched_expression(
            matched_text
        ) or eleven_to_nineteen.get_matched_expression(matched_text)
        return expression.value  # type: ignore

    # Connected form: "<ones prefix><connector><perfect ten>".
    ones_text, tens_text = matched_text.split(connector.group(0))
    ones_value = ones_prefix.get_matched_expression(ones_text).value  # type: ignore
    tens_value = perfect_tens.get_matched_expression(tens_text).value  # type: ignore
    return ones_value + tens_value
def parse_ordinal(match):
    """Compute the numeric value of an ordinal expression match.

    The named groups captured by ``match`` ("trillions", "billions",
    "millions", "thousands", "hundreds", "tens", "ones" and "after_value")
    are each converted to a value and the values are added together.

    Args:
        match: A match object providing ``capturesdict()``, i.e. a mapping
            from group name to the list of texts captured for that group.

    Returns:
        The sum of the values of all captured groups; ``0`` when none of the
        groups above captured anything.

    Raises:
        AttributeError: If a captured text matches none of the expressions
            it is looked up against (the lookup result has no ``value``).
    """
    groups = match.capturesdict()

    def get_value(captures, expressions: list[ExpressionGroup | Expression]) -> int:
        """Sum the values of the expressions matching each captured text."""
        exp_group = ExpressionGroup(*expressions)
        return sum(
            exp_group.get_matched_expression(text).value  # type: ignore
            for text in captures
        )

    # (group name, expressions a captured text is looked up against),
    # processed from the largest scale down to hundreds.
    # NOTE(review): the scale groups' patterns (e.g. ``hundreds_group``,
    # ``thousands_group``) also accept "<tens or ones prefix> <scale word>",
    # while only the scale expressions are looked up here - confirm whether
    # the leading multiplier is meant to contribute to the value.
    scale_lookups = (
        ("trillions", [ONE_TRILLION, TWO_TRILLIONS]),
        ("billions", [ONE_BILLION, TWO_BILLIONS]),
        ("millions", [ONE_MILLION, TWO_MILLIONS]),
        ("thousands", [ONE_THOUSAND, TWO_THOUSANDS]),
        ("hundreds", [perfect_hundreds, ONE_HUNDRED, TWO_HUNDREDS]),
    )

    value = 0
    for name, expressions in scale_lookups:
        captured = groups.get(name)
        if captured:
            value += get_value(captured, expressions)

    tens_captures = groups.get("tens")
    if tens_captures:
        # Only the first capture of the "tens" group is evaluated.
        value += parse_tens(tens_captures[0])

    ones_captures = groups.get("ones")
    if ones_captures:
        value += get_value(ones_captures, [ones])

    after_captures = groups.get("after_value")
    if after_captures:
        value += get_value(after_captures, [after_values])

    return value
# Scale words accepted in the "after_value" group of RULE_ORDINAL.
after_values = ExpressionGroup(
    ONE_HUNDRED, ONE_THOUSAND, ONE_MILLION, ONE_BILLION, ONE_TRILLION
)

# Stand-alone ones, ONE through TEN.
ones = ExpressionGroup(
    ONE,
    TWO,
    THREE,
    FOUR,
    FIVE,
    SIX,
    SEVEN,
    EIGHT,
    NINE,
    TEN,
)

# Prefix forms of ONE..NINE, used before the connector in combined tens and
# before a scale word in the hundreds/thousands/... groups.
ones_prefix = ExpressionGroup(
    ONE_PREFIX, TWO_PREFIX, THREE_PREFIX,
    FOUR_PREFIX, FIVE_PREFIX, SIX_PREFIX,
    SEVEN_PREFIX, EIGHT_PREFIX, NINE_PREFIX,
)

# Multiples of ten, TEN through NINETY.
perfect_tens = ExpressionGroup(
    TEN,
    TWENTY,
    THIRTY,
    FORTY,
    FIFTY,
    SIXTY,
    SEVENTY,
    EIGHTY,
    NINETY,
)

# "<ones prefix><connector><perfect ten>"; the ``[1:]`` slice presumably drops
# the leading TEN entry so only TWENTY..NINETY can follow the connector.
combined_tens = ones_prefix.join() + WAW_CONNECTOR + perfect_tens[1:].join()  # type: ignore

# ELEVEN through NINETEEN.
eleven_to_nineteen = ExpressionGroup(
    ELEVEN,
    TWELVE,
    THIRTEEN,
    FOURTEEN,
    FIFTEEN,
    SIXTEEN,
    SEVENTEEN,
    EIGHTEEN,
    NINETEEN,
)

# Hundreds, ONE_HUNDRED through NINE_HUNDREDS.
perfect_hundreds = ExpressionGroup(
    ONE_HUNDRED, TWO_HUNDREDS, THREE_HUNDREDS,
    FOUR_HUNDREDS, FIVE_HUNDREDS, SIX_HUNDREDS,
    SEVEN_HUNDREDS, EIGHT_HUNDREDS, NINE_HUNDREDS,
)
# 10 11 12 13 14 ... 95 96 97 98 99
tens = non_capturing_group(
    perfect_tens.join(),
    combined_tens,
    eleven_to_nineteen.join(),
)

# Named groups read back by name in parse_ordinal.
tens_group = named_group("tens", tens)
ones_group = named_group("ones", ones.join())

hundreds_group = named_group(
    "hundreds",
    non_capturing_group(
        perfect_hundreds.join(),
        spaced_patterns(tens, ONE_HUNDRED),
        spaced_patterns(ones_prefix.join(), ONE_HUNDRED),
    ),
)


def _scale_group(name, one, two):
    """Build the named group for one scale word (thousand, million, ...).

    The group accepts, in this order: a tens expression followed by ``one``,
    a ones prefix followed by ``one``, ``two`` on its own, or ``one`` on its
    own.
    """
    alternatives = non_capturing_group(
        spaced_patterns(tens, one),
        spaced_patterns(ones_prefix.join(), one),
        str(two),
        str(one),
    )
    return named_group(name, alternatives)


thousands_group = _scale_group("thousands", ONE_THOUSAND, TWO_THOUSANDS)
millions_group = _scale_group("millions", ONE_MILLION, TWO_MILLIONS)
billions_group = _scale_group("billions", ONE_BILLION, TWO_BILLIONS)
trillions_group = _scale_group("trillions", ONE_TRILLION, TWO_TRILLIONS)
# Every rule below pairs a pattern with parse_ordinal, which turns the named
# groups captured by that pattern into a value.

# Ones only.
RULE_ORDINAL_ONES = FunctionValue(parse_ordinal, combine_patterns(ones_group))

# Tens only.
RULE_ORDINAL_TENS = FunctionValue(parse_ordinal, combine_patterns(tens_group))

# Hundreds combined with tens and ones.
RULE_ORDINAL_HUNDREDS = FunctionValue(
    parse_ordinal, combine_patterns(hundreds_group, tens_group, ones_group)
)

# Thousands combined with every smaller group.
RULE_ORDINAL_THOUSANDS = FunctionValue(
    parse_ordinal,
    combine_patterns(thousands_group, hundreds_group, tens_group, ones_group),
)

# Millions combined with every smaller group.
RULE_ORDINAL_MILLIONS = FunctionValue(
    parse_ordinal,
    combine_patterns(
        millions_group, thousands_group, hundreds_group, tens_group, ones_group
    ),
)

# Billions combined with every smaller group.
RULE_ORDINAL_BILLIONS = FunctionValue(
    parse_ordinal,
    combine_patterns(
        billions_group,
        millions_group,
        thousands_group,
        hundreds_group,
        tens_group,
        ones_group,
    ),
)

# Trillions combined with every smaller group.
RULE_ORDINAL_TRILLIONS = FunctionValue(
    parse_ordinal,
    combine_patterns(
        trillions_group,
        billions_group,
        millions_group,
        thousands_group,
        hundreds_group,
        tens_group,
        ones_group,
    ),
)

# General rule: one of the scale groups, the combined hundreds/tens/ones
# pattern, a tens expression or a ones expression, optionally followed by a
# space, AFTER and a scale word captured as "after_value".
RULE_ORDINAL = FunctionValue(
    parse_ordinal,
    wrap_pattern(
        non_capturing_group(
            trillions_group,
            billions_group,
            millions_group,
            thousands_group,
            combine_patterns(hundreds_group, tens_group, ones_group),
            tens_group,
            ones_group,
        )
        + optional_non_capturing_group(
            EXPRESSION_SPACE
            + spaced_patterns(
                AFTER,
                named_group("after_value", after_values.join()),
            ),
        ),
    ),
)