# Source code for maha.parsers.rules.numeral.rule

# Public names of this module; this list controls what ``import *`` exports.
__all__ = [
    "RULE_NUMERAL_ONES",
    "RULE_NUMERAL_TENS",
    "RULE_NUMERAL_HUNDREDS",
    "RULE_NUMERAL_THOUSANDS",
    "RULE_NUMERAL_MILLIONS",
    "RULE_NUMERAL_BILLIONS",
    "RULE_NUMERAL_TRILLIONS",
    "RULE_NUMERAL_INTEGERS",
    "RULE_NUMERAL_DECIMALS",
    "RULE_NUMERAL",
    "parse_numeral",
]

import itertools as it
from typing import Optional

from maha.expressions import EXPRESSION_DECIMAL, EXPRESSION_INTEGER
from maha.parsers.templates import FunctionValue
from maha.parsers.utils import convert_to_number_if_possible
from maha.rexy import ExpressionGroup, named_group, non_capturing_group

from ..common import (
    FRACTIONS,
    WAW_CONNECTOR,
    combine_patterns,
    get_fractions_of_unit_pattern,
    spaced_patterns,
)
from .values import *


def get_combined_value(groups, multiplier="") -> float:
    """
    Sum the numeric values of every captured text of one numeral group.

    Parameters
    ----------
    groups : iterable of str
        Captured texts to evaluate (presumably the list of captures of one
        named group; confirm against the caller).
    multiplier : str
        Plural unit name such as ``"thousands"``. It is used to look up the
        module-level expressions ``ONE_<UNIT>``, ``TWO_<UNITS>`` and
        ``SEVERAL_<UNITS>``. With the default empty string no unit
        expressions are found and the texts are parsed as plain numbers.

    Returns
    -------
    float
        The sum of the values of all texts in ``groups`` (``0`` when
        ``groups`` is empty).
    """
    # "thousands" -> ONE_THOUSAND (trailing "s" dropped), TWO_THOUSANDS,
    # SEVERAL_THOUSANDS. Missing names resolve to None.
    singular = globals().get("ONE_" + multiplier[:-1].upper())
    dual = globals().get("TWO_" + multiplier.upper())
    plural = globals().get("SEVERAL_" + multiplier.upper())

    value = 0
    for group in groups:
        # Pass by keyword so each expression lands in the parameter that
        # carries its name (the positional call had the two swapped).
        matched = get_matched_value(group, dual=dual, singular=singular)
        if matched:
            value += matched
        else:
            value += parse_combined(group, singular, plural)
    return value


def get_matched_value(matched_text, dual, singular):
    """
    Return the value of the dual or singular expression matching the text.

    Parameters
    ----------
    matched_text : str
        Text to test against the expressions.
    dual
        Expression object exposing ``fullmatch`` and ``value``, or None.
    singular
        Expression object exposing ``fullmatch`` and ``value``, or None.

    Returns
    -------
    The ``value`` of the first expression (dual first, then singular) that
    fully matches ``matched_text``; None when neither matches or both are
    missing.
    """
    # Check each expression on its own so that a single missing (None)
    # expression is skipped instead of raising AttributeError.
    for expression in (dual, singular):
        if expression is not None and expression.fullmatch(matched_text):
            return expression.value
    return None


def parse_ones(text):
    """
    Look up ``text`` in the ``ones`` expression group.

    Returns the ``value`` of the matched expression, or None when the group
    reports no matching expression.
    """
    matched = ones.get_matched_expression(text)
    return matched.value if matched else None  # type: ignore


def parse_tens(matched_text):
    """
    Evaluate a tens numeral.

    When ``WAW_CONNECTOR`` is found in the text, the text is split around it
    into a ones part and a perfect-tens part and the two values are added.
    Otherwise the text is looked up in ``perfect_tens``, then in
    ``eleven_to_nineteen``, and finally tested against ``TEN``.

    Returns the numeric value, or None when nothing matches.
    """
    connector = WAW_CONNECTOR.search(matched_text)
    if connector:
        # "<ones> <waw> <tens>": the ones part comes first in the text.
        ones_text, tens_text = matched_text.split(connector.group(0))
        ones_value = ones.get_matched_expression(ones_text).value  # type: ignore
        tens_value = perfect_tens.get_matched_expression(tens_text).value  # type: ignore
        return ones_value + tens_value

    expression = perfect_tens.get_matched_expression(
        matched_text
    ) or eleven_to_nineteen.get_matched_expression(matched_text)
    if expression:
        return expression.value  # type: ignore

    # TEN is only consulted when neither group produced an expression.
    if TEN.match(matched_text):
        return TEN.value
    return None


def parse_fasila(matched_text: str) -> Optional[float]:
    """
    Handle fasila in the numeral expression

    Parameters
    ----------
    matched_text : str
        Numeral text with fasila. A list of such texts is also accepted, in
        which case only its first element is evaluated.

    Returns
    -------
    Optional[float]
        Float number built from the parts before and after the fasila, or
        None when the text contains no fasila or the list is empty.
    """
    if isinstance(matched_text, list):
        # Only the first captured text is evaluated. An empty list has
        # nothing to parse and must not reach the regex search below.
        if not matched_text:
            return None
        return parse_fasila(matched_text[0])

    fasila = EXPRESSION_OF_FASILA.search(matched_text)
    if not fasila:
        return None
    before, after = matched_text.split(fasila.group(0))
    integral = parse_combined(before)
    fractional = parse_combined(after)
    # Join both parsed parts around a decimal point: "<before>.<after>".
    return float(f"{integral}.{fractional}")


def parse_combined(matched_text, singular=None, plural=None):
    """
    Evaluate a numeral text that may be followed by a unit word.

    When both ``singular`` and ``plural`` are given, the first occurrence
    found by ``plural.search`` (or else ``singular.search``) is removed from
    the text and ``singular.value`` becomes the base value; otherwise the
    base value is 1.

    The remaining text is then interpreted, in order of priority, as a digit
    number (decimal or integer), a tens numeral, a ones numeral or a
    fraction, and the base value is multiplied by the first interpretation
    that succeeds. If none succeeds the base value is returned unchanged.
    """
    value = 1
    if singular and plural:
        unit = plural.search(matched_text) or singular.search(matched_text)
        matched_text = matched_text.replace(unit.group(0), "").strip()
        value = singular.value

    digits = EXPRESSION_DECIMAL.fullmatch(matched_text) or EXPRESSION_INTEGER.fullmatch(
        matched_text
    )
    number = None if digits is None else convert_to_number_if_possible(digits.group())

    # All interpretations are evaluated up front, in this order.
    tens_value = parse_tens(matched_text)
    ones_value = parse_ones(matched_text)
    fraction = FRACTIONS.get_matched_expression(matched_text)

    if number is not None:
        return value * number
    if tens_value is not None:
        return value * tens_value
    if ones_value is not None:
        return value * ones_value
    if fraction is not None:
        return value * fraction.value  # type: ignore
    return value


def parse_numeral(match):
    """
    Compute the numeric value of a numeral match.

    Parameters
    ----------
    match
        Match object exposing ``capturesdict()``, which maps each group name
        to the list of texts captured by that group.

    Returns
    -------
    The sum of the values of the ``trillions``, ``billions``, ``millions``,
    ``thousands``, ``hundreds``, ``tens``, ``ones``, ``decimals`` and
    ``integers`` groups, added in that order. Groups that are missing or
    empty contribute nothing, so the result is 0 when nothing was captured.
    """
    groups = match.capturesdict()
    value = 0

    # Scaled groups: the group name is also the multiplier name.
    for name in ("trillions", "billions", "millions", "thousands", "hundreds"):
        captures = groups.get(name)
        if captures:
            value += get_combined_value(captures, name)

    # Unscaled word groups.
    for name in ("tens", "ones"):
        captures = groups.get(name)
        if captures:
            value += get_combined_value(captures)

    decimals = groups.get("decimals")
    if decimals:
        # Prefer the fasila form; fall back to plain parsing when the
        # captured text contains no fasila.
        fasila_value = parse_fasila(decimals)
        if fasila_value is not None:
            value += fasila_value
        else:
            value += get_combined_value(decimals)

    integers = groups.get("integers")
    if integers:
        value += get_combined_value(integers)

    return value
def get_combinations(*patterns: str):
    """
    Yield every ordered pair of ``patterns`` joined by the fasila expression.

    For each unordered pair ``(a, b)`` (a pattern may pair with itself) the
    generator yields ``a + fasila + b`` and, when the two differ, also
    ``b + fasila + a``.
    """
    for (a, b) in it.combinations_with_replacement(patterns, 2):
        yield a + str(EXPRESSION_OF_FASILA) + b
        if a != b:
            yield b + str(EXPRESSION_OF_FASILA) + a


def get_patterns(one, two, several):
    """
    Build the pattern for one unit from its singular, dual and plural forms.

    The alternatives are: a number (decimal digits, integer digits, a tens
    numeral or a ones numeral) spaced before the singular-or-plural unit, a
    fraction of the singular unit, the dual form, and the bare singular form.
    """
    return non_capturing_group(
        spaced_patterns(EXPRESSION_DECIMAL, non_capturing_group(one, several)),
        spaced_patterns(EXPRESSION_INTEGER, non_capturing_group(one, several)),
        spaced_patterns(tens, non_capturing_group(one, several)),
        spaced_patterns(ones.join(), non_capturing_group(one, several)),
        get_fractions_of_unit_pattern(str(one)),
        two,
        one,
    )


# Word expressions for 0-9.
ones = ExpressionGroup(ZERO, ONE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE)
# Word expressions for 20, 30, ..., 90.
perfect_tens = ExpressionGroup(
    TWENTY, THIRTY, FORTY, FIFTY, SIXTY, SEVENTY, EIGHTY, NINETY
)
# "<ones> <waw> <perfect tens>".
combined_tens = ones.join() + WAW_CONNECTOR + perfect_tens.join()
# Word expressions for 11-19.
eleven_to_nineteen = ExpressionGroup(
    ELEVEN, TWELVE, THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN
)
# Any tens numeral: perfect tens, combined tens, 11-19 or ten itself.
tens = non_capturing_group(
    perfect_tens.join(),
    combined_tens,
    eleven_to_nineteen.join(),
    TEN,
)
# Word expressions for 100, 200, ..., 900.
perfect_hundreds = ExpressionGroup(
    ONE_HUNDRED,
    TWO_HUNDREDS,
    THREE_HUNDREDS,
    FOUR_HUNDREDS,
    FIVE_HUNDREDS,
    SIX_HUNDREDS,
    SEVEN_HUNDREDS,
    EIGHT_HUNDREDS,
    NINE_HUNDREDS,
)

# Named groups; the names are the keys that parse_numeral reads.
ones_group = named_group("ones", ones.join())
tens_group = named_group("tens", tens)
hundreds_group = named_group(
    "hundreds",
    non_capturing_group(
        # The first two members (ONE_HUNDRED, TWO_HUNDREDS) are left to
        # get_patterns below.
        perfect_hundreds[2:].join(),
        get_patterns(ONE_HUNDRED, TWO_HUNDREDS, SEVERAL_HUNDREDS),
    ),
)
thousands_group = named_group(
    "thousands", get_patterns(ONE_THOUSAND, TWO_THOUSANDS, SEVERAL_THOUSANDS)
)
millions_group = named_group(
    "millions", get_patterns(ONE_MILLION, TWO_MILLIONS, SEVERAL_MILLIONS)
)
billions_group = named_group(
    "billions", get_patterns(ONE_BILLION, TWO_BILLIONS, SEVERAL_BILLIONS)
)
trillions_group = named_group(
    "trillions", get_patterns(ONE_TRILLION, TWO_TRILLIONS, SEVERAL_TRILLIONS)
)
integer_group = named_group("integers", str(EXPRESSION_INTEGER))
decimal_group = named_group(
    "decimals",
    non_capturing_group(
        str(EXPRESSION_DECIMAL),
        *list(get_combinations(str(EXPRESSION_INTEGER), tens, ones.join())),
    ),
)
# Every rule below pairs parse_numeral with the pattern it evaluates.

# Ones numerals only.
RULE_NUMERAL_ONES = FunctionValue(parse_numeral, ones_group)

# Tens numerals only.
RULE_NUMERAL_TENS = FunctionValue(parse_numeral, tens_group)

# Hundreds combined with the tens and ones groups.
RULE_NUMERAL_HUNDREDS = FunctionValue(
    parse_numeral, combine_patterns(hundreds_group, tens_group, ones_group)
)

# Thousands combined with the hundreds, tens and ones groups.
RULE_NUMERAL_THOUSANDS = FunctionValue(
    parse_numeral,
    combine_patterns(thousands_group, hundreds_group, tens_group, ones_group),
)

# Millions combined with every smaller word group.
RULE_NUMERAL_MILLIONS = FunctionValue(
    parse_numeral,
    combine_patterns(
        millions_group, thousands_group, hundreds_group, tens_group, ones_group
    ),
)

# Billions combined with every smaller word group.
RULE_NUMERAL_BILLIONS = FunctionValue(
    parse_numeral,
    combine_patterns(
        billions_group,
        millions_group,
        thousands_group,
        hundreds_group,
        tens_group,
        ones_group,
    ),
)

# Trillions combined with every smaller word group.
RULE_NUMERAL_TRILLIONS = FunctionValue(
    parse_numeral,
    combine_patterns(
        trillions_group,
        billions_group,
        millions_group,
        thousands_group,
        hundreds_group,
        tens_group,
        ones_group,
    ),
)

# Integers matched by EXPRESSION_INTEGER.
RULE_NUMERAL_INTEGERS = FunctionValue(parse_numeral, integer_group)

# Decimals: EXPRESSION_DECIMAL or two numerals joined by a fasila.
RULE_NUMERAL_DECIMALS = FunctionValue(parse_numeral, decimal_group)

# All groups together, combined with ``combine_all=True``.
RULE_NUMERAL = FunctionValue(
    parse_numeral,
    combine_patterns(
        trillions_group,
        billions_group,
        millions_group,
        thousands_group,
        hundreds_group,
        decimal_group,
        tens_group,
        ones_group,
        integer_group,
        combine_all=True,
    ),
)