Source code for maha.parsers.rules.duration.rule

"""Rules to extract duration."""
from __future__ import annotations

# Public API of this module: the seven per-unit rules, the combined
# ``RULE_DURATION`` rule and the ``parse_duration`` callback they all share.
# Helper functions and the intermediate pattern variables are not listed here.
__all__ = [
    "RULE_DURATION_SECONDS",
    "RULE_DURATION_MINUTES",
    "RULE_DURATION_HOURS",
    "RULE_DURATION_DAYS",
    "RULE_DURATION_WEEKS",
    "RULE_DURATION_MONTHS",
    "RULE_DURATION_YEARS",
    "RULE_DURATION",
    "parse_duration",
]


from maha.parsers.rules.numeral.rule import (
    EXPRESSION_NUMERAL_MAP,
    RULE_NUMERAL,
    _parse_numeral,
)
from maha.parsers.templates import FunctionValue, Unit
from maha.rexy import ExpressionGroup, named_group, non_capturing_group

from ..common import (
    FRACTIONS,
    combine_patterns,
    get_fractions_of_unit_pattern,
    spaced_patterns,
)
from .template import *
from .values import *


def get_pattern(singular_frac_group, singular, dual, all_units):
    """Build the regex pattern for a duration expression.

    Four sub-patterns are handed to ``non_capturing_group`` in this order:

    1. ``RULE_NUMERAL`` combined with ``all_units`` through ``spaced_patterns``,
    2. a ``"fractions"`` named group wrapping the fraction-of-unit pattern
       built from ``singular_frac_group``,
    3. ``dual``,
    4. ``singular``.

    Args:
        singular_frac_group: Singular unit pattern(s) used to build the
            fraction-of-unit pattern.
        singular: Pattern for the singular unit form(s).
        dual: Pattern for the dual unit form(s).
        all_units: Pattern for every form of the unit(s).

    Returns:
        Whatever ``non_capturing_group`` returns for the four sub-patterns
        (presumably a regex string of alternatives -- confirm in ``maha.rexy``).
    """
    numeral_then_unit = spaced_patterns(RULE_NUMERAL, all_units)
    fraction_of_unit = named_group(
        "fractions", get_fractions_of_unit_pattern(singular_frac_group)
    )
    return non_capturing_group(numeral_then_unit, fraction_of_unit, dual, singular)


def merge_same_units(values: list[ValueUnit]) -> list[ValueUnit]:
    """Collapse entries of ``values`` that share a unit into a single entry.

    The first entry seen for a unit is kept and the ``value`` of every later
    entry with that unit is added onto it **in place**, so the kept objects
    from the input list are mutated.

    Args:
        values: Items exposing ``unit`` (hashable) and ``value`` attributes.

    Returns:
        One item per distinct unit, ordered by first appearance.
    """
    merged: dict[Unit, ValueUnit] = {}
    for item in values:
        kept = merged.get(item.unit)
        if kept is None:
            # First time this unit shows up: keep the object itself.
            merged[item.unit] = item
        else:
            kept.value += item.value
    return [*merged.values()]


def get_unit_fraction_value(matched_text):
    """Turn a two-part "fraction + unit" text into a ``ValueUnit``.

    ``matched_text`` is split at its first space. Whichever part ``FRACTIONS``
    recognises is treated as the fraction (the first part is tried first); the
    other part is resolved to a unit with ``get_matched_value``. The returned
    object carries the unit of the unit part and the value of the fraction.

    Args:
        matched_text: Text containing at least one space.

    Returns:
        The ``ValueUnit`` for the unit part, with ``value`` replaced by the
        fraction's value.

    Raises:
        ValueError: If ``matched_text`` contains no space (unpacking fails).
    """
    first_part, second_part = matched_text.split(" ", 1)

    fraction = FRACTIONS.get_matched_expression(first_part)
    if fraction:
        unit_text = second_part
    else:
        # The fraction was not the first word, so look it up in the remainder.
        fraction = FRACTIONS.get_matched_expression(second_part)
        unit_text = first_part

    result = get_matched_value(unit_text)
    result.value = fraction.value  # type: ignore
    return result


def _parse(matched_text, singular=None, plural=None):
    """Parse the amount in ``matched_text`` into a ``ValueUnit``.

    When both ``singular`` and ``plural`` are given, the unit word is located
    (``plural`` is searched first, then ``singular``) and removed from the
    text. The remaining text is interpreted as a fraction if ``FRACTIONS``
    recognises it, otherwise as a numeral via ``RULE_NUMERAL``.

    Args:
        matched_text: Text holding an amount and, optionally, a unit word.
        singular: Object exposing ``search(text)`` and ``value.unit``.
            Although it defaults to ``None``, its ``value.unit`` is read
            unconditionally, so omitting it raises ``AttributeError``.
        plural: Object exposing ``search(text)``.

    Returns:
        A ``ValueUnit`` with the parsed amount and the unit of ``singular``.
    """
    if singular and plural:
        unit_match = plural.search(matched_text) or singular.search(matched_text)
        unit_word = unit_match.group(0)
        matched_text = matched_text.replace(unit_word, "").strip()

    fraction = FRACTIONS.get_matched_expression(matched_text)
    result = ValueUnit(0, singular.value.unit)
    if fraction is None:
        # Not a fraction: take the value of the first numeral match.
        result.value = list(RULE_NUMERAL(matched_text))[0].value
    else:
        result.value = fraction.value  # type: ignore

    return result


# Every unit expression known to this module (the SEVERAL_*, TWO_* and ONE_*
# forms of the seven units) gathered into one group. ``get_matched_value``
# queries it to resolve a matched unit text to its expression.
# NOTE(review): the SEVERAL_* -> TWO_* -> ONE_* ordering presumably gives the
# longer forms priority during lookup -- confirm against
# ``ExpressionGroup.get_matched_expression`` before reordering.
UnitsGroup = ExpressionGroup(
    SEVERAL_SECONDS,
    SEVERAL_MINUTES,
    SEVERAL_HOURS,
    SEVERAL_DAYS,
    SEVERAL_WEEKS,
    SEVERAL_MONTHS,
    SEVERAL_YEARS,
    TWO_SECONDS,
    TWO_MINUTES,
    TWO_HOURS,
    TWO_DAYS,
    TWO_WEEKS,
    TWO_MONTHS,
    TWO_YEARS,
    ONE_SECOND,
    ONE_MINUTE,
    ONE_HOUR,
    ONE_DAY,
    ONE_WEEK,
    ONE_MONTH,
    ONE_YEAR,
)


def get_matched_value(matched_text) -> ValueUnit:
    """Resolve a unit text to a fresh ``ValueUnit``.

    The text is looked up in ``UnitsGroup``; the ``value`` and ``unit`` of the
    matched expression's value are copied into a new ``ValueUnit``, so callers
    may overwrite the result's ``value`` without touching the expression.

    Args:
        matched_text: Text of a matched unit.

    Returns:
        A new ``ValueUnit`` carrying the matched expression's value and unit.
    """
    expression = UnitsGroup.get_matched_expression(matched_text)
    template = expression.value  # type: ignore
    return ValueUnit(value=template.value, unit=template.unit)


def get_groups():
    """Return the names of the unit capture groups, from seconds up to years.

    A new list is built on every call, so callers may modify the result.
    """
    group_names = ("seconds", "minutes", "hours", "days", "weeks", "months", "years")
    return list(group_names)


def parse_duration(match):
    """Parse duration.

    Converts a match of one of the duration rules into a ``DurationValue``:

    1. Captures of the numeral groups (keys of ``EXPRESSION_NUMERAL_MAP``) and
       of the unit groups (``get_groups()``) are collected and ordered by
       their start position in the text.
    2. Walking that order, numeral captures are buffered until a unit capture
       is reached; the buffered numerals are parsed with ``_parse_numeral``
       and, when the result is truthy, replace the unit's own value.
    3. Captures of the ``"fractions"`` group are converted with
       ``get_unit_fraction_value``.
    4. Entries with the same unit are merged and the result is sorted by
       ``unit.value`` in descending order.

    Args:
        match: Match object providing ``capturesdict()`` and ``starts()``
            (presumably a match from the third-party ``regex`` package --
            confirm against ``FunctionValue``).

    Returns:
        A ``DurationValue`` wrapping the merged, sorted list of values.
    """
    groups = match.capturesdict()
    groups_keys = list(groups)
    duration_groups = get_groups()

    sorted_values = {}
    for group in list(EXPRESSION_NUMERAL_MAP) + duration_groups:
        if group not in groups:
            continue
        # The group number is derived from the key's position in
        # ``capturesdict()`` (1-based). NOTE(review): this assumes the keys are
        # ordered by group number and that every capturing group is named.
        starts = match.starts(groups_keys.index(group) + 1)
        for i, value in enumerate(groups[group]):
            sorted_values[starts[i]] = {"group": group, "value": value}
    sorted_values = dict(sorted(sorted_values.items()))

    # holds numeral values before a unit
    temp_dict = {}
    values = []
    for index, item in sorted_values.items():
        if item["group"] not in duration_groups:
            temp_dict[index] = item
            continue

        numeral = _parse_numeral(temp_dict)
        valueunit = get_matched_value(item["value"])
        if numeral:
            valueunit.value = numeral
        values.append(valueunit)
        # Start a fresh buffer for the numerals of the next unit.
        temp_dict = {}

    values.extend(
        get_unit_fraction_value(item) for item in groups.get("fractions", [])
    )

    values = merge_same_units(values)
    values.sort(key=lambda v: v.unit.value, reverse=True)
    return DurationValue(values)
# One named capture group per unit. Each wraps the ONE_*, TWO_* and SEVERAL_*
# expressions of that unit; the group names are the ones returned by
# ``get_groups()`` and looked up by ``parse_duration``.
_seconds = named_group(
    "seconds", non_capturing_group(ONE_SECOND, TWO_SECONDS, SEVERAL_SECONDS)
)
_minutes = named_group(
    "minutes", non_capturing_group(ONE_MINUTE, TWO_MINUTES, SEVERAL_MINUTES)
)
_hours = named_group("hours", non_capturing_group(ONE_HOUR, TWO_HOURS, SEVERAL_HOURS))
_days = named_group("days", non_capturing_group(ONE_DAY, TWO_DAYS, SEVERAL_DAYS))
_weeks = named_group("weeks", non_capturing_group(ONE_WEEK, TWO_WEEKS, SEVERAL_WEEKS))
_months = named_group(
    "months", non_capturing_group(ONE_MONTH, TWO_MONTHS, SEVERAL_MONTHS)
)
_years = named_group("years", non_capturing_group(ONE_YEAR, TWO_YEARS, SEVERAL_YEARS))

# Every form of every unit, used after a numeral in ``RULE_DURATION``.
all_units = non_capturing_group(
    _seconds, _minutes, _hours, _days, _weeks, _months, _years
)

# Only the TWO_* form of each unit, captured under the unit's group name.
dual_units = non_capturing_group(
    named_group("seconds", TWO_SECONDS),
    named_group("minutes", TWO_MINUTES),
    named_group("hours", TWO_HOURS),
    named_group("days", TWO_DAYS),
    named_group("weeks", TWO_WEEKS),
    named_group("months", TWO_MONTHS),
    named_group("years", TWO_YEARS),
)

# Only the ONE_* form of each unit, captured under the unit's group name.
singular_units = non_capturing_group(
    named_group("seconds", ONE_SECOND),
    named_group("minutes", ONE_MINUTE),
    named_group("hours", ONE_HOUR),
    named_group("days", ONE_DAY),
    named_group("weeks", ONE_WEEK),
    named_group("months", ONE_MONTH),
    named_group("years", ONE_YEAR),
)
# Rule for durations expressed in seconds only: ``parse_duration`` is applied
# to matches of the seconds pattern (numeral + unit, fraction of a second,
# dual form, or singular form).
RULE_DURATION_SECONDS = FunctionValue(
    parse_duration,
    combine_patterns(
        get_pattern(
            ONE_SECOND,
            named_group("seconds", ONE_SECOND),
            named_group("seconds", TWO_SECONDS),
            _seconds,
        )
    ),
)
# Rule for durations expressed in minutes only: ``parse_duration`` is applied
# to matches of the minutes pattern (numeral + unit, fraction of a minute,
# dual form, or singular form).
RULE_DURATION_MINUTES = FunctionValue(
    parse_duration,
    combine_patterns(
        get_pattern(
            ONE_MINUTE,
            named_group("minutes", ONE_MINUTE),
            named_group("minutes", TWO_MINUTES),
            _minutes,
        )
    ),
)
# Rule for durations expressed in hours only: ``parse_duration`` is applied
# to matches of the hours pattern (numeral + unit, fraction of an hour,
# dual form, or singular form).
RULE_DURATION_HOURS = FunctionValue(
    parse_duration,
    combine_patterns(
        get_pattern(
            ONE_HOUR,
            named_group("hours", ONE_HOUR),
            named_group("hours", TWO_HOURS),
            _hours,
        )
    ),
)
# Rule for durations expressed in days only: ``parse_duration`` is applied
# to matches of the days pattern (numeral + unit, fraction of a day,
# dual form, or singular form).
RULE_DURATION_DAYS = FunctionValue(
    parse_duration,
    combine_patterns(
        get_pattern(
            ONE_DAY,
            named_group("days", ONE_DAY),
            named_group("days", TWO_DAYS),
            _days,
        )
    ),
)
# Rule for durations expressed in weeks only: ``parse_duration`` is applied
# to matches of the weeks pattern (numeral + unit, fraction of a week,
# dual form, or singular form).
RULE_DURATION_WEEKS = FunctionValue(
    parse_duration,
    combine_patterns(
        get_pattern(
            ONE_WEEK,
            named_group("weeks", ONE_WEEK),
            named_group("weeks", TWO_WEEKS),
            _weeks,
        )
    ),
)
# Rule for durations expressed in months only: ``parse_duration`` is applied
# to matches of the months pattern (numeral + unit, fraction of a month,
# dual form, or singular form).
RULE_DURATION_MONTHS = FunctionValue(
    parse_duration,
    combine_patterns(
        get_pattern(
            ONE_MONTH,
            named_group("months", ONE_MONTH),
            named_group("months", TWO_MONTHS),
            _months,
        )
    ),
)
# Rule for durations expressed in years only: ``parse_duration`` is applied
# to matches of the years pattern (numeral + unit, fraction of a year,
# dual form, or singular form).
RULE_DURATION_YEARS = FunctionValue(
    parse_duration,
    combine_patterns(
        get_pattern(
            ONE_YEAR,
            named_group("years", ONE_YEAR),
            named_group("years", TWO_YEARS),
            _years,
        )
    ),
)
# Rule covering all seven units at once: the pattern is built from the
# singular, dual and all-forms groups of every unit, and ``parse_duration``
# is applied to its matches.
# NOTE(review): ``combine_all=True`` presumably lets several consecutive
# unit expressions be combined into a single match -- confirm in
# ``combine_patterns``.
RULE_DURATION = FunctionValue(
    parse_duration,
    combine_patterns(
        get_pattern(
            non_capturing_group(
                ONE_SECOND, ONE_MINUTE, ONE_HOUR, ONE_DAY, ONE_WEEK, ONE_MONTH, ONE_YEAR
            ),
            singular_units,
            dual_units,
            all_units,
        ),
        combine_all=True,
    ),
)