Source code for maha.rexy.templates.expression_group

__all__ = ["ExpressionGroup"]

from typing import Iterable, List, Optional, Set, Tuple, Union, overload

import maha.rexy as rx


class ExpressionGroup:
    """A group of expressions that match the same dimension.

    Expressions are evaluated in the order they were added.

    Parameters
    ----------
    *expressions :
        Expressions (or other groups, which are flattened) to match.
        High-priority expressions should be passed first.
    smart : bool, optional
        Whether to parse the text in a smart way. See :meth:`~.smart_parse`.
    """

    __slots__ = ["expressions", "smart", "_parsed_ranges"]

    def __init__(
        self,
        *expressions: Union["rx.Expression", "ExpressionGroup"],
        smart: bool = False,
    ):
        self.expressions = self._merge_expressions(expressions)
        # (start, end) spans already yielded during the current smart parse.
        self._parsed_ranges: Set[Tuple[int, int]] = set()
        self.smart = smart

    def compile_expressions(self) -> None:
        """Call ``compile()`` on every expression of the group, in order."""
        for expression in self.expressions:
            expression.compile()

    def _merge_expressions(
        self, expressions: Iterable[Union["rx.Expression", "ExpressionGroup"]]
    ) -> List["rx.Expression"]:
        """Flatten ``expressions`` into a single list, preserving order.

        Nested :class:`ExpressionGroup` instances contribute their own
        expressions; anything else is appended as is.
        """
        result: List["rx.Expression"] = []
        for expression in expressions:
            if isinstance(expression, ExpressionGroup):
                result.extend(expression.expressions)
            else:
                result.append(expression)
        return result

    def add(self, *expression: "rx.Expression") -> None:
        """Add an expression to the group.

        Parameters
        ----------
        *expression :
            Expressions to add. They are appended after the existing ones.
        """
        self.expressions.extend(expression)

    def join(self) -> str:
        """Returns non capturing group of the expressions.

        Returns
        -------
        str
            Non capturing group of the patterns.
        """
        return rx.non_capturing_group(*map(str, self.expressions))

    def get_matched_expression(self, text: str) -> Optional["rx.Expression"]:
        """Returns the expression that fully matches the text.

        Parameters
        ----------
        text : str
            Text to match.

        Returns
        -------
        :class:`~.Expression`
            First expression (in insertion order) that fully matches the
            text, or ``None`` if no expression does.
        """
        for expression in self.expressions:
            if expression.fullmatch(text):
                return expression
        return None

    def parse(self, text: str) -> Iterable["rx.ExpressionResult"]:
        """Parses the text.

        Dispatches to :meth:`~.smart_parse` when ``smart`` is set, otherwise
        to :meth:`~.normal_parse`.

        Parameters
        ----------
        text : str
            Text to parse.

        Yields
        ------
        :class:`~.ExpressionResult`
            Extracted value.
        """
        # TODO: Maybe provide a way to clean the text before parsing?
        # (e.g. remove harakat)
        try:
            if self.smart:
                yield from self.smart_parse(text)
            else:
                yield from self.normal_parse(text)
        finally:
            # Runs even when the consumer stops iterating early, so spans
            # recorded for this text never leak into the next parse.
            self._clear_parsed()

    def normal_parse(self, text: str) -> Iterable["rx.ExpressionResult"]:
        """Parse the input ``text`` and return the extracted values.

        Every expression parses the whole text independently, so the same
        span may be yielded by more than one expression.

        Parameters
        ----------
        text : str
            Text to parse.

        Yields
        ------
        :class:`~.ExpressionResult`
            Extracted value.
        """
        for expression in self.expressions:
            yield from expression.parse(text)

    def smart_parse(self, text: str) -> Iterable["rx.ExpressionResult"]:
        """Parses the text, yielding each matched span at most once.

        If a value matches two or more expressions, only the first expression
        parses the value, no value is matched more than once. This means
        high-priority expressions should be added to the group first.

        Parameters
        ----------
        text : str
            Text to parse.

        Yields
        ------
        :class:`~.ExpressionResult`
            Extracted value.
        """
        # Start from a clean slate so a direct call is not affected by spans
        # left behind by an earlier, unrelated parse.
        self._clear_parsed()
        try:
            for result in self.normal_parse(text):
                if self._is_parsed(result):
                    continue
                self._parsed_ranges.add((result.start, result.end))
                yield result
        finally:
            self._clear_parsed()

    def _is_parsed(self, result: "rx.ExpressionResult") -> bool:
        """Return ``True`` if ``result`` lies inside an already parsed span.

        Both bounds are compared inclusively; a result that only partially
        overlaps a recorded span is not considered parsed.
        """
        return any(
            start <= result.start <= end and start <= result.end <= end
            for start, end in self._parsed_ranges
        )

    def _clear_parsed(self) -> None:
        """Forget every span recorded by :meth:`~.smart_parse`."""
        self._parsed_ranges.clear()

    def __add__(self, other: "ExpressionGroup") -> "ExpressionGroup":
        """Append the expressions of ``other`` to this group.

        NOTE: this extends the left operand in place and returns it; no new
        group is created.
        """
        self.expressions.extend(other.expressions)
        return self

    def __iter__(self):
        return iter(self.expressions)

    @overload
    def __getitem__(self, index: int) -> "rx.Expression":
        ...

    @overload
    def __getitem__(self, index: slice) -> "ExpressionGroup":
        ...

    def __getitem__(
        self, index: Union[int, slice]
    ) -> Union["rx.Expression", "ExpressionGroup"]:
        """Return one expression, or a new group for a slice.

        A sliced group keeps the ``smart`` setting of the group it was
        taken from.
        """
        if isinstance(index, slice):
            return ExpressionGroup(*self.expressions[index], smart=self.smart)
        return self.expressions[index]

    def __len__(self) -> int:
        return len(self.expressions)