__all__ = ["ExpressionGroup"]
from typing import Iterable, List, Optional, Set, Tuple, Union, overload
import maha.rexy as rx
[docs]class ExpressionGroup:
"""A group of expressions that match the same dimension. Expressions are evaluated
in the order they were added.
Parameters
----------
*expressions :
List of expressions to match. High-priority expressions should be passed first.
smart : bool, optional
Whether to parse the text in a smart way. See :meth:`~.smart_parse`.
"""
__slots__ = ["expressions", "smart", "_parsed_ranges"]
def __init__(
self,
*expressions: Union["rx.Expression", "ExpressionGroup"],
smart: bool = False,
):
self.expressions = self._merge_expressions(expressions)
self._parsed_ranges: Set[Tuple[int, int]] = set()
self.smart = smart
[docs] def compile_expressions(self):
for expression in self.expressions:
expression.compile()
def _merge_expressions(
self, expressions: Iterable[Union["rx.Expression", "ExpressionGroup"]]
) -> List["rx.Expression"]:
result = []
for expression in expressions:
if isinstance(expression, ExpressionGroup):
result.extend(expression.expressions)
else:
result.append(expression)
return result
[docs] def add(self, *expression: "rx.Expression") -> None:
"""Add an expression to the group.
Parameters
----------
*expression :
Expressions to add.
"""
self.expressions.extend(expression)
[docs] def join(self) -> str:
"""Returns non capturing group of the expressions.
Returns
-------
str
Non capturing group of the patterns.
"""
return rx.non_capturing_group(*list(map(str, self.expressions)))
[docs] def get_matched_expression(self, text: str) -> Optional["rx.Expression"]:
"""Returns the expression that fully matches the text.
Parameters
----------
text : str
Text to match.
Returns
-------
:class:`~.Expression`
Expression that fully matches the text.
"""
for expression in self.expressions:
if expression.fullmatch(text):
return expression
return None
[docs] def parse(self, text: str) -> Iterable["rx.ExpressionResult"]:
"""
Parses the text.
Parameters
----------
text : str
Text to parse.
Yields
-------
:class:`~.ExpressionResult`
Extracted value.
"""
# TODO: Maybe provide a way to clean the text before parsing?
# (e.g. remove harakat)
if self.smart:
yield from self.smart_parse(text)
else:
yield from self.normal_parse(text)
self._clear_parsed()
[docs] def normal_parse(self, text: str) -> Iterable["rx.ExpressionResult"]:
"""Parse the input ``text`` and return the extracted values.
Parameters
----------
text : str
Text to parse.
Yields
-------
:class:`~.ExpressionResult`
Extracted value.
"""
for expression in self.expressions:
yield from expression.parse(text)
[docs] def smart_parse(self, text: str) -> Iterable["rx.ExpressionResult"]:
"""
Parses the text. If a value matches two or more expressions, only the first
expression parses the value, no value is matched more than once. This means
high-priority expressions should be added to the group first.
Parameters
----------
text : str
Text to parse.
Yields
-------
:class:`~.ExpressionResult`
Extracted value.
"""
for result in self.normal_parse(text):
if self._is_parsed(result):
continue
self._parsed_ranges.add((result.start, result.end))
yield result
def _is_parsed(self, result: "rx.ExpressionResult"):
for start, end in self._parsed_ranges:
if start <= result.start <= end and start <= result.end <= end:
return True
return False
def _clear_parsed(self):
self._parsed_ranges = set()
def __add__(self, other: "ExpressionGroup") -> "ExpressionGroup":
self.expressions.extend(other.expressions)
return self
def __iter__(self):
return iter(self.expressions)
@overload
def __getitem__(self, index: int) -> "rx.Expression":
...
@overload
def __getitem__(self, index: slice) -> "ExpressionGroup":
...
def __getitem__(
self, index: Union[int, slice]
) -> Union["rx.Expression", "ExpressionGroup"]:
if isinstance(index, slice):
return ExpressionGroup(*self.expressions[index])
return self.expressions[index]
def __len__(self) -> int:
return len(self.expressions)