from __future__ import annotations
__all__ = ["Expression"]
import hashlib
import pickle
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Iterable
import regex as re
from regex import Match, Pattern
from maha import LIBRARY_PATH
from .expression_result import ExpressionResult
CACHE_PATH = Path(LIBRARY_PATH) / "rexy" / "cache"
@dataclass
[docs]class Expression:
"""Regex pattern holder.
Parameters
----------
pattern : str
Regular expression pattern.
pickle : bool
If ``True``, the compiled pattern will be pickled. This is useful to save
compilation time for large patterns.
"""
__slots__ = ["pattern", "_compiled_pattern", "pickle"]
"""Regular expersion(s) to match"""
def __init__(
self,
pattern: str,
pickle: bool = False,
):
self.pattern = str(pattern)
self.pickle = pickle
self._compiled_pattern: Pattern[str] = None # type: ignore
[docs] def compile(self):
"""Compile the regular expersion."""
if self._compiled_pattern is None:
if self.pickle:
self._load_compiled_pattern()
else:
self._compiled_pattern = re.compile(self.pattern, re.MULTILINE)
def _load_compiled_pattern(self):
# crp: compiled regex pattern
path = CACHE_PATH / f"{hash(self)}.crp"
if path.exists():
with path.open("rb") as f:
self._compiled_pattern = pickle.load(f)
else:
self._compiled_pattern = re.compile(self.pattern, re.MULTILINE)
with path.open("wb") as f:
pickle.dump(self._compiled_pattern, f)
@classmethod
[docs] def from_cache(cls, cache: str) -> Expression:
"""Load an expression from cache.
Parameters
----------
cache : str
Name of the cache file.
Returns
-------
:class:`~.Expression`
Expression.
"""
try:
expression = cls("names")
with open(CACHE_PATH / f"{cache}.crp", "rb") as f:
expression._compiled_pattern = pickle.load(f)
return expression
except FileNotFoundError:
raise ValueError(f"Cache file {cache} not found")
[docs] def search(self, text: str):
"""Search for the pattern in the input ``text``.
Parameters
----------
text : str
Text to search in.
Returns
-------
:class:`regex.Match`
Matched object.
"""
self.compile()
return self._compiled_pattern.search(text)
[docs] def match(self, text: str) -> Match[str] | None:
"""Match the pattern in the input ``text``.
Parameters
----------
text : str
Text to match in.
Returns
-------
:class:`Match[str]`
Matched object.
"""
self.compile()
return self._compiled_pattern.match(text)
[docs] def fullmatch(self, text: str) -> Match[str] | None:
"""Match the pattern in the input ``text``.
Parameters
----------
text : str
Text to match in.
Returns
-------
:class:`Match[str]`
Matched object.
"""
self.compile()
return self._compiled_pattern.fullmatch(text)
[docs] def sub(self, repl: Callable[..., str] | str, text: str) -> str:
"""Replace all occurrences of the pattern in the input ``text``.
Parameters
----------
repl : str
Replacement string.
text : str
Text to replace.
Returns
-------
str
Text with replaced occurrences.
"""
self.compile()
return self._compiled_pattern.sub(repl, text)
def __call__(self, text: str) -> Iterable[ExpressionResult]:
"""
Extract values from the input ``text``.
Parameters
----------
text : str
Text to extract the value from.
Yields
-------
:class:`~.ExpressionResult`
Extracted value.
"""
yield from self.parse(text)
[docs] def parse(self, text: str) -> Iterable[ExpressionResult]:
"""
Extract values from the input ``text``.
Parameters
----------
text : str
Text to extract the value from.
Yields
-------
:class:`~.ExpressionResult`
Extracted value.
"""
self.compile()
for m in re.finditer(self._compiled_pattern, text):
yield self._parse(m, text)
def _parse(self, match: Match[str], _: str) -> ExpressionResult:
"""Extract the value from the input ``text`` and return it.
.. note::
This is a simple implementation that needs a group to match.
.. warning::
This method is called by :meth:`__call__` to extract the value from
the input ``text``. You should not call this method directly.
Parameters
----------
match : :class:`regex.Match`
Matched object.
text : str
Text in which the match was found.
Yields
-------
:class:`~.ExpressionResult`
Extracted value.
Raises
------
ValueError
If no capture group was found.
"""
start, end = match.span()
captured_groups = match.groups()
if captured_groups is None:
raise ValueError("No captured groups")
if len(captured_groups) == 1:
captured_groups = captured_groups[0]
value = captured_groups
return ExpressionResult(start, end, value, self)
def __str__(self) -> str:
return self.pattern
def __add__(self, other: str | Expression) -> str:
return str(self) + str(other)
def __radd__(self, other):
return str(other) + str(self)
def __hash__(self):
return int(hashlib.md5(self.pattern.encode()).hexdigest(), 16)