Source code for maha.expressions.arabic

""" Regular expression patterns for Arabic """

__all__ = ["EXPRESSION_ARABIC_HASHTAGS", "EXPRESSION_ARABIC_MENTIONS"]
import re

from maha.constants import (
    ALL_HARAKAT,
    AND_SIGN,
    ARABIC_LETTERS,
    ARABIC_NUMBERS,
    AT_SIGN,
    ENGLISH_NUMBERS,
    HASHTAG,
    PUNCTUATIONS,
    TATWEEL,
    UNDERSCORE,
)
from maha.rexy import Expression

# The template's three `{}` slots are filled, in order, with:
#   1. an alternation of every escaped mark in PUNCTUATIONS except AT_SIGN,
#      AND_SIGN and UNDERSCORE -- extra characters allowed right before the tag,
#      next to whitespace / start of text / newline;
#   2. the characters of ARABIC_LETTERS, ALL_HARAKAT, ARABIC_NUMBERS and TATWEEL,
#      placed in a character class together with a literal underscore;
#   3. the characters of ENGLISH_NUMBERS, placed in an optional class together
#      with a literal hyphen (the leading `-` in `[-...]`).
# The single capture group is the whole tag, starting at the `#` sign.
# NOTE(review): the lookbehind mixes the zero-width `^` with one-character
# alternatives; the standard-library `re` module rejects variable-width
# lookbehinds, so this presumably relies on `Expression` compiling with an
# engine that accepts them -- confirm against maha.rexy.
EXPRESSION_ARABIC_HASHTAGS = Expression(
    r"(?<=\s|^|\n|{})(#(?:[{}_][-{}]?)+)\b".format(
        "|".join(
            re.escape(pun)
            for pun in PUNCTUATIONS
            if pun not in (AT_SIGN, AND_SIGN, UNDERSCORE)
        ),
        "".join(ARABIC_LETTERS + ALL_HARAKAT + ARABIC_NUMBERS) + TATWEEL,
        "".join(ENGLISH_NUMBERS),
    )
)
""" Expression that matches Arabic hashtags """
# Built from the hashtag expression's pattern text by substituting AT_SIGN for
# HASHTAG, so mentions follow the same character rules as hashtags.
# NOTE(review): `str.replace` swaps *every* occurrence of HASHTAG in the
# pattern, not only the tag prefix; if PUNCTUATIONS contains HASHTAG, the
# corresponding lookbehind alternative is rewritten as well -- confirm that
# this is intended. This also assumes HASHTAG equals the literal `#` written
# in the hashtag pattern.
EXPRESSION_ARABIC_MENTIONS = Expression(
    EXPRESSION_ARABIC_HASHTAGS.pattern.replace(HASHTAG, AT_SIGN)
)
""" Expression that matches Arabic mentions """