"""
Logic for converting numbers to text
"""
# Public API of this module: the only name exported by a star-import.
__all__ = [
    "numbers_to_text",
]
import regex as re
import maha.cleaners.functions as functions
from maha.constants import TEH_MARBUTA
from maha.expressions import EXPRESSION_DECIMAL, EXPRESSION_INTEGER
from maha.parsers.utils import convert_to_number_if_possible
from maha.rexy import non_capturing_group
# Word placed between the integer part and the fractional part of a decimal.
FASILA = "فاصلة"
# Suffix appended to "two" forms (2, 200, 2000, ...) when accusative is False.
TWO_SUFFIX_NOMINATIVE = "ان"
# Suffix appended to the tens (20, 30, ...) when accusative is False.
SUM_SUFFIX_NOMINATIVE = "ون"
# Suffix used in place of both suffixes above when accusative is True.
TWO_SUM_SUFFIX_ACCUSATIVE = "ين"
# Conjunction ("and") glued in front of every part after the first.
CONNECTOR = "و"
# Word for 100; also appended to a digit stem to build 300..900.
ONE_HUNDRED = "مائة"
# Stem for 200; a "two" suffix is appended to it.
ONE_HUNDRED_PREFIX = "مئت"
# Singular scale words.
ONE_THOUSAND = "ألف"
ONE_MILLION = "مليون"
ONE_BILLION = "مليار"
ONE_TRILLION = "تريليون"
# Plural scale words, selected by _get_multiplier for group values up to 10
# (1 and 2 are handled separately with the singular form).
BELOW_TEN_THOUSANDS = "آلاف"
BELOW_TEN_MILLIONS = "ملايين"
BELOW_TEN_BILLIONS = "مليارات"
BELOW_TEN_TRILLIONS = "تريليونات"
# Stem shared by 10, the second word of 11..19, and 20..29.
TEN_PREFIX = "عشر"
# First word of 11.
ELEVEN_PREFIX = "أحد"
# Shortened stem of eight used for 80..89 and 800 (NUMBER_MAP["8"] is used
# for a standalone 8 and for 18).
EIGHT_PREFIX = "ثمان"
# Index of a three-digit group (1 = thousands, 2 = millions, ...) mapped to
# [singular scale word, plural scale word].
MULTIPLIER_MAP = {
    1: [ONE_THOUSAND, BELOW_TEN_THOUSANDS],
    2: [ONE_MILLION, BELOW_TEN_MILLIONS],
    3: [ONE_BILLION, BELOW_TEN_BILLIONS],
    4: [ONE_TRILLION, BELOW_TEN_TRILLIONS],
}
# Digit character mapped to its base word. Except for "0" and "1", callers
# append a suffix (TEH_MARBUTA, a "two" suffix, a tens suffix or ONE_HUNDRED)
# to form the final word.
NUMBER_MAP = {
    "0": "صفر",
    "1": "واحد",
    "2": "إثن",
    "3": "ثلاث",
    "4": "أربع",
    "5": "خمس",
    "6": "ست",
    "7": "سبع",
    "8": "ثماني",
    "9": "تسع",
}
def numbers_to_text(text: str, accusative: bool = False) -> str:
    """Converts numbers in text to their equivalent text in Arabic.

    Parameters
    ----------
    text : str
        Text with numbers to be converted.
    accusative : bool, optional
        If True, the number will be converted to its accusative form.
        Defaults to False.

    Returns
    -------
    str
        Text with numbers converted to their equivalent text in Arabic.
    """
    # Normalise the digits before matching (the helper's name suggests it maps
    # Arabic digits to their English counterparts -- confirm in
    # maha.cleaners.functions).
    converted_text = functions.arabic_numbers_to_english(text)

    # The decimal expression is passed before the integer one; presumably the
    # helper builds an alternation, so decimals are tried first and "1.5" is
    # not split into two integers -- confirm in maha.rexy.
    pattern = non_capturing_group(EXPRESSION_DECIMAL, EXPRESSION_INTEGER)

    # Every match is replaced by its spelled-out form.
    return re.sub(
        pattern,
        lambda match: number_to_text(match.group(), accusative),
        converted_text,
    )
def number_to_text(number: str, accusative: bool) -> str:
    """Converts a single number, given as text, to Arabic words.

    Parameters
    ----------
    number : str
        The number as matched in the input text.
    accusative : bool
        If True, the accusative suffixes are used.

    Returns
    -------
    str
        The spelled-out number. ``number`` is returned unchanged when its
        normalised string form is longer than 15 characters.
    """
    import logging

    # NOTE(review): the helper is assumed to return an int/float (or the
    # original string when it cannot convert) -- confirm in maha.parsers.utils.
    # If it returns a float, str() can yield exponent notation for very small
    # or very large values, which the "." split below does not handle.
    number_corrected = convert_to_number_if_possible(number)
    number_as_text = str(number_corrected)

    # MULTIPLIER_MAP stops at trillions (five three-digit groups). The limit is
    # applied to the whole string, so a decimal point and fractional digits
    # count as well.
    if len(number_as_text) > 15:
        # Report through logging rather than print() so library users control
        # whether and where the message appears.
        logging.getLogger(__name__).warning(
            "Number %s is too long to be converted to text", number
        )
        return number

    splits = number_as_text.split(".")
    if len(splits) != 2:
        # No decimal point: only the integer part is spelled out.
        return _convert_number(splits[0], accusative)

    integer, decimal = splits
    integer_part = _convert_number(integer, accusative)
    decimal_part = _handle_decimal_part(integer, decimal, accusative)
    has_integer = int(integer) != 0
    has_decimal = int(decimal) != 0

    if has_integer and has_decimal:
        return f"{integer_part} {FASILA} {decimal_part}"
    if has_integer:
        return integer_part
    if has_decimal:
        return decimal_part
    # Both parts are zero: integer_part is the word for zero and decimal_part
    # is empty.
    return integer_part + decimal_part
def _handle_decimal_part(integer_part: str, decimal_part: str, accusative: bool) -> str:
number = int(decimal_part)
suffix = ""
if number == 0:
return ""
if decimal_part[0] == "0" or int(integer_part) == 0:
decimal_suffix = "1" + len(decimal_part.rstrip("0")) * "0"
suffix = " من " + _convert_number(decimal_suffix, accusative)
return _convert_number(decimal_part, accusative) + suffix
def _convert_number(number: str, accusative) -> str:
    """Spells out a non-negative integer given as a string of digits.

    Parameters
    ----------
    number : str
        The digits to convert; expected to contain only digits.
    accusative : bool
        If True, the accusative suffixes are used.

    Returns
    -------
    str
        The spelled-out groups, most significant first, joined with a space
        followed by CONNECTOR.
    """
    if number == "0":
        return NUMBER_MAP[number]

    # Split the reversed string so that groups of up to three digits are cut
    # from the least-significant end, then restore each group's digit order.
    reversed_chunks = re.split(r"(\d{1,3})", number[::-1])
    groups = [chunk[::-1] for chunk in reversed_chunks if chunk]

    pieces = []
    for scale_index, group in enumerate(groups):
        words = _get_text_for_hundreds(group, accusative)
        scale_word = _get_multiplier(group, scale_index, accusative)
        if not words:
            # An all-zero group contributes nothing.
            continue
        if not scale_word:
            pieces.append(words)
        elif int(group) <= 2:
            # For one and two the scale word alone is emitted.
            pieces.append(scale_word)
        else:
            pieces.append(words + " " + scale_word)

    # Groups were collected least-significant first.
    pieces.reverse()
    return f" {CONNECTOR}".join(pieces)
def _get_multiplier(part: str, i: int, accusative) -> str:
if i == 0:
return ""
two_suffix = TWO_SUM_SUFFIX_ACCUSATIVE if accusative else TWO_SUFFIX_NOMINATIVE
number = int(part)
multiplier = MULTIPLIER_MAP[i]
if number == 1:
return multiplier[0]
if number == 2:
return multiplier[0] + two_suffix
return multiplier[number <= 10]
def _get_text_for_hundreds(part: str, accusative: bool) -> str:
    """Spells out one group of up to three digits.

    Parameters
    ----------
    part : str
        The digits of the group; leading zeros are ignored.
    accusative : bool
        If True, the accusative suffixes are used.

    Returns
    -------
    str
        The words for the group. Values below 100 are delegated to
        ``_get_text_for_tens``.
    """
    value = int(part)
    digits = str(value)  # canonical form without leading zeros
    if value < 100:
        return _get_text_for_tens(digits, accusative)

    hundreds_digit = digits[0]
    if accusative:
        dual_suffix = TWO_SUM_SUFFIX_ACCUSATIVE
    else:
        dual_suffix = TWO_SUFFIX_NOMINATIVE

    # 100, 200 and 800 do not follow the regular "<digit stem> + hundred" form.
    irregular_hundreds = {
        "1": ONE_HUNDRED,
        "2": ONE_HUNDRED_PREFIX + dual_suffix,
        "8": EIGHT_PREFIX + ONE_HUNDRED,
    }
    if hundreds_digit in irregular_hundreds:
        hundreds_word = irregular_hundreds[hundreds_digit]
    else:
        hundreds_word = NUMBER_MAP[hundreds_digit] + ONE_HUNDRED

    if digits[1:3] == "00":
        return hundreds_word

    remainder_words = _get_text_for_tens(digits[1:], accusative)
    return hundreds_word + f" {CONNECTOR}" + remainder_words
def _get_text_for_tens(part: str, accusative: bool) -> str:
    """Spells out a value below 100.

    Parameters
    ----------
    part : str
        The digits to convert; leading zeros are ignored.
    accusative : bool
        If True, the accusative suffixes are used.

    Returns
    -------
    str
        Empty string for zero, otherwise the words for the value. For 21..99
        with a non-zero units digit, the units word comes first, followed by
        CONNECTOR and the tens word.
    """
    value = int(part)
    digits = str(value)  # canonical form without leading zeros
    if accusative:
        tens_suffix = TWO_SUM_SUFFIX_ACCUSATIVE
    else:
        tens_suffix = SUM_SUFFIX_NOMINATIVE

    if value == 0:
        return ""
    if value < 10:
        return _from_one_to_nine(digits, accusative)

    if value < 20:
        if value == 10:
            return TEN_PREFIX + TEH_MARBUTA
        if value == 11:
            return ELEVEN_PREFIX + " " + TEN_PREFIX
        if value == 12:
            twelve_ending = "ي" if accusative else "ا"
            return NUMBER_MAP["2"] + twelve_ending + " " + TEN_PREFIX
        # 13..19: units stem with TEH_MARBUTA, then the word for ten.
        return NUMBER_MAP[digits[1]] + TEH_MARBUTA + " " + TEN_PREFIX

    if value == 20:
        return TEN_PREFIX + tens_suffix

    tens_digit = digits[0]
    units_digit = digits[1]

    if units_digit == "0":
        # Round tens: only eighty needs its shortened stem here.
        round_stem = EIGHT_PREFIX if tens_digit == "8" else NUMBER_MAP[tens_digit]
        return round_stem + tens_suffix

    # Twenty is built on the stem of ten and eighty on the shortened stem of
    # eight; all other tens use the regular digit stem.
    irregular_stems = {"8": EIGHT_PREFIX, "2": TEN_PREFIX}
    if tens_digit in irregular_stems:
        tens_word = irregular_stems[tens_digit] + tens_suffix
    else:
        tens_word = NUMBER_MAP[tens_digit] + tens_suffix

    units_word = _from_one_to_nine(units_digit, accusative)
    return units_word + " " + CONNECTOR + tens_word
def _from_one_to_nine(part: str, accusative: bool) -> str:
    """Spells out a single digit value.

    Parameters
    ----------
    part : str
        The digit to convert; leading zeros are ignored. Callers in this
        module only pass values from 1 to 9.
    accusative : bool
        If True, the accusative suffix is used for two.

    Returns
    -------
    str
        The base word for 1, the stem plus a "two" suffix for 2, and the stem
        plus TEH_MARBUTA for every other value.
    """
    value = int(part)
    stem = NUMBER_MAP[str(value)]  # str(value) drops leading zeros

    if value == 1:
        return stem
    if value == 2:
        if accusative:
            return stem + TWO_SUM_SUFFIX_ACCUSATIVE
        return stem + TWO_SUFFIX_NOMINATIVE
    return stem + TEH_MARBUTA