# Source code for maha.datasets.functions.load_fn

__all__ = ["load_dataset"]

from typing import Union, overload

from typing_extensions import Literal

from ..templates import Dataset, IterableDataset, Name
from ..utils import DATASETS_MAP, get_dataset_path


@overload
def load_dataset(
    name: Literal["names"], streaming: Literal[False] = False
) -> Dataset[Name]:
    ...


@overload
def load_dataset(
    name: Literal["names"], streaming: Literal[True]
) -> IterableDataset[Name]:
    ...


# Fallback for callers that pass a plain ``bool`` (not a literal) for
# ``streaming``; without it a type checker finds no matching overload.
@overload
def load_dataset(
    name: Literal["names"], streaming: bool
) -> Union[Dataset[Name], IterableDataset[Name]]:
    ...


def load_dataset(name: str, streaming: bool = False) -> Union[Dataset, IterableDataset]:
    """Loads a dataset.

    The dataset is looked up by ``name`` in ``DATASETS_MAP``; its location is
    resolved with ``get_dataset_path`` and the mapped template is handed to
    the dataset class together with that path.

    Parameters
    ----------
    name
        Name of the dataset. Must be one of the keys of ``DATASETS_MAP``.
    streaming : bool, optional
        Whether to return a streaming dataset. If set to True, an
        :class:`~.IterableDataset` is returned instead of a
        :class:`~.Dataset`. Defaults to False.

    Returns
    -------
    Union[:class:`~.Dataset`, :class:`~.IterableDataset`]
        The loaded dataset.

    Raises
    ------
    FileNotFoundError
        If ``name`` is not a key of ``DATASETS_MAP``. The message lists the
        available dataset names, one per line.
    """
    if name not in DATASETS_MAP:
        # Iterating a dict yields its keys, so no explicit ``.keys()`` needed.
        raise FileNotFoundError(
            f"Dataset '{name}' not found. The available datasets are:\n"
            + "\n".join(DATASETS_MAP)
        )

    path = get_dataset_path(name)
    template = DATASETS_MAP[name]

    if streaming:
        return IterableDataset(path, template)
    return Dataset(path, template)