diff --git a/cleanlab_studio/cli/dataset/upload.py b/cleanlab_studio/cli/dataset/upload.py index 8a4761f4..a95971d6 100644 --- a/cleanlab_studio/cli/dataset/upload.py +++ b/cleanlab_studio/cli/dataset/upload.py @@ -1,6 +1,5 @@ import pathlib from typing import cast, List, Optional -import os from cleanlab_studio.internal.types import SchemaOverride from cleanlab_studio.internal.upload_helpers import upload_dataset @@ -42,8 +41,6 @@ def upload( if filepath is None: filepath = click_helpers.prompt_for_filepath("Specify your dataset filepath") - if not os.path.exists(filepath): - abort(f"cannot upload '{filepath}': no such file or directory") dataset_source = FilepathDatasetSource(filepath=pathlib.Path(filepath)) diff --git a/cleanlab_studio/errors.py b/cleanlab_studio/errors.py index 38a08a96..f301bb03 100644 --- a/cleanlab_studio/errors.py +++ b/cleanlab_studio/errors.py @@ -1,4 +1,6 @@ from asyncio import Handle +import pathlib +from typing import Union class HandledError(Exception): @@ -139,3 +141,10 @@ def __str__(self) -> str: class InvalidProjectConfiguration(HandledError): pass + + +class InvalidFilepathError(HandledError): + def __init__(self, filepath: Union[str, pathlib.Path] = "") -> None: + if isinstance(filepath, pathlib.Path): + filepath = str(filepath) + super().__init__(f"File could not be found at {filepath}. Please check the file path.") diff --git a/cleanlab_studio/internal/dataset_source/filepath_dataset_source.py b/cleanlab_studio/internal/dataset_source/filepath_dataset_source.py index 88a19448..a2174e69 100644 --- a/cleanlab_studio/internal/dataset_source/filepath_dataset_source.py +++ b/cleanlab_studio/internal/dataset_source/filepath_dataset_source.py @@ -1,9 +1,10 @@ import mimetypes import pathlib from typing import Any, Optional +import os from .dataset_source import DatasetSource -from cleanlab_studio.errors import InvalidDatasetError +from cleanlab_studio.errors import InvalidDatasetError, InvalidFilepathError class FilepathDatasetSource(DatasetSource): @@ -15,6 +16,9 @@ def __init__( **kwargs: Any, ): super().__init__(*args, **kwargs) + if not os.path.exists(filepath): + raise InvalidFilepathError(filepath=filepath) + self.dataset_name = dataset_name if dataset_name is not None else filepath.name self.file_size = filepath.stat().st_size maybe_file_type = mimetypes.guess_type(filepath)[0]