From 7f556ae0433fd154ed98d86cddf9445212e044ba Mon Sep 17 00:00:00 2001 From: roll Date: Wed, 25 Sep 2024 09:03:57 +0100 Subject: [PATCH 1/5] Fixed DP-001 --- dplib/helpers/__spec__/test_path.py | 16 ++++++++++++++++ dplib/helpers/path.py | 22 ++++++++++++++-------- dplib/helpers/profile.py | 4 ++-- 3 files changed, 32 insertions(+), 10 deletions(-) create mode 100644 dplib/helpers/__spec__/test_path.py diff --git a/dplib/helpers/__spec__/test_path.py b/dplib/helpers/__spec__/test_path.py new file mode 100644 index 0000000..0fcb226 --- /dev/null +++ b/dplib/helpers/__spec__/test_path.py @@ -0,0 +1,16 @@ +import pytest + +from dplib.helpers.path import assert_safe_path + + +@pytest.mark.parametrize( + "path", + ( + "../data.csv", + "/etc/home/secret.json", + "http:test/../../secret.json", + ), +) +def test_assert_safe_path_raises_dp_001(path: str): + with pytest.raises(Exception): + assert_safe_path(path) diff --git a/dplib/helpers/path.py b/dplib/helpers/path.py index b75158a..be3f6ff 100644 --- a/dplib/helpers/path.py +++ b/dplib/helpers/path.py @@ -3,7 +3,8 @@ import os from pathlib import Path from typing import Optional, Tuple -from urllib.parse import urlparse + +import fsspec # type: ignore from ..error import Error @@ -21,7 +22,7 @@ def infer_format(path: str, *, raise_missing: bool = False): def infer_basepath(path: str): basepath = os.path.dirname(path) - if basepath and not is_url_path(basepath): + if basepath and is_file_protocol_path(basepath): if not os.path.abspath(basepath): basepath = os.path.relpath(basepath, start=os.getcwd()) return basepath @@ -38,21 +39,26 @@ def ensure_basepath(path: str, basepath: Optional[str] = None) -> Tuple[str, str def join_basepath(path: str, basepath: Optional[str] = None) -> str: if not basepath: return path - if is_url_path(path): + if not is_file_protocol_path(path): return path - if is_url_path(basepath): + if not is_file_protocol_path(basepath): return f"{basepath}/{path}" return os.path.join(basepath, path) -def is_url_path(path: str) -> bool: - scheme = urlparse(path).scheme - return scheme in ["http", "https"] +def is_file_protocol_path(path: str) -> bool: + info = fsspec.utils.infer_storage_options(path) # type: ignore + return info.get("protocol") == "file" # type: ignore + + +def is_http_or_ftp_protocol_path(path: str) -> bool: + info = fsspec.utils.infer_storage_options(path) # type: ignore + return info.get("protocol") in ["http", "https", "ftp", "ftps"] # type: ignore def assert_safe_path(path: str, *, basepath: Optional[str] = None): """Assert that the path (untrusted) is not outside the basepath (trusted)""" - if not is_url_path(path): + if is_file_protocol_path(path): try: root = Path(basepath or os.getcwd()).resolve() item = root.joinpath(path).resolve() diff --git a/dplib/helpers/profile.py b/dplib/helpers/profile.py index c619387..d7cda04 100644 --- a/dplib/helpers/profile.py +++ b/dplib/helpers/profile.py @@ -11,7 +11,7 @@ from ..errors.metadata import MetadataError from .dict import load_dict from .file import read_file -from .path import is_url_path +from .path import is_http_or_ftp_protocol_path # TODO: implement additional user-side profile caching @@ -40,7 +40,7 @@ def read_profile(*, profile: str) -> types.IDict: profile = os.path.join(settings.PROFILE_BASEDIR, version, filename) # Ensure profile is URL - if not is_url_path(profile): + if not is_http_or_ftp_protocol_path(profile): raise Error(f'Profile MUST be a URL: "{profile}"') # Read jsonSchema From 7be561abc0a42eaa2f2128bb5f27ad08c8c5dc8f Mon Sep 17 00:00:00 2001 From: roll Date: Wed, 25 Sep 2024 09:35:58 +0100 Subject: [PATCH 2/5] Fixed DP-002 --- dplib/helpers/__spec__/test_file.py | 12 ++++++++++++ dplib/helpers/file.py | 7 +++---- 2 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 dplib/helpers/__spec__/test_file.py diff --git a/dplib/helpers/__spec__/test_file.py b/dplib/helpers/__spec__/test_file.py new file mode 100644 index 0000000..e466b22 --- /dev/null +++ b/dplib/helpers/__spec__/test_file.py @@ -0,0 +1,12 @@ +import os +import stat +from pathlib import Path + +from dplib.helpers.file import write_file + + +def test_write_file_permissions_dp_002(tmpdir: Path): + path = str(tmpdir / "test.txt") + write_file(path, "Hello, World!") + mode = oct(stat.S_IMODE(os.stat(path).st_mode)) + assert mode == "0o600" diff --git a/dplib/helpers/file.py b/dplib/helpers/file.py index d79ad1e..230fd3c 100644 --- a/dplib/helpers/file.py +++ b/dplib/helpers/file.py @@ -33,16 +33,15 @@ def write_file(path: str, body: Any, *, mode: str = "wt", encoding: str = "utf-8 with tempfile.NamedTemporaryFile(mode, delete=False, encoding=eff_enc) as file: file.write(body) file.flush() - move_file(file.name, path, mode=0o644) + move_file(file.name, path) except Exception: raise Error(f'Cannot write file "{path}"') -def move_file(source: str, target: str, *, mode: Optional[int] = None): +def move_file(source: str, target: str, *, mode: int = 0o600): try: Path(target).parent.mkdir(parents=True, exist_ok=True) shutil.move(source, target) - if mode: - os.chmod(target, 0o644) + os.chmod(target, mode) except Exception: raise Error(f'Cannot move file "{source}:{target}"') From 4ad70e74a42bcfe40e38a9be7c1bf4f678ded5e1 Mon Sep 17 00:00:00 2001 From: roll Date: Wed, 25 Sep 2024 09:43:38 +0100 Subject: [PATCH 3/5] Fixed DP-003 --- dplib/helpers/__spec__/test_file.py | 13 ++++++++++--- dplib/helpers/file.py | 15 +++++++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/dplib/helpers/__spec__/test_file.py b/dplib/helpers/__spec__/test_file.py index e466b22..6a1e945 100644 --- a/dplib/helpers/__spec__/test_file.py +++ b/dplib/helpers/__spec__/test_file.py @@ -5,8 +5,15 @@ from dplib.helpers.file import write_file -def test_write_file_permissions_dp_002(tmpdir: Path): +def test_write_file_implicit_permissions_dp_002(tmpdir: Path): path = str(tmpdir / "test.txt") write_file(path, "Hello, World!") - mode = oct(stat.S_IMODE(os.stat(path).st_mode)) - assert mode == "0o600" + permissions = oct(stat.S_IMODE(os.stat(path).st_mode)) + assert permissions == "0o600" + + +def test_write_file_explicit_permissions_dp_003(tmpdir: Path): + path = str(tmpdir / "test.txt") + write_file(path, "Hello, World!", permissions=0o644) + permissions = oct(stat.S_IMODE(os.stat(path).st_mode)) + assert permissions == "0o644" diff --git a/dplib/helpers/file.py b/dplib/helpers/file.py index 230fd3c..b9d891e 100644 --- a/dplib/helpers/file.py +++ b/dplib/helpers/file.py @@ -27,21 +27,28 @@ def read_file( raise Error(f'Cannot read file "{path}"') -def write_file(path: str, body: Any, *, mode: str = "wt", encoding: str = "utf-8"): +def write_file( + path: str, + body: Any, + *, + mode: str = "wt", + encoding: str = "utf-8", + permissions: int = 0o600, +): try: eff_enc = encoding if mode == "wt" else None with tempfile.NamedTemporaryFile(mode, delete=False, encoding=eff_enc) as file: file.write(body) file.flush() - move_file(file.name, path) + move_file(file.name, path, permissions=permissions) except Exception: raise Error(f'Cannot write file "{path}"') -def move_file(source: str, target: str, *, mode: int = 0o600): +def move_file(source: str, target: str, *, permissions: int = 0o600): try: Path(target).parent.mkdir(parents=True, exist_ok=True) shutil.move(source, target) - os.chmod(target, mode) + os.chmod(target, permissions) except Exception: raise Error(f'Cannot move file "{source}:{target}"') From e7563461b380bd3406a64d26e10c8ef44bf65f14 Mon Sep 17 00:00:00 2001 From: roll Date: Wed, 25 Sep 2024 10:09:56 +0100 Subject: [PATCH 4/5] Fixed DP-012 --- dplib/actions/__spec__/test_metadata_convert.py | 15 +++++++++++++++ dplib/actions/metadata/convert.py | 16 +++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 dplib/actions/__spec__/test_metadata_convert.py diff --git a/dplib/actions/__spec__/test_metadata_convert.py b/dplib/actions/__spec__/test_metadata_convert.py new file mode 100644 index 0000000..6cb7581 --- /dev/null +++ b/dplib/actions/__spec__/test_metadata_convert.py @@ -0,0 +1,15 @@ +import pytest + +from dplib.actions.metadata.convert import convert_metadata + + +@pytest.mark.parametrize( + "source,target", + ( + ("bad", "ckan"), + ("ckan", "bad"), + ), +) +def test_convert_metadata_bad_source_or_target_dp_012(source: str, target: str): + with pytest.raises(ValueError): + convert_metadata("resource.json", type="resource", source=source, target=target) # type: ignore diff --git a/dplib/actions/metadata/convert.py b/dplib/actions/metadata/convert.py index f0b8aa2..2341bd9 100644 --- a/dplib/actions/metadata/convert.py +++ b/dplib/actions/metadata/convert.py @@ -1,7 +1,7 @@ from __future__ import annotations from importlib import import_module -from typing import Literal, Optional, Type, Union, cast +from typing import List, Literal, Optional, Type, cast, get_args from ...models import Package, Resource from ...system import Model @@ -17,9 +17,17 @@ def convert_metadata( ) -> Model: """Convert metadata from one notation to another.""" + # Validate source/target + if source and source not in NOTATIONS: + raise ValueError(f"Unknown source notation: {source}") + if target and target not in NOTATIONS: + raise ValueError(f"Unknown target notation: {target}") + # Source model Source = Resource if type == "resource" else Package if source: + if source not in NOTATIONS: + raise ValueError(f"Unknown source notation: {source}") module = import_module(f"dplib.plugins.{source}.models") Source: Type[Model] = getattr(module, f"{source.capitalize()}{type.capitalize()}") model = Source.from_path(path, format=format) @@ -35,5 +43,7 @@ def convert_metadata( return model -IType = Union[Literal["package"], Literal["resource"]] -INotation = Union[Literal["ckan"], Literal["dcat"], Literal["github"], Literal["zenodo"]] +IType = Literal["package", "resource"] +INotation = Literal["ckan", "dcat", "github", "zenodo"] + +NOTATIONS: List[INotation] = list(get_args(INotation)) From b5162907977e9b223c5f31884b52ba728e40a2a5 Mon Sep 17 00:00:00 2001 From: roll Date: Wed, 25 Sep 2024 10:23:46 +0100 Subject: [PATCH 5/5] Skip permission tests on Windows --- dplib/helpers/__spec__/test_file.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dplib/helpers/__spec__/test_file.py b/dplib/helpers/__spec__/test_file.py index 6a1e945..8d714be 100644 --- a/dplib/helpers/__spec__/test_file.py +++ b/dplib/helpers/__spec__/test_file.py @@ -1,10 +1,14 @@ import os +import platform import stat from pathlib import Path +import pytest + from dplib.helpers.file import write_file +@pytest.mark.skipif(platform.system() == "Windows", reason="Unix only") def test_write_file_implicit_permissions_dp_002(tmpdir: Path): path = str(tmpdir / "test.txt") write_file(path, "Hello, World!") @@ -12,6 +16,7 @@ def test_write_file_implicit_permissions_dp_002(tmpdir: Path): assert permissions == "0o600" +@pytest.mark.skipif(platform.system() == "Windows", reason="Unix only") def test_write_file_explicit_permissions_dp_003(tmpdir: Path): path = str(tmpdir / "test.txt") write_file(path, "Hello, World!", permissions=0o644)