From c44fb91f966a8492ea942e78997edee9457a7d97 Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Tue, 16 Sep 2025 09:35:21 +0000 Subject: [PATCH 01/16] added new code and tests for infer path onprem --- doc/changes/unreleased.md | 3 +- exasol/bucketfs/_path.py | 102 ++++++++++++++++++++++++++++++++++ test/integration/test_path.py | 38 +++++++++++++ 3 files changed, 142 insertions(+), 1 deletion(-) diff --git a/doc/changes/unreleased.md b/doc/changes/unreleased.md index 21d95d1e..26ddec82 100644 --- a/doc/changes/unreleased.md +++ b/doc/changes/unreleased.md @@ -1,4 +1,5 @@ # Unreleased ## Refactorings - * #186: Integration test for correctness of UDF path generation, using as_udf_path and pathlike \ No newline at end of file + * #186: Integration test for correctness of UDF path generation, using as_udf_path and pathlike + * #245: Add backend inference for the Path-API \ No newline at end of file diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py index 81f3582d..d6db8aa8 100644 --- a/exasol/bucketfs/_path.py +++ b/exasol/bucketfs/_path.py @@ -29,6 +29,7 @@ ) from exasol.bucketfs._error import BucketFsError from exasol.bucketfs._service import Service +from typing import Optional ARCHIVE_SUFFIXES = [".tar", ".gz", ".tgz", ".zip", ".tar"] @@ -580,3 +581,104 @@ def build_path(**kwargs) -> PathLike: bucket = _create_mounted_bucket(**kwargs) return BucketPath(path, bucket) + + + + + + + +def infer_backend( + bucketfs_host: Optional[str] = None, + bucketfs_port: Optional[int] = None, + bucketfs_name: Optional[str] = None, + bucket: Optional[str] = None, + bucketfs_user: Optional[str] = None, + bucketfs_password: Optional[str] = None, + saas_url: Optional[str] = None, + saas_account_id: Optional[str] = None, + saas_database_id: Optional[str] = None, + saas_database_name: Optional[str] = None, + saas_token: Optional[str] = None +): + """Infer backend: returns 'onprem' or 'saas', or raises if incomplete.""" + # On-prem required fields + onprem_fields = [bucketfs_host, bucketfs_port, bucketfs_name, bucket, bucketfs_user, bucketfs_password] + # SaaS required fields + saas_fields_minimal = [saas_url, saas_account_id, saas_token] + if all(onprem_fields): + return "onprem" + elif all(saas_fields_minimal) and (saas_database_id or saas_database_name): + return "saas" + else: + raise ValueError("Insufficient parameters to infer backend") + + +def get_database_id( + host: str, + account_id: str, + pat: str, + database_name: str +) -> str: + database_id = some_saas_lookup_api(host, account_id, pat, database_name) + if not database_id: + raise ValueError(f"Could not find database_id for name {database_name}") + return database_id + +def infer_path( + bucketfs_host: Optional[str] = None, + bucketfs_port: Optional[int] = None, + bucketfs_name: Optional[str] = None, + bucket: Optional[str] = None, + bucketfs_user: Optional[str] = None, + bucketfs_password: Optional[str] = None, + bucketfs_use_https: bool = True, + saas_url: Optional[str] = None, + saas_account_id: Optional[str] = None, + saas_database_id: Optional[str] = None, + saas_database_name: Optional[str] = None, + saas_token: Optional[str] = None, + path_in_bucket: str = "", + use_ssl_cert_validation: bool = True, + ssl_trusted_ca: Optional[str] = None, +) -> str: + backend = infer_backend( + bucketfs_host, bucketfs_port, bucketfs_name, bucket, bucketfs_user, bucketfs_password, + saas_url, saas_account_id, saas_database_id, saas_database_name, saas_token + ) + if backend == "onprem": + bfs_url = f"{'https' if bucketfs_use_https else 'http'}://{bucketfs_host}:{bucketfs_port}" + verify = ssl_trusted_ca or use_ssl_cert_validation + return build_path( + backend=StorageBackend.onprem, + url=bfs_url, + username=bucketfs_user, + password=bucketfs_password, + service_name=bucketfs_name, + bucket_name=bucket, + verify=verify, + path=path_in_bucket, + ) + elif backend == "saas": + if not saas_database_id and saas_database_name: + saas_database_id = get_database_id( + host=saas_url, + account_id=saas_account_id, + pat=saas_token, + database_name=saas_database_name, + ) + elif not saas_database_id and not saas_database_name: + raise ValueError( + "Incomplete parameter list. " + "Please either provide saas_database_id or saas_database_name." + ) + return build_path( + backend=StorageBackend.saas, + url=saas_url, + account_id=saas_account_id, + database_id=saas_database_id, + pat=saas_token, + path=path_in_bucket, + ) + + diff --git a/test/integration/test_path.py b/test/integration/test_path.py index 87712188..889ce48f 100644 --- a/test/integration/test_path.py +++ b/test/integration/test_path.py @@ -7,6 +7,9 @@ import pytest import exasol.bucketfs as bfs +from exasol.bucketfs._path import infer_path + + @pytest.fixture @@ -101,3 +104,38 @@ def test_write_delete(backend_aware_bucketfs_params, children_poem, classic_poem poem_path1.rm() expected_names = {"classic", "highlands.txt"} assert _collect_all_names(poems_root) == expected_names + + + +def test_infer_path_onprem(): + url = infer_path( + bucketfs_host="localhost", + bucketfs_port=2580, + bucketfs_name="bfsdefault", + bucket="default", + bucketfs_user="w", + bucketfs_password="write", + path_in_bucket="foo/" + ) + assert "localhost:2580" in url + assert "bfsdefault" in url + assert "default" in url + assert "foo" in url +# +# def test_infer_path_saas(monkeypatch): +# # monkeypatch get_database_id to always return "mocked-id" +# monkeypatch.setattr( +# "exasol.bucketfs._path.get_database_id", +# lambda *args, **kwargs: "mocked-id" +# ) +# +# url = infer_path( +# saas_url="https://api.example.com", +# saas_account_id="acc-1", +# saas_database_name="test_db", +# saas_token="abc", +# path_in_bucket="bar/", +# ) +# assert "https://api.example.com" in url +# assert "mocked-id" in url +# assert "bar" in url From f879064ca263ace9c2d586b300e4d4672bf6f0f5 Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Thu, 18 Sep 2025 12:46:54 +0000 Subject: [PATCH 02/16] fixed the saas and onprem test cases --- exasol/bucketfs/_path.py | 99 +++++++++++++++++++---------------- test/integration/test_path.py | 79 ++++++++++++++++------------ 2 files changed, 101 insertions(+), 77 deletions(-) diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py index d6db8aa8..c3f149f5 100644 --- a/exasol/bucketfs/_path.py +++ b/exasol/bucketfs/_path.py @@ -22,6 +22,8 @@ Protocol, ) +from exasol.saas.client.api_access import get_database_id + from exasol.bucketfs._buckets import ( BucketLike, MountedBucket, @@ -29,7 +31,6 @@ ) from exasol.bucketfs._error import BucketFsError from exasol.bucketfs._service import Service -from typing import Optional ARCHIVE_SUFFIXES = [".tar", ".gz", ".tgz", ".zip", ".tar"] @@ -567,7 +568,6 @@ def build_path(**kwargs) -> PathLike: Explicitly specified root path in a file system. This is an alternative to providing the service_name and the bucket_name. """ - backend = kwargs.pop("backend", StorageBackend.onprem) path = kwargs.pop("path") if "path" in kwargs else "" @@ -579,31 +579,32 @@ def build_path(**kwargs) -> PathLike: bucket = _create_saas_bucket(**kwargs) else: bucket = _create_mounted_bucket(**kwargs) - return BucketPath(path, bucket) - - - - - def infer_backend( - bucketfs_host: Optional[str] = None, - bucketfs_port: Optional[int] = None, - bucketfs_name: Optional[str] = None, - bucket: Optional[str] = None, - bucketfs_user: Optional[str] = None, - bucketfs_password: Optional[str] = None, - saas_url: Optional[str] = None, - saas_account_id: Optional[str] = None, - saas_database_id: Optional[str] = None, - saas_database_name: Optional[str] = None, - saas_token: Optional[str] = None + bucketfs_host: str | None = None, + bucketfs_port: int | None = None, + bucketfs_name: str | None = None, + bucket: str | None = None, + bucketfs_user: str | None = None, + bucketfs_password: str | None = None, + saas_url: str | None = None, + saas_account_id: str | None = None, + saas_database_id: str | None = None, + saas_database_name: str | None = None, + saas_token: str | None = None, ): """Infer backend: returns 'onprem' or 'saas', or raises if incomplete.""" # On-prem required fields - onprem_fields = [bucketfs_host, bucketfs_port, bucketfs_name, bucket, bucketfs_user, bucketfs_password] + onprem_fields = [ + bucketfs_host, + bucketfs_port, + bucketfs_name, + bucket, + bucketfs_user, + bucketfs_password, + ] # SaaS required fields saas_fields_minimal = [saas_url, saas_account_id, saas_token] if all(onprem_fields): @@ -614,37 +615,49 @@ def infer_backend( raise ValueError("Insufficient parameters to infer backend") -def get_database_id( - host: str, - account_id: str, - pat: str, - database_name: str +def get_database_id_by_name( + host: str, account_id: str, pat: str, database_name: str ) -> str: - database_id = some_saas_lookup_api(host, account_id, pat, database_name) + database_id = get_database_id( + host=host, account_id=account_id, pat=pat, database_name=database_name + ) if not database_id: raise ValueError(f"Could not find database_id for name {database_name}") return database_id + def infer_path( - bucketfs_host: Optional[str] = None, - bucketfs_port: Optional[int] = None, - bucketfs_name: Optional[str] = None, - bucket: Optional[str] = None, - bucketfs_user: Optional[str] = None, - bucketfs_password: Optional[str] = None, + bucketfs_host: str | None = None, + bucketfs_port: int | None = None, + bucketfs_name: str | None = None, + bucket: str | None = None, + bucketfs_user: str | None = None, + bucketfs_password: str | None = None, bucketfs_use_https: bool = True, - saas_url: Optional[str] = None, - saas_account_id: Optional[str] = None, - saas_database_id: Optional[str] = None, - saas_database_name: Optional[str] = None, - saas_token: Optional[str] = None, + saas_url: str | None = None, + saas_account_id: str | None = None, + saas_database_id: str | None = None, + saas_database_name: str | None = None, + saas_token: str | None = None, path_in_bucket: str = "", use_ssl_cert_validation: bool = True, - ssl_trusted_ca: Optional[str] = None, -) -> str: + ssl_trusted_ca: str | None = None, +): + """ + return Bucket based on onprem or SaaS + """ backend = infer_backend( - bucketfs_host, bucketfs_port, bucketfs_name, bucket, bucketfs_user, bucketfs_password, - saas_url, saas_account_id, saas_database_id, saas_database_name, saas_token + bucketfs_host, + bucketfs_port, + bucketfs_name, + bucket, + bucketfs_user, + bucketfs_password, + saas_url, + saas_account_id, + saas_database_id, + saas_database_name, + saas_token, ) if backend == "onprem": bfs_url = f"{'https' if bucketfs_use_https else 'http'}://{bucketfs_host}:{bucketfs_port}" @@ -661,7 +674,7 @@ def infer_path( ) elif backend == "saas": if not saas_database_id and saas_database_name: - saas_database_id = get_database_id( + saas_database_id = get_database_id_by_name( host=saas_url, account_id=saas_account_id, pat=saas_token, @@ -680,5 +693,3 @@ def infer_path( pat=saas_token, path=path_in_bucket, ) - - diff --git a/test/integration/test_path.py b/test/integration/test_path.py index 889ce48f..d4df23a3 100644 --- a/test/integration/test_path.py +++ b/test/integration/test_path.py @@ -1,6 +1,7 @@ from __future__ import annotations import io +import re import tarfile from collections.abc import ByteString @@ -10,8 +11,6 @@ from exasol.bucketfs._path import infer_path - - @pytest.fixture def children_poem() -> ByteString: poem_text = ( @@ -106,36 +105,50 @@ def test_write_delete(backend_aware_bucketfs_params, children_poem, classic_poem assert _collect_all_names(poems_root) == expected_names - -def test_infer_path_onprem(): +def test_infer_path_onprem(backend_aware_bucketfs_params): + """ + Creates the PathLike and validates it. + """ + host_port = re.search( + r"http://(\d{1,3}(?:\.\d{1,3}){3}):(\d+)", backend_aware_bucketfs_params["url"] + ) + url = infer_path( + bucketfs_host=host_port.group(1), + bucketfs_port=host_port.group(2), + bucketfs_name=backend_aware_bucketfs_params["service_name"], + bucket=backend_aware_bucketfs_params["bucket_name"], + bucketfs_user=backend_aware_bucketfs_params["username"], + bucketfs_password=backend_aware_bucketfs_params["password"], + path_in_bucket="onpremtest/", + bucketfs_use_https=backend_aware_bucketfs_params["verify"], + ) + assert isinstance(url, bfs.path.BucketPath) + assert backend_aware_bucketfs_params["url"] == url._bucket_api._service + assert ( + backend_aware_bucketfs_params["service_name"] == url._bucket_api._service_name + ) + assert backend_aware_bucketfs_params["bucket_name"] == url._bucket_api._name + assert "onpremtest" == str(url._path) + + +def test_infer_path_saas( + backend, saas_host, saas_pat, saas_account_id, backend_aware_saas_database_id +): + """ + Creates the SaasBucket with fixture details realted to Saas and validates it. + """ + if backend != "saas": + pytest.skip("The test runs only with SaaS database") url = infer_path( - bucketfs_host="localhost", - bucketfs_port=2580, - bucketfs_name="bfsdefault", - bucket="default", - bucketfs_user="w", - bucketfs_password="write", - path_in_bucket="foo/" + saas_url=saas_host, + saas_account_id=saas_account_id, + saas_database_id=backend_aware_saas_database_id, + saas_token=saas_pat, + path_in_bucket="saastest/", ) - assert "localhost:2580" in url - assert "bfsdefault" in url - assert "default" in url - assert "foo" in url -# -# def test_infer_path_saas(monkeypatch): -# # monkeypatch get_database_id to always return "mocked-id" -# monkeypatch.setattr( -# "exasol.bucketfs._path.get_database_id", -# lambda *args, **kwargs: "mocked-id" -# ) -# -# url = infer_path( -# saas_url="https://api.example.com", -# saas_account_id="acc-1", -# saas_database_name="test_db", -# saas_token="abc", -# path_in_bucket="bar/", -# ) -# assert "https://api.example.com" in url -# assert "mocked-id" in url -# assert "bar" in url + assert isinstance(url, bfs.path.BucketPath) + assert saas_host == url._bucket_api._url + assert saas_account_id == url._bucket_api._account_id + assert backend_aware_saas_database_id == url._bucket_api._database_id + assert saas_pat == url._bucket_api._pat + assert "saastest" in str(url._path) From 61b12483bd225efa6b6a53955a8c5908b3498de6 Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Mon, 22 Sep 2025 09:24:31 +0000 Subject: [PATCH 03/16] added combined test --- test/integration/test_path.py | 61 ++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/test/integration/test_path.py b/test/integration/test_path.py index d4df23a3..0ce6dcd4 100644 --- a/test/integration/test_path.py +++ b/test/integration/test_path.py @@ -105,10 +105,12 @@ def test_write_delete(backend_aware_bucketfs_params, children_poem, classic_poem assert _collect_all_names(poems_root) == expected_names -def test_infer_path_onprem(backend_aware_bucketfs_params): +def test_infer_path_onprem(backend,backend_aware_bucketfs_params): """ Creates the PathLike and validates it. """ + if backend != "saas": + pytest.skip("The test runs only with SaaS database") host_port = re.search( r"http://(\d{1,3}(?:\.\d{1,3}){3}):(\d+)", backend_aware_bucketfs_params["url"] ) @@ -152,3 +154,60 @@ def test_infer_path_saas( assert backend_aware_saas_database_id == url._bucket_api._database_id assert saas_pat == url._bucket_api._pat assert "saastest" in str(url._path) + + +def test_infer_path_and_write( + backend, + backend_aware_bucketfs_params, + children_poem, + saas_host, + saas_pat, + saas_account_id, + backend_aware_saas_database_id, +): + print(backend) + print(backend_aware_bucketfs_params) + print(children_poem) + print(saas_host) + print(saas_pat) + print(saas_account_id) + print(backend_aware_saas_database_id) + """ + Combines the onprem and saas path inference tests + and validates the path by uploading and reading data. + """ + if backend == "saas": + if not saas_host or not saas_pat or not saas_account_id or not backend_aware_saas_database_id: + print(saas_host,saas_pat,saas_account_id,backend_aware_saas_database_id) + pytest.skip("Skipping SaaS test due to missing parameters.") + # Infer SaaS path + path = infer_path( + saas_url=saas_host, + saas_account_id=saas_account_id, + saas_database_id=backend_aware_saas_database_id, + saas_token=saas_pat, + path_in_bucket="test/", + ) + else: + # On-prem inference, extract host/port as needed + host_port = re.search( + r"http://(\d{1,3}(?:\.\d{1,3}){3}):(\d+)", backend_aware_bucketfs_params["url"] + ) + print("JS23-backend_aware_bucketfs_params['url']",backend_aware_bucketfs_params["url"]) + path = infer_path( + bucketfs_host=host_port.group(1), + bucketfs_port=host_port.group(2), + bucketfs_name=backend_aware_bucketfs_params["service_name"], + bucket=backend_aware_bucketfs_params["bucket_name"], + bucketfs_user=backend_aware_bucketfs_params["username"], + bucketfs_password=backend_aware_bucketfs_params["password"], + path_in_bucket="test/", + bucketfs_use_https=backend_aware_bucketfs_params["verify"], + ) + # Actually try uploading + write_path = path / "test_file.txt" + write_path.write(children_poem) + + # Read it back for verification + read_back = b"".join(write_path.read(20)) + assert read_back == children_poem From e540f24232b342c9e16ab3ff1fa212bd4340711c Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Tue, 23 Sep 2025 11:00:21 +0000 Subject: [PATCH 04/16] removed print statements --- exasol/bucketfs/_buckets.py | 1 - test/integration/test_path.py | 11 ++--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/exasol/bucketfs/_buckets.py b/exasol/bucketfs/_buckets.py index 2d34f429..bbf5b5f8 100644 --- a/exasol/bucketfs/_buckets.py +++ b/exasol/bucketfs/_buckets.py @@ -34,7 +34,6 @@ from exasol.saas.client.openapi.models.file import File as SaasFile from requests import HTTPError from requests.auth import HTTPBasicAuth - from exasol.bucketfs._error import BucketFsError from exasol.bucketfs._logging import LOGGER from exasol.bucketfs._shared import ( diff --git a/test/integration/test_path.py b/test/integration/test_path.py index 0ce6dcd4..606dfd97 100644 --- a/test/integration/test_path.py +++ b/test/integration/test_path.py @@ -109,8 +109,8 @@ def test_infer_path_onprem(backend,backend_aware_bucketfs_params): """ Creates the PathLike and validates it. """ - if backend != "saas": - pytest.skip("The test runs only with SaaS database") + if backend == "saas": + pytest.skip() host_port = re.search( r"http://(\d{1,3}(?:\.\d{1,3}){3}):(\d+)", backend_aware_bucketfs_params["url"] ) @@ -165,13 +165,6 @@ def test_infer_path_and_write( saas_account_id, backend_aware_saas_database_id, ): - print(backend) - print(backend_aware_bucketfs_params) - print(children_poem) - print(saas_host) - print(saas_pat) - print(saas_account_id) - print(backend_aware_saas_database_id) """ Combines the onprem and saas path inference tests and validates the path by uploading and reading data. From 18adad6805dd83c2a186a95e62dd7b40d8cfeb1b Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Wed, 24 Sep 2025 10:04:27 +0000 Subject: [PATCH 05/16] removed prints --- exasol/bucketfs/_path.py | 5 +++-- test/integration/test_path.py | 2 -- test/integration/test_udf_path.py | 2 -- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py index c3f149f5..c5a73850 100644 --- a/exasol/bucketfs/_path.py +++ b/exasol/bucketfs/_path.py @@ -594,7 +594,7 @@ def infer_backend( saas_database_id: str | None = None, saas_database_name: str | None = None, saas_token: str | None = None, -): +) -> str: """Infer backend: returns 'onprem' or 'saas', or raises if incomplete.""" # On-prem required fields onprem_fields = [ @@ -642,7 +642,7 @@ def infer_path( path_in_bucket: str = "", use_ssl_cert_validation: bool = True, ssl_trusted_ca: str | None = None, -): +) -> PathLike | None: """ return Bucket based on onprem or SaaS """ @@ -693,3 +693,4 @@ def infer_path( pat=saas_token, path=path_in_bucket, ) + return None diff --git a/test/integration/test_path.py b/test/integration/test_path.py index 606dfd97..45f72fd7 100644 --- a/test/integration/test_path.py +++ b/test/integration/test_path.py @@ -171,7 +171,6 @@ def test_infer_path_and_write( """ if backend == "saas": if not saas_host or not saas_pat or not saas_account_id or not backend_aware_saas_database_id: - print(saas_host,saas_pat,saas_account_id,backend_aware_saas_database_id) pytest.skip("Skipping SaaS test due to missing parameters.") # Infer SaaS path path = infer_path( @@ -186,7 +185,6 @@ def test_infer_path_and_write( host_port = re.search( r"http://(\d{1,3}(?:\.\d{1,3}){3}):(\d+)", backend_aware_bucketfs_params["url"] ) - print("JS23-backend_aware_bucketfs_params['url']",backend_aware_bucketfs_params["url"]) path = infer_path( bucketfs_host=host_port.group(1), bucketfs_port=host_port.group(2), diff --git a/test/integration/test_udf_path.py b/test/integration/test_udf_path.py index 96dd45f5..f871082b 100644 --- a/test/integration/test_udf_path.py +++ b/test/integration/test_udf_path.py @@ -169,7 +169,6 @@ def test_upload_and_udf_path(uploaded_file, setup_schema_and_udfs, exa_bucket): content_from_udf_path = conn.execute( f"SELECT READ_FILE_CONTENT_UDF('{bucket_udf_path}/{file_name}')" ).fetchone()[0] - print(content_from_udf_path) assert content_from_udf_path == content @@ -193,5 +192,4 @@ def test_upload_and_udf_pathlike(uploaded_file, setup_schema_and_udfs, exa_pathl content_of_file_udf_path = conn.execute( f"SELECT READ_FILE_CONTENT_UDF('{file_udf_path}')" ).fetchone()[0] - print(content_of_file_udf_path) assert content_of_file_udf_path == content From 368507ceee68f62201a43accdf2fd0cf5789a3ff Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Wed, 24 Sep 2025 11:15:08 +0000 Subject: [PATCH 06/16] format fix --- exasol/bucketfs/_buckets.py | 1 + test/integration/test_path.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/exasol/bucketfs/_buckets.py b/exasol/bucketfs/_buckets.py index bbf5b5f8..2d34f429 100644 --- a/exasol/bucketfs/_buckets.py +++ b/exasol/bucketfs/_buckets.py @@ -34,6 +34,7 @@ from exasol.saas.client.openapi.models.file import File as SaasFile from requests import HTTPError from requests.auth import HTTPBasicAuth + from exasol.bucketfs._error import BucketFsError from exasol.bucketfs._logging import LOGGER from exasol.bucketfs._shared import ( diff --git a/test/integration/test_path.py b/test/integration/test_path.py index 45f72fd7..1f42eba4 100644 --- a/test/integration/test_path.py +++ b/test/integration/test_path.py @@ -105,7 +105,7 @@ def test_write_delete(backend_aware_bucketfs_params, children_poem, classic_poem assert _collect_all_names(poems_root) == expected_names -def test_infer_path_onprem(backend,backend_aware_bucketfs_params): +def test_infer_path_onprem(backend, backend_aware_bucketfs_params): """ Creates the PathLike and validates it. """ @@ -170,7 +170,12 @@ def test_infer_path_and_write( and validates the path by uploading and reading data. """ if backend == "saas": - if not saas_host or not saas_pat or not saas_account_id or not backend_aware_saas_database_id: + if ( + not saas_host + or not saas_pat + or not saas_account_id + or not backend_aware_saas_database_id + ): pytest.skip("Skipping SaaS test due to missing parameters.") # Infer SaaS path path = infer_path( @@ -183,7 +188,8 @@ def test_infer_path_and_write( else: # On-prem inference, extract host/port as needed host_port = re.search( - r"http://(\d{1,3}(?:\.\d{1,3}){3}):(\d+)", backend_aware_bucketfs_params["url"] + r"http://(\d{1,3}(?:\.\d{1,3}){3}):(\d+)", + backend_aware_bucketfs_params["url"], ) path = infer_path( bucketfs_host=host_port.group(1), From 6686955b84a88e4e49410b6d5a7262f367390f47 Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Wed, 24 Sep 2025 11:54:05 +0000 Subject: [PATCH 07/16] unit test --- test/unit/test_path.py | 84 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 test/unit/test_path.py diff --git a/test/unit/test_path.py b/test/unit/test_path.py new file mode 100644 index 00000000..b2f2038e --- /dev/null +++ b/test/unit/test_path.py @@ -0,0 +1,84 @@ +from enum import Enum +from unittest.mock import ( + patch, +) + +import pytest + +from exasol.bucketfs._path import ( + infer_backend, + infer_path, +) + + +class StorageBackend(Enum): + onprem = "onprem" + saas = "saas" + + +# Dummy PathLike +PathLike = str + + +# Dummy build_path +def build_path(*args, **kwargs): + return f"mocked_path_{args}_{kwargs}" + + +# Let's start with infer_backend +def test_infer_backend_onprem(): + result = infer_backend( + bucketfs_host="host", + bucketfs_port=123, + bucketfs_name="bfs", + bucket="mybucket", + bucketfs_user="user", + bucketfs_password="pw", + ) + assert result == "onprem" + + +def test_infer_backend_saas_with_id(): + result = infer_backend( + saas_url="https://api", + saas_account_id="acct", + saas_database_id="dbid", + saas_token="token", + ) + assert result == "saas" + + +def test_infer_backend_saas_with_name(): + result = infer_backend( + saas_url="https://api", + saas_account_id="acct", + saas_database_name="dbname", + saas_token="token", + ) + assert result == "saas" + + +def test_infer_backend_missing_fields(): + with pytest.raises(ValueError, match="Insufficient parameters"): + infer_backend(bucketfs_host="host") # incomplete + + +def test_infer_backend_no_fields(): + with pytest.raises(ValueError): + infer_backend() + + +@patch("exasol.bucketfs._path.build_path", side_effect=build_path) +def test_infer_path_onprem_with_ssl_ca(mock_build): + # Should pass ssl_trusted_ca as verify + result = infer_path( + bucketfs_host="host", + bucketfs_port=123, + bucketfs_name="bfs", + bucket="mybucket", + bucketfs_user="user", + bucketfs_password="pw", + ssl_trusted_ca="ca_cert", + ) + called_args = mock_build.call_args[1] + assert called_args["verify"] == "ca_cert" From 73e155f44559965164d14c5f0cac634f063b9b6e Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Wed, 24 Sep 2025 11:57:40 +0000 Subject: [PATCH 08/16] format fix --- exasol/bucketfs/_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py index c5a73850..2117e507 100644 --- a/exasol/bucketfs/_path.py +++ b/exasol/bucketfs/_path.py @@ -644,7 +644,7 @@ def infer_path( ssl_trusted_ca: str | None = None, ) -> PathLike | None: """ - return Bucket based on onprem or SaaS + return PathLike based on onprem or SaaS """ backend = infer_backend( bucketfs_host, From 8414ce8cab37b287c0da3b38fee53d08fece5bac Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Wed, 24 Sep 2025 17:28:58 +0530 Subject: [PATCH 09/16] Update exasol/bucketfs/_path.py Co-authored-by: Mikhail Beck --- exasol/bucketfs/_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py index 2117e507..426eb7fd 100644 --- a/exasol/bucketfs/_path.py +++ b/exasol/bucketfs/_path.py @@ -595,7 +595,7 @@ def infer_backend( saas_database_name: str | None = None, saas_token: str | None = None, ) -> str: - """Infer backend: returns 'onprem' or 'saas', or raises if incomplete.""" + """Infer the backend from the provided parameters: returns 'onprem' or 'saas', or raises a ValueError if the list of parameters is insufficient for either of the backends.""" # On-prem required fields onprem_fields = [ bucketfs_host, From 98ba24f1cb39a295781a80078d6b5c85b13c71fc Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Wed, 24 Sep 2025 12:18:32 +0000 Subject: [PATCH 10/16] added proper docstring and format fix --- exasol/bucketfs/_path.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py index 426eb7fd..14fcc16d 100644 --- a/exasol/bucketfs/_path.py +++ b/exasol/bucketfs/_path.py @@ -594,8 +594,10 @@ def infer_backend( saas_database_id: str | None = None, saas_database_name: str | None = None, saas_token: str | None = None, -) -> str: - """Infer the backend from the provided parameters: returns 'onprem' or 'saas', or raises a ValueError if the list of parameters is insufficient for either of the backends.""" +) -> StorageBackend: + """Infer the backend from the provided parameters: returns 'onprem' or 'saas', + or raises a ValueError if the list of parameters is insufficient for either of the backends. + """ # On-prem required fields onprem_fields = [ bucketfs_host, @@ -644,7 +646,11 @@ def infer_path( ssl_trusted_ca: str | None = None, ) -> PathLike | None: """ - return PathLike based on onprem or SaaS + Infers the correct storage backend (on-premises BucketFS or SaaS) from the provided parameters + and returns a PathLike object for accessing the specified resource. + + Raises: + ValueError: If the parameters are insufficient or inconsistent and the backend cannot be determined. """ backend = infer_backend( bucketfs_host, @@ -693,4 +699,5 @@ def infer_path( pat=saas_token, path=path_in_bucket, ) - return None + else: + raise ValueError("Insufficient parameters to infer correct storage backend.") From ebef6c05ba2dd2db54f05849cd2da9240945cd91 Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Wed, 24 Sep 2025 18:21:30 +0530 Subject: [PATCH 11/16] Apply suggestion from @ckunki Co-authored-by: Christoph Kuhnke --- test/unit/test_path.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/unit/test_path.py b/test/unit/test_path.py index b2f2038e..673dac05 100644 --- a/test/unit/test_path.py +++ b/test/unit/test_path.py @@ -70,7 +70,8 @@ def test_infer_backend_no_fields(): @patch("exasol.bucketfs._path.build_path", side_effect=build_path) def test_infer_path_onprem_with_ssl_ca(mock_build): - # Should pass ssl_trusted_ca as verify + # Should pass ssl_trusted_ca as argument verify to exasol.bucketfs._path.build_path() + result = infer_path( bucketfs_host="host", bucketfs_port=123, From 037c90444394d22c9ab533dfd3556adba5e0f6d0 Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Wed, 24 Sep 2025 18:22:13 +0530 Subject: [PATCH 12/16] Apply suggestion from @ckunki Co-authored-by: Christoph Kuhnke --- test/integration/test_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/test_path.py b/test/integration/test_path.py index 1f42eba4..e82febee 100644 --- a/test/integration/test_path.py +++ b/test/integration/test_path.py @@ -116,7 +116,7 @@ def test_infer_path_onprem(backend, backend_aware_bucketfs_params): ) url = infer_path( bucketfs_host=host_port.group(1), - bucketfs_port=host_port.group(2), + bucketfs_port=int(host_port.group(2)), bucketfs_name=backend_aware_bucketfs_params["service_name"], bucket=backend_aware_bucketfs_params["bucket_name"], bucketfs_user=backend_aware_bucketfs_params["username"], From 0dc7f41b588e8d4a36429e42a5a5d36aab2147f7 Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Wed, 24 Sep 2025 18:22:32 +0530 Subject: [PATCH 13/16] Apply suggestion from @ahsimb Co-authored-by: Mikhail Beck --- exasol/bucketfs/_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py index 14fcc16d..cce74e9c 100644 --- a/exasol/bucketfs/_path.py +++ b/exasol/bucketfs/_path.py @@ -610,7 +610,7 @@ def infer_backend( # SaaS required fields saas_fields_minimal = [saas_url, saas_account_id, saas_token] if all(onprem_fields): - return "onprem" + return StorageBackend.onprem elif all(saas_fields_minimal) and (saas_database_id or saas_database_name): return "saas" else: From f33bcaff23e4a2616ac49d0cbdcd665eae600eec Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Thu, 25 Sep 2025 12:05:19 +0530 Subject: [PATCH 14/16] Update exasol/bucketfs/_path.py Co-authored-by: Mikhail Beck --- exasol/bucketfs/_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol/bucketfs/_path.py b/exasol/bucketfs/_path.py index cce74e9c..d4ffadd2 100644 --- a/exasol/bucketfs/_path.py +++ b/exasol/bucketfs/_path.py @@ -612,7 +612,7 @@ def infer_backend( if all(onprem_fields): return StorageBackend.onprem elif all(saas_fields_minimal) and (saas_database_id or saas_database_name): - return "saas" + return StorageBackend.saas else: raise ValueError("Insufficient parameters to infer backend") From 58a6a6e8980bc6ab778c969e374b056ce3590075 Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Mon, 29 Sep 2025 12:52:18 +0530 Subject: [PATCH 15/16] Apply suggestion from @ckunki Co-authored-by: Christoph Kuhnke --- test/integration/test_path.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/integration/test_path.py b/test/integration/test_path.py index e82febee..be02c799 100644 --- a/test/integration/test_path.py +++ b/test/integration/test_path.py @@ -105,7 +105,14 @@ def test_write_delete(backend_aware_bucketfs_params, children_poem, classic_poem assert _collect_all_names(poems_root) == expected_names -def test_infer_path_onprem(backend, backend_aware_bucketfs_params): +@pytest.fixture +def require_saas_params(backend_aware_onprem_bucketfs_params, use_onprem): + if not use_onprem: + pytest.skip("Skipped as on-premise backend is not selected") + return backend_aware_onprem_bucketfs_params + + +def test_infer_path_onprem(require_onprem_bucketfs_params): """ Creates the PathLike and validates it. """ From 5ae1a0017c1934fe2cb87f1170480094bdc01bb9 Mon Sep 17 00:00:00 2001 From: Janaarthanan Selvarajan Date: Mon, 29 Sep 2025 12:54:58 +0530 Subject: [PATCH 16/16] Apply suggestion from @ckunki Co-authored-by: Christoph Kuhnke --- test/integration/test_path.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/integration/test_path.py b/test/integration/test_path.py index be02c799..d74af3e7 100644 --- a/test/integration/test_path.py +++ b/test/integration/test_path.py @@ -140,9 +140,14 @@ def test_infer_path_onprem(require_onprem_bucketfs_params): assert "onpremtest" == str(url._path) -def test_infer_path_saas( - backend, saas_host, saas_pat, saas_account_id, backend_aware_saas_database_id -): +@pytest.fixture +def require_saas_params(backend_aware_saas_bucketfs_params, use_saas): + if not use_saas: + pytest.skip("Skipped as SaaS backend is not selected") + return backend_aware_saas_bucketfs_params + + +def test_infer_path_saas(require_saas_params): """ Creates the SaasBucket with fixture details realted to Saas and validates it. """