Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/changes/unreleased.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Unreleased

## Refactorings
* #186: Integration test for correctness of UDF path generation, using as_udf_path and pathlike
* #186: Integration test for correctness of UDF path generation, using as_udf_path and pathlike
* #245: Add backend inference for the Path-API
125 changes: 123 additions & 2 deletions exasol/bucketfs/_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
Protocol,
)

from exasol.saas.client.api_access import get_database_id

from exasol.bucketfs._buckets import (
BucketLike,
MountedBucket,
Expand Down Expand Up @@ -566,7 +568,6 @@ def build_path(**kwargs) -> PathLike:
Explicitly specified root path in a file system. This is an alternative to
providing the service_name and the bucket_name.
"""

backend = kwargs.pop("backend", StorageBackend.onprem)
path = kwargs.pop("path") if "path" in kwargs else ""

Expand All @@ -578,5 +579,125 @@ def build_path(**kwargs) -> PathLike:
bucket = _create_saas_bucket(**kwargs)
else:
bucket = _create_mounted_bucket(**kwargs)

return BucketPath(path, bucket)


def infer_backend(
bucketfs_host: str | None = None,
bucketfs_port: int | None = None,
bucketfs_name: str | None = None,
bucket: str | None = None,
bucketfs_user: str | None = None,
bucketfs_password: str | None = None,
saas_url: str | None = None,
saas_account_id: str | None = None,
saas_database_id: str | None = None,
saas_database_name: str | None = None,
saas_token: str | None = None,
) -> StorageBackend:
"""Infer the backend from the provided parameters: returns 'onprem' or 'saas',
or raises a ValueError if the list of parameters is insufficient for either of the backends.
"""
# On-prem required fields
onprem_fields = [
bucketfs_host,
bucketfs_port,
bucketfs_name,
bucket,
bucketfs_user,
bucketfs_password,
]
# SaaS required fields
saas_fields_minimal = [saas_url, saas_account_id, saas_token]
if all(onprem_fields):
return StorageBackend.onprem
elif all(saas_fields_minimal) and (saas_database_id or saas_database_name):
return StorageBackend.saas
else:
raise ValueError("Insufficient parameters to infer backend")


def get_database_id_by_name(
host: str, account_id: str, pat: str, database_name: str
) -> str:
database_id = get_database_id(
host=host, account_id=account_id, pat=pat, database_name=database_name
)
if not database_id:
raise ValueError(f"Could not find database_id for name {database_name}")
return database_id


def infer_path(
bucketfs_host: str | None = None,
bucketfs_port: int | None = None,
bucketfs_name: str | None = None,
bucket: str | None = None,
bucketfs_user: str | None = None,
bucketfs_password: str | None = None,
bucketfs_use_https: bool = True,
saas_url: str | None = None,
saas_account_id: str | None = None,
saas_database_id: str | None = None,
saas_database_name: str | None = None,
saas_token: str | None = None,
path_in_bucket: str = "",
use_ssl_cert_validation: bool = True,
ssl_trusted_ca: str | None = None,
) -> PathLike | None:
"""
Infers the correct storage backend (on-premises BucketFS or SaaS) from the provided parameters
and returns a PathLike object for accessing the specified resource.

Raises:
ValueError: If the parameters are insufficient or inconsistent and the backend cannot be determined.
"""
backend = infer_backend(
bucketfs_host,
bucketfs_port,
bucketfs_name,
bucket,
bucketfs_user,
bucketfs_password,
saas_url,
saas_account_id,
saas_database_id,
saas_database_name,
saas_token,
)
if backend == "onprem":
bfs_url = f"{'https' if bucketfs_use_https else 'http'}://{bucketfs_host}:{bucketfs_port}"
verify = ssl_trusted_ca or use_ssl_cert_validation
return build_path(
backend=StorageBackend.onprem,
url=bfs_url,
username=bucketfs_user,
password=bucketfs_password,
service_name=bucketfs_name,
bucket_name=bucket,
verify=verify,
path=path_in_bucket,
)
elif backend == "saas":
if not saas_database_id and saas_database_name:
saas_database_id = get_database_id_by_name(
host=saas_url,
account_id=saas_account_id,
pat=saas_token,
database_name=saas_database_name,
)
elif not saas_database_id and not saas_database_name:
raise ValueError(
"Incomplete parameter list. "
"Please either provide saas_database_id or saas_database_name."
)
return build_path(
backend=StorageBackend.saas,
url=saas_url,
account_id=saas_account_id,
database_id=saas_database_id,
pat=saas_token,
path=path_in_bucket,
)
else:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, for a late comment. I think it might be useful to also include the Mounted BucketFS path here (and in the infer_backend). This backend is just a file path that can be used by either a UDF or for testing. See build_path.
The ValueError would probably be "Unsupported backend". Insufficient parameters would be thrown by the infer_backend. If it was happy with the parameters, this must be some kind of a new backend, this function is unaware of or really doesn't want to support. This is a very small thing of course.

raise ValueError("Insufficient parameters to infer correct storage backend.")
119 changes: 119 additions & 0 deletions test/integration/test_path.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from __future__ import annotations

import io
import re
import tarfile
from collections.abc import ByteString

import pytest

import exasol.bucketfs as bfs
from exasol.bucketfs._path import infer_path


@pytest.fixture
Expand Down Expand Up @@ -101,3 +103,120 @@ def test_write_delete(backend_aware_bucketfs_params, children_poem, classic_poem
poem_path1.rm()
expected_names = {"classic", "highlands.txt"}
assert _collect_all_names(poems_root) == expected_names


@pytest.fixture
def require_saas_params(backend_aware_onprem_bucketfs_params, use_onprem):
if not use_onprem:
pytest.skip("Skipped as on-premise backend is not selected")
return backend_aware_onprem_bucketfs_params


def test_infer_path_onprem(require_onprem_bucketfs_params):
"""
Creates the PathLike and validates it.
"""
if backend == "saas":
pytest.skip()
host_port = re.search(
r"http://(\d{1,3}(?:\.\d{1,3}){3}):(\d+)", backend_aware_bucketfs_params["url"]
)
url = infer_path(
bucketfs_host=host_port.group(1),
bucketfs_port=int(host_port.group(2)),
bucketfs_name=backend_aware_bucketfs_params["service_name"],
bucket=backend_aware_bucketfs_params["bucket_name"],
bucketfs_user=backend_aware_bucketfs_params["username"],
bucketfs_password=backend_aware_bucketfs_params["password"],
path_in_bucket="onpremtest/",
bucketfs_use_https=backend_aware_bucketfs_params["verify"],
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will fail if a backend other than onprem is selected.

Compare

all in pytest-plugins/pytest-backend

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK - please excuse me.
I probably was wrong.
I didn't notice that you test case should only be executed onprem, but skipped on saas.

I now replaced fixture backend by use_onprem which should serve this purpose in my suggestion above.
Still, the test case needs to call pytest.skip() by himself.

)
assert isinstance(url, bfs.path.BucketPath)
assert backend_aware_bucketfs_params["url"] == url._bucket_api._service
assert (
backend_aware_bucketfs_params["service_name"] == url._bucket_api._service_name
)
assert backend_aware_bucketfs_params["bucket_name"] == url._bucket_api._name
assert "onpremtest" == str(url._path)


@pytest.fixture
def require_saas_params(backend_aware_saas_bucketfs_params, use_saas):
if not use_saas:
pytest.skip("Skipped as SaaS backend is not selected")
return backend_aware_saas_bucketfs_params


def test_infer_path_saas(require_saas_params):
"""
Creates the SaasBucket with fixture details realted to Saas and validates it.
"""
if backend != "saas":
pytest.skip("The test runs only with SaaS database")
url = infer_path(
saas_url=saas_host,
saas_account_id=saas_account_id,
saas_database_id=backend_aware_saas_database_id,
saas_token=saas_pat,
path_in_bucket="saastest/",
)
assert isinstance(url, bfs.path.BucketPath)
assert saas_host == url._bucket_api._url
assert saas_account_id == url._bucket_api._account_id
assert backend_aware_saas_database_id == url._bucket_api._database_id
assert saas_pat == url._bucket_api._pat
assert "saastest" in str(url._path)


def test_infer_path_and_write(
backend,
backend_aware_bucketfs_params,
children_poem,
saas_host,
saas_pat,
saas_account_id,
backend_aware_saas_database_id,
):
"""
Combines the onprem and saas path inference tests
and validates the path by uploading and reading data.
"""
if backend == "saas":
if (
not saas_host
or not saas_pat
or not saas_account_id
or not backend_aware_saas_database_id
):
pytest.skip("Skipping SaaS test due to missing parameters.")
# Infer SaaS path
path = infer_path(
saas_url=saas_host,
saas_account_id=saas_account_id,
saas_database_id=backend_aware_saas_database_id,
saas_token=saas_pat,
path_in_bucket="test/",
)
else:
# On-prem inference, extract host/port as needed
host_port = re.search(
r"http://(\d{1,3}(?:\.\d{1,3}){3}):(\d+)",
backend_aware_bucketfs_params["url"],
)
path = infer_path(
bucketfs_host=host_port.group(1),
bucketfs_port=host_port.group(2),
bucketfs_name=backend_aware_bucketfs_params["service_name"],
bucket=backend_aware_bucketfs_params["bucket_name"],
bucketfs_user=backend_aware_bucketfs_params["username"],
bucketfs_password=backend_aware_bucketfs_params["password"],
path_in_bucket="test/",
bucketfs_use_https=backend_aware_bucketfs_params["verify"],
)
# Actually try uploading
write_path = path / "test_file.txt"
write_path.write(children_poem)

# Read it back for verification
read_back = b"".join(write_path.read(20))
assert read_back == children_poem
2 changes: 0 additions & 2 deletions test/integration/test_udf_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ def test_upload_and_udf_path(uploaded_file, setup_schema_and_udfs, exa_bucket):
content_from_udf_path = conn.execute(
f"SELECT READ_FILE_CONTENT_UDF('{bucket_udf_path}/{file_name}')"
).fetchone()[0]
print(content_from_udf_path)
assert content_from_udf_path == content


Expand All @@ -193,5 +192,4 @@ def test_upload_and_udf_pathlike(uploaded_file, setup_schema_and_udfs, exa_pathl
content_of_file_udf_path = conn.execute(
f"SELECT READ_FILE_CONTENT_UDF('{file_udf_path}')"
).fetchone()[0]
print(content_of_file_udf_path)
assert content_of_file_udf_path == content
85 changes: 85 additions & 0 deletions test/unit/test_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from enum import Enum
from unittest.mock import (
patch,
)

import pytest

from exasol.bucketfs._path import (
infer_backend,
infer_path,
)


class StorageBackend(Enum):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As @ahsimb said: This probably could go to production code.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or simply use StorageBackend, maybe making it public by importing in bucketfs/__init__.py.

onprem = "onprem"
saas = "saas"


# Dummy PathLike
PathLike = str


# Dummy build_path
def build_path(*args, **kwargs):
return f"mocked_path_{args}_{kwargs}"


# Let's start with infer_backend
def test_infer_backend_onprem():
result = infer_backend(
bucketfs_host="host",
bucketfs_port=123,
bucketfs_name="bfs",
bucket="mybucket",
bucketfs_user="user",
bucketfs_password="pw",
)
assert result == "onprem"


def test_infer_backend_saas_with_id():
result = infer_backend(
saas_url="https://api",
saas_account_id="acct",
saas_database_id="dbid",
saas_token="token",
)
assert result == "saas"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compare to the enum



def test_infer_backend_saas_with_name():
result = infer_backend(
saas_url="https://api",
saas_account_id="acct",
saas_database_name="dbname",
saas_token="token",
)
assert result == "saas"


def test_infer_backend_missing_fields():
with pytest.raises(ValueError, match="Insufficient parameters"):
infer_backend(bucketfs_host="host") # incomplete


def test_infer_backend_no_fields():
with pytest.raises(ValueError):
infer_backend()


@patch("exasol.bucketfs._path.build_path", side_effect=build_path)
def test_infer_path_onprem_with_ssl_ca(mock_build):
# Should pass ssl_trusted_ca as argument verify to exasol.bucketfs._path.build_path()

result = infer_path(
bucketfs_host="host",
bucketfs_port=123,
bucketfs_name="bfs",
bucket="mybucket",
bucketfs_user="user",
bucketfs_password="pw",
ssl_trusted_ca="ca_cert",
)
called_args = mock_build.call_args[1]
assert called_args["verify"] == "ca_cert"
Loading