From d6d47cf89835303cb3f358a136e04b4c1d0c4c03 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Tue, 8 Oct 2024 17:36:02 -0400 Subject: [PATCH 01/28] bindings created --- .gitignore | 73 +++++- CMakeLists.txt | 13 +- include/acquire.zarr.h | 6 +- python/CMakeLists.txt | 28 +++ python/acquire-zarr-py.cpp | 443 +++++++++++++++++++++++++++++++++ python/tests/CMakeLists.txt | 21 ++ python/tests/test_zarr.py | 58 +++++ src/streaming/array.writer.cpp | 8 +- src/streaming/s3.sink.cpp | 2 +- src/streaming/sink.cpp | 2 +- src/streaming/zarr.stream.cpp | 2 +- tests/CMakeLists.txt | 8 +- 12 files changed, 640 insertions(+), 24 deletions(-) create mode 100644 python/CMakeLists.txt create mode 100644 python/acquire-zarr-py.cpp create mode 100644 python/tests/CMakeLists.txt create mode 100644 python/tests/test_zarr.py diff --git a/.gitignore b/.gitignore index c0c9c35..4ca4e6d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,72 @@ -.idea -build*/ install -.vscode -.cache CMakeSettings.json -.vs TestResults -cmake-build* .DS_Store *.zip -_CPack_Packages/ .PVS-Studio + +# IDE +.idea +.vscode +.vs + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build*/ +cmake-build*/ +_CPack_Packages/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 239f107..69a5adb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,8 +20,19 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) option(NOTEST "Disable all tests" OFF) +option(BUILD_PYTHON "Build Python bindings" OFF) add_subdirectory(src) -add_subdirectory(tests) +if (${NOTEST}) + message(STATUS "Skipping test targets") +else () + add_subdirectory(tests) +endif () + +if (${BUILD_PYTHON}) + add_subdirectory(python) +else () + message(STATUS "Skipping Python bindings") +endif () include(CPack) diff --git a/include/acquire.zarr.h b/include/acquire.zarr.h index d74a587..32ae6ba 100644 --- a/include/acquire.zarr.h +++ b/include/acquire.zarr.h @@ -103,9 +103,9 @@ extern "C" * @return ZarrStatusCode_Success on success, or an error code on failure. */ ZarrStatusCode ZarrStream_append(ZarrStream* stream, - const void* data, - size_t bytes_in, - size_t* bytes_out); + const void* data, + size_t bytes_in, + size_t* bytes_out); #ifdef __cplusplus } diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt new file mode 100644 index 0000000..ff8d6a8 --- /dev/null +++ b/python/CMakeLists.txt @@ -0,0 +1,28 @@ +project(acquire-zarr-py) + +execute_process(COMMAND python3 -m pybind11 --cmakedir + RESULT_VARIABLE pybind11_NOT_FOUND + OUTPUT_VARIABLE pybind11_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +if (pybind11_NOT_FOUND) + message(FATAL_ERROR "pybind11 not found in the current environment. Please install pybind11 via pip.") +else () + LIST(APPEND CMAKE_MODULE_PATH ${pybind11_DIR}) + cmake_path(CONVERT CMAKE_MODULE_PATH TO_CMAKE_PATH_LIST CMAKE_MODULE_PATH) +endif () + +find_package(Python3 REQUIRED COMPONENTS Interpreter Development) +find_package(pybind11 REQUIRED) + +pybind11_add_module(acquire_zarr acquire-zarr-py.cpp) + +target_include_directories(acquire_zarr PRIVATE ${CMAKE_SOURCE_DIR}/include) +target_link_libraries(acquire_zarr PRIVATE acquire-zarr) + +set_target_properties(acquire_zarr PROPERTIES + MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>" +) + +add_subdirectory(tests) diff --git a/python/acquire-zarr-py.cpp b/python/acquire-zarr-py.cpp new file mode 100644 index 0000000..5635cc8 --- /dev/null +++ b/python/acquire-zarr-py.cpp @@ -0,0 +1,443 @@ +#include + +#include +#include +#include + +#include "acquire.zarr.h" + +namespace py = pybind11; + +namespace { +auto ZarrStreamDeleter = [](ZarrStream_s* stream) { + if (stream) { + ZarrStream_destroy(stream); + } +}; +} // namespace + +class PyZarrS3Settings +{ + public: + PyZarrS3Settings() = default; + ~PyZarrS3Settings() = default; + + void set_endpoint(const std::string& endpoint) { endpoint_ = endpoint; } + const std::string& endpoint() const { return endpoint_; } + + void set_bucket_name(const std::string& bucket) { bucket_name_ = bucket; } + const std::string& bucket_name() const { return bucket_name_; } + + void set_access_key_id(const std::string& access_key_id) + { + access_key_id_ = access_key_id; + } + const std::string& access_key_id() const { return access_key_id_; } + + void set_secret_access_key(const std::string& secret_access_key) + { + secret_access_key_ = secret_access_key; + } + const std::string& secret_access_key() const { return secret_access_key_; } + + private: + std::string endpoint_; + std::string bucket_name_; + std::string access_key_id_; + std::string secret_access_key_; +}; + +class PyZarrCompressionSettings +{ + public: + PyZarrCompressionSettings() = default; + ~PyZarrCompressionSettings() = default; + + ZarrCompressor compressor() const { return compressor_; } + void set_compressor(ZarrCompressor compressor) { compressor_ = compressor; } + + ZarrCompressionCodec codec() const { return codec_; } + void set_codec(ZarrCompressionCodec codec) { codec_ = codec; } + + uint8_t level() const { return level_; } + void set_level(uint8_t level) { level_ = level; } + + uint8_t shuffle() const { return shuffle_; } + void set_shuffle(uint8_t shuffle) { shuffle_ = shuffle; } + + private: + ZarrCompressor compressor_; + ZarrCompressionCodec codec_; + uint8_t level_; + uint8_t shuffle_; +}; + +class PyZarrDimensionProperties +{ + public: + PyZarrDimensionProperties() = default; + ~PyZarrDimensionProperties() = default; + + std::string name() const { return name_; } + void set_name(const std::string& name) { name_ = name; } + + ZarrDimensionType type() const { return type_; } + void set_type(ZarrDimensionType type) { type_ = type; } + + uint32_t array_size_px() const { return array_size_px_; } + void set_array_size_px(uint32_t size) { array_size_px_ = size; } + + uint32_t chunk_size_px() const { return chunk_size_px_; } + void set_chunk_size_px(uint32_t size) { chunk_size_px_ = size; } + + uint32_t shard_size_chunks() const { return shard_size_chunks_; } + void set_shard_size_chunks(uint32_t size) { shard_size_chunks_ = size; } + + private: + std::string name_; + ZarrDimensionType type_; + uint32_t array_size_px_; + uint32_t chunk_size_px_; + uint32_t shard_size_chunks_; +}; + +class PyZarrStreamSettings +{ + public: + PyZarrStreamSettings() = default; + ~PyZarrStreamSettings() = default; + + std::string store_path() const { return store_path_; } + void set_store_path(const std::string& path) { store_path_ = path; } + + std::optional custom_metadata() const + { + return custom_metadata_; + } + void set_custom_metadata(const std::optional& metadata) + { + custom_metadata_ = metadata; + } + + std::optional s3() const { return s3_settings_; } + void set_s3(const std::optional& settings) + { + s3_settings_ = settings; + } + + std::optional compression() const + { + return compression_settings_; + } + void set_compression( + const std::optional& settings) + { + compression_settings_ = settings; + } + + std::vector dimensions() const + { + return dimensions_; + } + void set_dimensions(const std::vector& dims) + { + dimensions_ = dims; + } + + bool multiscale() const { return multiscale_; } + void set_multiscale(bool multiscale) { multiscale_ = multiscale; } + + ZarrDataType data_type() const { return data_type_; } + void set_data_type(ZarrDataType type) { data_type_ = type; } + + ZarrVersion version() const { return version_; } + void set_version(ZarrVersion version) { version_ = version; } + + private: + std::string store_path_; + std::optional custom_metadata_; + std::optional s3_settings_; + std::optional compression_settings_; + std::vector dimensions_; + bool multiscale_ = false; + ZarrDataType data_type_; + ZarrVersion version_; +}; + +class PyZarrStream +{ + public: + explicit PyZarrStream(const PyZarrStreamSettings& settings) + { + ZarrS3Settings s3_settings; + ZarrCompressionSettings compression_settings; + + ZarrStreamSettings stream_settings{ + .store_path = nullptr, + .custom_metadata = nullptr, + .s3_settings = nullptr, + .compression_settings = nullptr, + .dimensions = nullptr, + .dimension_count = 0, + .multiscale = settings.multiscale(), + .data_type = settings.data_type(), + .version = settings.version(), + }; + + auto store_path = settings.store_path(); + stream_settings.store_path = store_path.c_str(); + + auto metadata = settings.custom_metadata(); + stream_settings.custom_metadata = + settings.custom_metadata().has_value() + ? settings.custom_metadata()->c_str() + : nullptr; + + if (settings.s3().has_value()) { + s3_settings.endpoint = settings.s3()->endpoint().c_str(); + s3_settings.bucket_name = settings.s3()->bucket_name().c_str(); + s3_settings.access_key_id = settings.s3()->access_key_id().c_str(); + s3_settings.secret_access_key = + settings.s3()->secret_access_key().c_str(); + stream_settings.s3_settings = &s3_settings; + } + + if (settings.compression().has_value()) { + compression_settings.compressor = + settings.compression()->compressor(); + compression_settings.codec = settings.compression()->codec(); + compression_settings.level = settings.compression()->level(); + compression_settings.shuffle = settings.compression()->shuffle(); + stream_settings.compression_settings = &compression_settings; + } + + const auto& dims = settings.dimensions(); + + std::vector dimension_props; + std::vector dimension_names(dims.size()); + for (auto i = 0; i < dims.size(); ++i) { + const auto& dim = dims[i]; + dimension_names[i] = dim.name(); + ZarrDimensionProperties properties{ + .name = dimension_names[i].c_str(), + .type = dim.type(), + .array_size_px = dim.array_size_px(), + .chunk_size_px = dim.chunk_size_px(), + .shard_size_chunks = dim.shard_size_chunks(), + }; + dimension_props.push_back(properties); + } + + stream_settings.dimensions = dimension_props.data(); + stream_settings.dimension_count = dims.size(); + + stream_ = + ZarrStreamPtr(ZarrStream_create(&stream_settings), ZarrStreamDeleter); + if (!stream_) { + PyErr_SetString(PyExc_RuntimeError, "Failed to create Zarr stream"); + throw py::error_already_set(); + } + } + + ~PyZarrStream() + { + if (is_active()) { + ZarrStream_destroy(stream_.get()); + } + } + + void append(py::array image_data) + { + if (!is_active()) { + PyErr_SetString(PyExc_RuntimeError, + "Cannot append unless streaming."); + throw py::error_already_set(); + } + + auto buf = image_data.request(); + auto* ptr = (uint8_t*)buf.ptr; + + size_t bytes_out; + auto status = ZarrStream_append( + stream_.get(), ptr, buf.itemsize * buf.size, &bytes_out); + + if (status != ZarrStatusCode_Success) { + std::string err = "Failed to append data to Zarr stream: " + + std::string(Zarr_get_status_message(status)); + PyErr_SetString(PyExc_RuntimeError, err.c_str()); + throw py::error_already_set(); + } + } + + bool is_active() const { return static_cast(stream_); } + + private: + using ZarrStreamPtr = + std::unique_ptr; + + ZarrStreamPtr stream_; +}; + +PYBIND11_MODULE(acquire_zarr, m) +{ + m.doc() = R"pbdoc( + Acquire Zarr Writer Python API + ----------------------- + .. currentmodule:: acquire_zarr + .. autosummary:: + :toctree: _generate + append + )pbdoc"; + + py::enum_(m, "Version") + .value("V2", ZarrVersion_2) + .value("V3", ZarrVersion_3) + .export_values(); + + py::enum_(m, "DType") + .value("DTYPE_UINT8", ZarrDataType_uint8) + .value("DTYPE_UINT16", ZarrDataType_uint16) + .value("DTYPE_UINT32", ZarrDataType_uint32) + .value("DTYPE_UINT64", ZarrDataType_uint64) + .value("DTYPE_INT8", ZarrDataType_int8) + .value("DTYPE_INT16", ZarrDataType_int16) + .value("DTYPE_INT32", ZarrDataType_int32) + .value("DTYPE_INT64", ZarrDataType_int64) + .value("DTYPE_FLOAT32", ZarrDataType_float32) + .value("DTYPE_FLOAT64", ZarrDataType_float64) + .export_values(); + + py::enum_(m, "Compressor") + .value("COMPRESSOR_NONE", ZarrCompressor_None) + .value("COMPRESSOR_BLOSC1", ZarrCompressor_Blosc1) + .export_values(); + + py::enum_(m, "CompressionCodec") + .value("COMPRESSION_NONE", ZarrCompressionCodec_None) + .value("COMPRESSION_BLOSC_LZ4", ZarrCompressionCodec_BloscLZ4) + .value("COMPRESSION_BLOSC_ZSTD", ZarrCompressionCodec_BloscZstd) + .export_values(); + + py::enum_(m, "DimensionType") + .value("DIMENSION_TYPE_SPACE", ZarrDimensionType_Space) + .value("DIMENSION_TYPE_CHANNEL", ZarrDimensionType_Channel) + .value("DIMENSION_TYPE_TIME", ZarrDimensionType_Time) + .value("DIMENSION_TYPE_OTHER", ZarrDimensionType_Other) + .export_values(); + + py::class_(m, "S3Settings") + .def(py::init<>()) + .def_property("endpoint", + &PyZarrS3Settings::endpoint, + &PyZarrS3Settings::set_endpoint) + .def_property("bucket_name", + &PyZarrS3Settings::bucket_name, + &PyZarrS3Settings::set_bucket_name) + .def_property("access_key_id", + &PyZarrS3Settings::access_key_id, + &PyZarrS3Settings::set_access_key_id) + .def_property("secret_access_key", + &PyZarrS3Settings::secret_access_key, + &PyZarrS3Settings::set_secret_access_key); + + py::class_(m, "ZarrCompressionSettings") + .def(py::init<>()) + .def_property("compressor", + &PyZarrCompressionSettings::compressor, + &PyZarrCompressionSettings::set_compressor) + .def_property("codec", + &PyZarrCompressionSettings::codec, + &PyZarrCompressionSettings::set_codec) + .def_property("level", + &PyZarrCompressionSettings::level, + &PyZarrCompressionSettings::set_level) + .def_property("shuffle", + &PyZarrCompressionSettings::shuffle, + &PyZarrCompressionSettings::set_shuffle); + + py::class_(m, "ZarrDimensionProperties") + .def(py::init<>()) + .def_property("name", + &PyZarrDimensionProperties::name, + &PyZarrDimensionProperties::set_name) + .def_property("type", + &PyZarrDimensionProperties::type, + &PyZarrDimensionProperties::set_type) + .def_property("array_size_px", + &PyZarrDimensionProperties::array_size_px, + &PyZarrDimensionProperties::set_array_size_px) + .def_property("chunk_size_px", + &PyZarrDimensionProperties::chunk_size_px, + &PyZarrDimensionProperties::set_chunk_size_px) + .def_property("shard_size_chunks", + &PyZarrDimensionProperties::shard_size_chunks, + &PyZarrDimensionProperties::set_shard_size_chunks); + + py::class_(m, "ZarrStreamSettings") + .def(py::init<>()) + .def_property("store_path", + &PyZarrStreamSettings::store_path, + &PyZarrStreamSettings::set_store_path) + .def_property( + "custom_metadata", + [](const PyZarrStreamSettings& self) -> py::object { + if (self.custom_metadata()) { + return py::cast(*self.custom_metadata()); + } + return py::none(); + }, + [](PyZarrStreamSettings& self, py::object obj) { + if (obj.is_none()) { + self.set_custom_metadata(std::nullopt); + } else { + self.set_custom_metadata(obj.cast()); + } + }) + .def_property( + "s3", + [](const PyZarrStreamSettings& self) -> py::object { + if (self.s3()) { + return py::cast(*self.s3()); + } + return py::none(); + }, + [](PyZarrStreamSettings& self, py::object obj) { + if (obj.is_none()) { + self.set_s3(std::nullopt); + } else { + self.set_s3(obj.cast()); + } + }) + .def_property( + "compression", + [](const PyZarrStreamSettings& self) -> py::object { + if (self.compression()) { + return py::cast(*self.compression()); + } + return py::none(); + }, + [](PyZarrStreamSettings& self, py::object obj) { + if (obj.is_none()) { + self.set_compression(std::nullopt); + } else { + self.set_compression(obj.cast()); + } + }) + .def_property("dimensions", + &PyZarrStreamSettings::dimensions, + &PyZarrStreamSettings::set_dimensions) + .def_property("multiscale", + &PyZarrStreamSettings::multiscale, + &PyZarrStreamSettings::set_multiscale) + .def_property("data_type", + &PyZarrStreamSettings::data_type, + &PyZarrStreamSettings::set_data_type) + .def_property("version", + &PyZarrStreamSettings::version, + &PyZarrStreamSettings::set_version); + + py::class_(m, "ZarrStream") + .def(py::init()) + .def("append", &PyZarrStream::append) + .def("is_active", &PyZarrStream::is_active); +} diff --git a/python/tests/CMakeLists.txt b/python/tests/CMakeLists.txt new file mode 100644 index 0000000..5dd7983 --- /dev/null +++ b/python/tests/CMakeLists.txt @@ -0,0 +1,21 @@ +enable_testing() + +add_test( + NAME python_api_test + COMMAND pytest ${CMAKE_CURRENT_LIST_DIR}/test_zarr.py + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} +) + +if (WIN32) + set(PYTHONPATH "${acquire-zarr-py_BINARY_DIR}/${CMAKE_BUILD_TYPE}") + message(STATUS "WINDOWS PYTHONPATH: ${PYTHONPATH}") +else () + set(PYTHONPATH "${acquire-zarr-py_BINARY_DIR}") +endif () + +set_tests_properties(python_api_test + PROPERTIES + LABELS "acquire-zarr-python" + ENVIRONMENT "PYTHONPATH=${PYTHONPATH};" + DEPENDS acquire_zarr +) \ No newline at end of file diff --git a/python/tests/test_zarr.py b/python/tests/test_zarr.py new file mode 100644 index 0000000..ddda2dc --- /dev/null +++ b/python/tests/test_zarr.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +import pytest +import numpy as np +import acquire_zarr +import shutil +import os + +""" +from time import sleep + +data = np.zeros((64, 64), dtype=np.uint8) + +def check_zarr(v3: bool, uri: str) -> None: + zarr = acquire_zarr.AcquireZarrWriter() + + zarr.use_v3 = v3 + zarr.uri = uri + + zarr.dimensions = ["x", "y", "t"] + + zarr.dimension_sizes = [64, 64, 0] + zarr.chunk_sizes = [64, 64, 1] + zarr.shape = [1,64,64,1] + zarr.dtype = acquire_zarr.DType.DTYPE_UINT16 + zarr.chunk_sizes[-1] = 1 + zarr.compression_codec = acquire_zarr.CompressionCodec.COMPRESSION_NONE + + zarr.compression_level = 5 + zarr.compression_shuffle = 0 + + # check that getters and setters work + assert zarr.dimensions == ["x", "y", "t"] + assert zarr.dimension_sizes == [64, 64, 0] + + assert zarr.chunk_sizes == [64, 64, 1] + assert zarr.compression_codec == acquire_zarr.CompressionCodec.COMPRESSION_NONE + assert zarr.compression_level == 5 + assert zarr.compression_shuffle == 0 + + try: + zarr.start() + + for i in range(3): + zarr.append(data) + + zarr.stop() + finally: + #clean up + if os.path.exists(uri): + shutil.rmtree(uri) + +def test_zarr_v2() -> None: + check_zarr(False, "test_v2.zarr") + +#def test_zarr_v3() -> None: +# check_zarr(True, "test_v3.zarr") +""" \ No newline at end of file diff --git a/src/streaming/array.writer.cpp b/src/streaming/array.writer.cpp index f1a3b55..97ddce9 100644 --- a/src/streaming/array.writer.cpp +++ b/src/streaming/array.writer.cpp @@ -425,7 +425,7 @@ zarr::ArrayWriter::close_sinks_() { for (auto i = 0; i < data_sinks_.size(); ++i) { EXPECT(finalize_sink(std::move(data_sinks_[i])), - "Failed to finalize sink ", + "Failed to finalize_ sink ", i); } data_sinks_.clear(); @@ -444,7 +444,7 @@ bool zarr::finalize_array(std::unique_ptr&& writer) { if (writer == nullptr) { - LOG_INFO("Array writer is null. Nothing to finalize."); + LOG_INFO("Array writer is null. Nothing to finalize_."); return true; } @@ -452,12 +452,12 @@ zarr::finalize_array(std::unique_ptr&& writer) try { writer->flush_(); // data sinks finalized here } catch (const std::exception& exc) { - LOG_ERROR("Failed to finalize array writer: ", exc.what()); + LOG_ERROR("Failed to finalize_ array writer: ", exc.what()); return false; } if (!finalize_sink(std::move(writer->metadata_sink_))) { - LOG_ERROR("Failed to finalize metadata sink"); + LOG_ERROR("Failed to finalize_ metadata sink"); return false; } diff --git a/src/streaming/s3.sink.cpp b/src/streaming/s3.sink.cpp index 04ea3c7..90e703d 100644 --- a/src/streaming/s3.sink.cpp +++ b/src/streaming/s3.sink.cpp @@ -32,7 +32,7 @@ zarr::S3Sink::flush_() return false; } if (!finalize_multipart_upload_()) { - LOG_ERROR("Failed to finalize multipart upload of object ", + LOG_ERROR("Failed to finalize_ multipart upload of object ", object_key_); return false; } diff --git a/src/streaming/sink.cpp b/src/streaming/sink.cpp index e351a7b..bdcf4c7 100644 --- a/src/streaming/sink.cpp +++ b/src/streaming/sink.cpp @@ -5,7 +5,7 @@ bool zarr::finalize_sink(std::unique_ptr&& sink) { if (sink == nullptr) { - LOG_INFO("Sink is null. Nothing to finalize."); + LOG_INFO("Sink is null. Nothing to finalize_."); return true; } diff --git a/src/streaming/zarr.stream.cpp b/src/streaming/zarr.stream.cpp index ccb3d01..2f3b87d 100644 --- a/src/streaming/zarr.stream.cpp +++ b/src/streaming/zarr.stream.cpp @@ -970,7 +970,7 @@ bool finalize_stream(struct ZarrStream_s* stream) { if (stream == nullptr) { - LOG_INFO("Stream is null. Nothing to finalize."); + LOG_INFO("Stream is null. Nothing to finalize_."); return true; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e308d23..4196c1c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,6 +1,2 @@ -if (${NOTEST}) - message(STATUS "Skipping test targets") -else () - add_subdirectory(unit-tests) - add_subdirectory(integration) -endif () +add_subdirectory(unit-tests) +add_subdirectory(integration) From e85d1857991774904f5d404280318780ee56b107 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 10:44:25 -0400 Subject: [PATCH 02/28] pip install . working --- CMakeLists.txt | 10 +++--- MANIFEST.in | 5 +++ pyproject.toml | 15 +++++++++ python/CMakeLists.txt | 2 ++ setup.py | 75 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 102 insertions(+), 5 deletions(-) create mode 100644 MANIFEST.in create mode 100644 pyproject.toml create mode 100644 setup.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 69a5adb..98fcb20 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.23) +cmake_minimum_required(VERSION 3.30) project(acquire-zarr) cmake_policy(SET CMP0079 NEW) # allows use with targets in other directories enable_testing() @@ -19,14 +19,14 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -option(NOTEST "Disable all tests" OFF) +option(BUILD_TESTS "Build tests" ON) option(BUILD_PYTHON "Build Python bindings" OFF) add_subdirectory(src) -if (${NOTEST}) - message(STATUS "Skipping test targets") -else () +if (${BUILD_TESTS}) add_subdirectory(tests) +else () + message(STATUS "Skipping test targets") endif () if (${BUILD_PYTHON}) diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..c02d296 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,5 @@ +include python/CMakeLists.txt +include python/acquire-zarr-py.cpp +recursive-include include * +recursive-include python/include * +recursive-include src * \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..96a4ba4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel", + "ninja", + "cmake>=3.12", +] +build-backend = "setuptools.build_meta" + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = "-ra -q" +testpaths = [ + "python/tests", +] \ No newline at end of file diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ff8d6a8..ccac8ae 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,4 +1,6 @@ +cmake_minimum_required(VERSION 3.30) project(acquire-zarr-py) +cmake_policy(SET CMP0057 NEW) execute_process(COMMAND python3 -m pybind11 --cmakedir RESULT_VARIABLE pybind11_NOT_FOUND diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..52027c5 --- /dev/null +++ b/setup.py @@ -0,0 +1,75 @@ +import glob +import os +import subprocess +from setuptools import setup, Extension +from setuptools.command.build_ext import build_ext + + +class CMakeExtension(Extension): + def __init__(self, name, sourcedir=""): + Extension.__init__(self, name, sources=[]) + self.sourcedir = os.path.abspath(sourcedir) + + +class CMakeBuild(build_ext): + def build_extension(self, ext): + extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) + + # Required for auto-detection of auxiliary "native" libs + if not extdir.endswith(os.path.sep): + extdir += os.path.sep + + build_dir = os.path.abspath(os.path.join(ext.sourcedir, 'build')) + + cfg = "Debug" if self.debug else "Release" + + # Set CMAKE_BUILD_TYPE + cmake_args = [ + "--preset=default", + f"-DCMAKE_BUILD_TYPE={cfg}", + "-DBUILD_PYTHON=ON", + "-DBUILD_TESTS=OFF" + ] + # cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"] + + if self.compiler.compiler_type == "msvc": + cmake_args.append("-DVCPKG_TARGET_TRIPLET=x64-windows-static") + + build_args = ['--config', cfg] + + # Set CMAKE_BUILD_PARALLEL_LEVEL for parallel builds + if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: + if hasattr(self, "parallel") and self.parallel: + build_args += [f"-j{self.parallel}"] + + # Build the entire project + subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=build_dir) + subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=build_dir) + + # Move the built extension to the correct location + built_ext = os.path.join(build_dir, 'python', cfg, 'acquire_zarr*.pyd') + self.move_file(built_ext, self.get_ext_fullpath(ext.name)) + + def move_file(self, src, dst): + import shutil + try: + os.makedirs(os.path.dirname(dst), exist_ok=True) + for filename in glob.glob(src): + shutil.move(filename, dst) + except Exception as e: + print(f"Error moving file from {src} to {dst}: {e}") + raise + + +setup( + name='acquire-zarr', + version='0.0.1', + author='Your Name', + author_email='your.email@example.com', + description='Python bindings for acquire-zarr', + long_description='', + ext_modules=[CMakeExtension('acquire_zarr')], + cmdclass=dict(build_ext=CMakeBuild), + zip_safe=False, + python_requires='>=3.6', +) From 2b648c176f63682d59118ad82aa3f932b466e673 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 11:15:17 -0400 Subject: [PATCH 03/28] python -m build working (on Windows anyway) --- .github/workflows/build.yml | 4 ++-- .github/workflows/release.yml | 6 +++--- MANIFEST.in | 13 +++++++++---- setup.py | 28 ++++++++++++++-------------- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ae560e1..3198794 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -93,12 +93,12 @@ jobs: - name: Build for x64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="x86_64" -DNOTEST=1 + cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="x86_64" -DBUILD_TESTS=OFF cmake --build ${{github.workspace}}/build-x64 --config ${{matrix.build_type}} - name: Build for arm64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="arm64" -DNOTEST=1 + cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="arm64" -DBUILD_TESTS=OFF cmake --build ${{github.workspace}}/build-arm64 --config ${{matrix.build_type}} - name: Create a universal binary diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 981749f..9f8873a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -57,7 +57,7 @@ jobs: - name: Package run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=${{matrix.vcpkg_triplet}} -B ${{github.workspace}}/pack -DCMAKE_BUILD_TYPE=Release -DNOTEST=1 + cmake --preset=default -DVCPKG_TARGET_TRIPLET=${{matrix.vcpkg_triplet}} -B ${{github.workspace}}/pack -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=OFF cmake --build ${{github.workspace}}/pack --config Release cpack --config ${{github.workspace}}/pack/CPackConfig.cmake -C Release -G ZIP @@ -93,12 +93,12 @@ jobs: - name: Build for x64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="x86_64" -DNOTEST=1 + cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="x86_64" -DBUILD_TESTS=OFF cmake --build ${{github.workspace}}/build-x64 --config Release - name: Build for arm64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="arm64" + cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="arm64" -DBUILD_TESTS=OFF cmake --build ${{github.workspace}}/build-arm64 --config Release - name: Test # don't release if tests are failing diff --git a/MANIFEST.in b/MANIFEST.in index c02d296..7252efb 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,10 @@ -include python/CMakeLists.txt -include python/acquire-zarr-py.cpp +include CMakeLists.txt +include CMakePresets.json +include vcpkg.json +include vcpkg-configuration.json +include cmake/*.cmake +recursive-include src * recursive-include include * -recursive-include python/include * -recursive-include src * \ No newline at end of file +recursive-include python * +include LICENSE +include README.md \ No newline at end of file diff --git a/setup.py b/setup.py index 52027c5..ffa16cb 100644 --- a/setup.py +++ b/setup.py @@ -14,8 +14,6 @@ def __init__(self, name, sourcedir=""): class CMakeBuild(build_ext): def build_extension(self, ext): extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) - - # Required for auto-detection of auxiliary "native" libs if not extdir.endswith(os.path.sep): extdir += os.path.sep @@ -23,49 +21,51 @@ def build_extension(self, ext): cfg = "Debug" if self.debug else "Release" - # Set CMAKE_BUILD_TYPE cmake_args = [ "--preset=default", f"-DCMAKE_BUILD_TYPE={cfg}", "-DBUILD_PYTHON=ON", "-DBUILD_TESTS=OFF" ] - # cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"] if self.compiler.compiler_type == "msvc": cmake_args.append("-DVCPKG_TARGET_TRIPLET=x64-windows-static") build_args = ['--config', cfg] - # Set CMAKE_BUILD_PARALLEL_LEVEL for parallel builds if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: if hasattr(self, "parallel") and self.parallel: build_args += [f"-j{self.parallel}"] - # Build the entire project + os.makedirs(build_dir, exist_ok=True) subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=build_dir) subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=build_dir) - # Move the built extension to the correct location built_ext = os.path.join(build_dir, 'python', cfg, 'acquire_zarr*.pyd') - self.move_file(built_ext, self.get_ext_fullpath(ext.name)) + matching_files = glob.glob(built_ext) + if not matching_files: + raise RuntimeError(f"Could not find any files matching {built_ext}") + + # Move the built extension to the correct location + dst = self.get_ext_fullpath(ext.name) + self.move_file(matching_files, dst) - def move_file(self, src, dst): + def move_file(self, src_files, dst): import shutil try: os.makedirs(os.path.dirname(dst), exist_ok=True) - for filename in glob.glob(src): - shutil.move(filename, dst) + for filename in src_files: + shutil.copy2(filename, dst) except Exception as e: - print(f"Error moving file from {src} to {dst}: {e}") + print(f"Error moving files to {dst}: {e}") raise setup( name='acquire-zarr', version='0.0.1', - author='Your Name', - author_email='your.email@example.com', + author='Alan Liddell', + author_email='aliddell@chanzuckerberg.com', description='Python bindings for acquire-zarr', long_description='', ext_modules=[CMakeExtension('acquire_zarr')], From 2ef675c09b64fffee98bdb98379dafcd9a445ed4 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 12:02:39 -0400 Subject: [PATCH 04/28] get it building on linux --- pyproject.toml | 1 + setup.py | 33 +++++++++++++++++++-------------- src/streaming/CMakeLists.txt | 1 + 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 96a4ba4..b47670c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,7 @@ requires = [ "wheel", "ninja", "cmake>=3.12", + "pybind11[global]", ] build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index ffa16cb..c486a9c 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import glob import os +from pathlib import Path import subprocess from setuptools import setup, Extension from setuptools.command.build_ext import build_ext @@ -17,7 +18,7 @@ def build_extension(self, ext): if not extdir.endswith(os.path.sep): extdir += os.path.sep - build_dir = os.path.abspath(os.path.join(ext.sourcedir, 'build')) + build_dir = os.path.abspath(os.path.join(ext.sourcedir, "build")) cfg = "Debug" if self.debug else "Release" @@ -25,23 +26,26 @@ def build_extension(self, ext): "--preset=default", f"-DCMAKE_BUILD_TYPE={cfg}", "-DBUILD_PYTHON=ON", - "-DBUILD_TESTS=OFF" + "-DBUILD_TESTS=OFF", ] if self.compiler.compiler_type == "msvc": cmake_args.append("-DVCPKG_TARGET_TRIPLET=x64-windows-static") - build_args = ['--config', cfg] + build_args = ["--config", cfg] if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: if hasattr(self, "parallel") and self.parallel: build_args += [f"-j{self.parallel}"] os.makedirs(build_dir, exist_ok=True) - subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=build_dir) - subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=build_dir) + subprocess.check_call(["cmake", ext.sourcedir] + cmake_args, cwd=build_dir) + subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=build_dir) - built_ext = os.path.join(build_dir, 'python', cfg, 'acquire_zarr*.pyd') + if self.compiler.compiler_type == "msvc": + built_ext = str(Path(build_dir) / "python" / cfg / "acquire_zarr*.pyd") + else: + built_ext = str(Path(build_dir) / "python" / "acquire_zarr*.so") matching_files = glob.glob(built_ext) if not matching_files: raise RuntimeError(f"Could not find any files matching {built_ext}") @@ -52,6 +56,7 @@ def build_extension(self, ext): def move_file(self, src_files, dst): import shutil + try: os.makedirs(os.path.dirname(dst), exist_ok=True) for filename in src_files: @@ -62,14 +67,14 @@ def move_file(self, src_files, dst): setup( - name='acquire-zarr', - version='0.0.1', - author='Alan Liddell', - author_email='aliddell@chanzuckerberg.com', - description='Python bindings for acquire-zarr', - long_description='', - ext_modules=[CMakeExtension('acquire_zarr')], + name="acquire-zarr", + version="0.0.1", + author="Alan Liddell", + author_email="aliddell@chanzuckerberg.com", + description="Python bindings for acquire-zarr", + long_description="", + ext_modules=[CMakeExtension("acquire_zarr")], cmdclass=dict(build_ext=CMakeBuild), zip_safe=False, - python_requires='>=3.6', + python_requires=">=3.6", ) diff --git a/src/streaming/CMakeLists.txt b/src/streaming/CMakeLists.txt index 753c943..3ab50ac 100644 --- a/src/streaming/CMakeLists.txt +++ b/src/streaming/CMakeLists.txt @@ -51,6 +51,7 @@ target_compile_definitions(${tgt} PRIVATE set_target_properties(${tgt} PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>" + POSITION_INDEPENDENT_CODE ON ) install(TARGETS ${tgt} From 9a44c42e18489ac081bfbb1b4c24d128a43e90e4 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 12:26:28 -0400 Subject: [PATCH 05/28] Remove test stub (save for the next PR) --- python/CMakeLists.txt | 2 -- python/tests/CMakeLists.txt | 21 -------------- python/tests/test_zarr.py | 58 ------------------------------------- 3 files changed, 81 deletions(-) delete mode 100644 python/tests/CMakeLists.txt delete mode 100644 python/tests/test_zarr.py diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ccac8ae..9a7caed 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -26,5 +26,3 @@ target_link_libraries(acquire_zarr PRIVATE acquire-zarr) set_target_properties(acquire_zarr PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>" ) - -add_subdirectory(tests) diff --git a/python/tests/CMakeLists.txt b/python/tests/CMakeLists.txt deleted file mode 100644 index 5dd7983..0000000 --- a/python/tests/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -enable_testing() - -add_test( - NAME python_api_test - COMMAND pytest ${CMAKE_CURRENT_LIST_DIR}/test_zarr.py - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} -) - -if (WIN32) - set(PYTHONPATH "${acquire-zarr-py_BINARY_DIR}/${CMAKE_BUILD_TYPE}") - message(STATUS "WINDOWS PYTHONPATH: ${PYTHONPATH}") -else () - set(PYTHONPATH "${acquire-zarr-py_BINARY_DIR}") -endif () - -set_tests_properties(python_api_test - PROPERTIES - LABELS "acquire-zarr-python" - ENVIRONMENT "PYTHONPATH=${PYTHONPATH};" - DEPENDS acquire_zarr -) \ No newline at end of file diff --git a/python/tests/test_zarr.py b/python/tests/test_zarr.py deleted file mode 100644 index ddda2dc..0000000 --- a/python/tests/test_zarr.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python3 - -import pytest -import numpy as np -import acquire_zarr -import shutil -import os - -""" -from time import sleep - -data = np.zeros((64, 64), dtype=np.uint8) - -def check_zarr(v3: bool, uri: str) -> None: - zarr = acquire_zarr.AcquireZarrWriter() - - zarr.use_v3 = v3 - zarr.uri = uri - - zarr.dimensions = ["x", "y", "t"] - - zarr.dimension_sizes = [64, 64, 0] - zarr.chunk_sizes = [64, 64, 1] - zarr.shape = [1,64,64,1] - zarr.dtype = acquire_zarr.DType.DTYPE_UINT16 - zarr.chunk_sizes[-1] = 1 - zarr.compression_codec = acquire_zarr.CompressionCodec.COMPRESSION_NONE - - zarr.compression_level = 5 - zarr.compression_shuffle = 0 - - # check that getters and setters work - assert zarr.dimensions == ["x", "y", "t"] - assert zarr.dimension_sizes == [64, 64, 0] - - assert zarr.chunk_sizes == [64, 64, 1] - assert zarr.compression_codec == acquire_zarr.CompressionCodec.COMPRESSION_NONE - assert zarr.compression_level == 5 - assert zarr.compression_shuffle == 0 - - try: - zarr.start() - - for i in range(3): - zarr.append(data) - - zarr.stop() - finally: - #clean up - if os.path.exists(uri): - shutil.rmtree(uri) - -def test_zarr_v2() -> None: - check_zarr(False, "test_v2.zarr") - -#def test_zarr_v3() -> None: -# check_zarr(True, "test_v3.zarr") -""" \ No newline at end of file From 3978d556a83ff1d1221f8e467472cc988184bbe5 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 12:32:24 -0400 Subject: [PATCH 06/28] Undo an overzealous rename --- src/streaming/array.writer.cpp | 8 ++++---- src/streaming/s3.sink.cpp | 2 +- src/streaming/sink.cpp | 2 +- src/streaming/zarr.stream.cpp | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/streaming/array.writer.cpp b/src/streaming/array.writer.cpp index 97ddce9..f1a3b55 100644 --- a/src/streaming/array.writer.cpp +++ b/src/streaming/array.writer.cpp @@ -425,7 +425,7 @@ zarr::ArrayWriter::close_sinks_() { for (auto i = 0; i < data_sinks_.size(); ++i) { EXPECT(finalize_sink(std::move(data_sinks_[i])), - "Failed to finalize_ sink ", + "Failed to finalize sink ", i); } data_sinks_.clear(); @@ -444,7 +444,7 @@ bool zarr::finalize_array(std::unique_ptr&& writer) { if (writer == nullptr) { - LOG_INFO("Array writer is null. Nothing to finalize_."); + LOG_INFO("Array writer is null. Nothing to finalize."); return true; } @@ -452,12 +452,12 @@ zarr::finalize_array(std::unique_ptr&& writer) try { writer->flush_(); // data sinks finalized here } catch (const std::exception& exc) { - LOG_ERROR("Failed to finalize_ array writer: ", exc.what()); + LOG_ERROR("Failed to finalize array writer: ", exc.what()); return false; } if (!finalize_sink(std::move(writer->metadata_sink_))) { - LOG_ERROR("Failed to finalize_ metadata sink"); + LOG_ERROR("Failed to finalize metadata sink"); return false; } diff --git a/src/streaming/s3.sink.cpp b/src/streaming/s3.sink.cpp index 90e703d..04ea3c7 100644 --- a/src/streaming/s3.sink.cpp +++ b/src/streaming/s3.sink.cpp @@ -32,7 +32,7 @@ zarr::S3Sink::flush_() return false; } if (!finalize_multipart_upload_()) { - LOG_ERROR("Failed to finalize_ multipart upload of object ", + LOG_ERROR("Failed to finalize multipart upload of object ", object_key_); return false; } diff --git a/src/streaming/sink.cpp b/src/streaming/sink.cpp index bdcf4c7..e351a7b 100644 --- a/src/streaming/sink.cpp +++ b/src/streaming/sink.cpp @@ -5,7 +5,7 @@ bool zarr::finalize_sink(std::unique_ptr&& sink) { if (sink == nullptr) { - LOG_INFO("Sink is null. Nothing to finalize_."); + LOG_INFO("Sink is null. Nothing to finalize."); return true; } diff --git a/src/streaming/zarr.stream.cpp b/src/streaming/zarr.stream.cpp index 2f3b87d..ccb3d01 100644 --- a/src/streaming/zarr.stream.cpp +++ b/src/streaming/zarr.stream.cpp @@ -970,7 +970,7 @@ bool finalize_stream(struct ZarrStream_s* stream) { if (stream == nullptr) { - LOG_INFO("Stream is null. Nothing to finalize_."); + LOG_INFO("Stream is null. Nothing to finalize."); return true; } From 2a4c0a864c3532195feaf42a0f7fd5049fefeb4a Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 13:02:54 -0400 Subject: [PATCH 07/28] Add Python wheel build job --- .github/workflows/build.yml | 53 ++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3198794..29376d7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,6 +4,9 @@ on: push: branches: - "main" + pull_request: # TODO (aliddell): remove this + branches: + - "main" jobs: windows-and-linux-build: @@ -114,4 +117,52 @@ jobs: - uses: actions/upload-artifact@v3 with: name: macos-latest ${{matrix.build_type}} binaries - path: ${{github.workspace}}/*.zip \ No newline at end of file + path: ${{github.workspace}}/*.zip + + build-wheel: + strategy: + matrix: + platform: + - "windows-latest" + - "ubuntu-latest" + - "macos-latest" # TODO (aliddell): universal binary? + + runs-on: ${{ matrix.platform }} + + permissions: + actions: write + + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.10.0 + with: + access_token: ${{ github.token }} + + - uses: actions/checkout@v3 + with: + submodules: true + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install vcpkg + run: | + git clone https://github.com/microsoft/vcpkg.git + cd vcpkg && ./bootstrap-vcpkg.sh + echo "VCPKG_ROOT=${{github.workspace}}/vcpkg" >> $GITHUB_ENV + echo "${{github.workspace}}/vcpkg" >> $GITHUB_PATH + ./vcpkg integrate install + shell: bash + + - name: Install dependencies + run: python -m pip install -U pip "pybind11[global]" cmake build + + - name: Build + run: python -m build + + - name: Upload wheel + uses: actions/upload-artifact@v3 + with: + path: ${{github.workspace}}/dist/*.whl From 7ed144ed154feb949f29bbd44e9ee8c919259cd2 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 13:14:06 -0400 Subject: [PATCH 08/28] Prepare for Python bindings --- .github/workflows/build.yml | 6 +-- .github/workflows/release.yml | 6 +-- .gitignore | 73 +++++++++++++++++++++++++++++++---- CMakeLists.txt | 10 +++-- include/acquire.zarr.h | 6 +-- src/streaming/CMakeLists.txt | 1 + tests/CMakeLists.txt | 8 +--- 7 files changed, 85 insertions(+), 25 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ae560e1..d67fc44 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -93,12 +93,12 @@ jobs: - name: Build for x64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="x86_64" -DNOTEST=1 + cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="x86_64" -DBUILD_TESTS=OFF cmake --build ${{github.workspace}}/build-x64 --config ${{matrix.build_type}} - name: Build for arm64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="arm64" -DNOTEST=1 + cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="arm64" -DBUILD_TESTS=OFF cmake --build ${{github.workspace}}/build-arm64 --config ${{matrix.build_type}} - name: Create a universal binary @@ -114,4 +114,4 @@ jobs: - uses: actions/upload-artifact@v3 with: name: macos-latest ${{matrix.build_type}} binaries - path: ${{github.workspace}}/*.zip \ No newline at end of file + path: ${{github.workspace}}/*.zip diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 981749f..9f8873a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -57,7 +57,7 @@ jobs: - name: Package run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=${{matrix.vcpkg_triplet}} -B ${{github.workspace}}/pack -DCMAKE_BUILD_TYPE=Release -DNOTEST=1 + cmake --preset=default -DVCPKG_TARGET_TRIPLET=${{matrix.vcpkg_triplet}} -B ${{github.workspace}}/pack -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=OFF cmake --build ${{github.workspace}}/pack --config Release cpack --config ${{github.workspace}}/pack/CPackConfig.cmake -C Release -G ZIP @@ -93,12 +93,12 @@ jobs: - name: Build for x64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="x86_64" -DNOTEST=1 + cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="x86_64" -DBUILD_TESTS=OFF cmake --build ${{github.workspace}}/build-x64 --config Release - name: Build for arm64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="arm64" + cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="arm64" -DBUILD_TESTS=OFF cmake --build ${{github.workspace}}/build-arm64 --config Release - name: Test # don't release if tests are failing diff --git a/.gitignore b/.gitignore index c0c9c35..4ca4e6d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,72 @@ -.idea -build*/ install -.vscode -.cache CMakeSettings.json -.vs TestResults -cmake-build* .DS_Store *.zip -_CPack_Packages/ .PVS-Studio + +# IDE +.idea +.vscode +.vs + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build*/ +cmake-build*/ +_CPack_Packages/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 239f107..020f85b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.23) +cmake_minimum_required(VERSION 3.30) project(acquire-zarr) cmake_policy(SET CMP0079 NEW) # allows use with targets in other directories enable_testing() @@ -19,9 +19,13 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -option(NOTEST "Disable all tests" OFF) +option(BUILD_TESTS "Build tests" ON) add_subdirectory(src) -add_subdirectory(tests) +if (${BUILD_TESTS}) + add_subdirectory(tests) +else () + message(STATUS "Skipping test targets") +endif () include(CPack) diff --git a/include/acquire.zarr.h b/include/acquire.zarr.h index d74a587..32ae6ba 100644 --- a/include/acquire.zarr.h +++ b/include/acquire.zarr.h @@ -103,9 +103,9 @@ extern "C" * @return ZarrStatusCode_Success on success, or an error code on failure. */ ZarrStatusCode ZarrStream_append(ZarrStream* stream, - const void* data, - size_t bytes_in, - size_t* bytes_out); + const void* data, + size_t bytes_in, + size_t* bytes_out); #ifdef __cplusplus } diff --git a/src/streaming/CMakeLists.txt b/src/streaming/CMakeLists.txt index 753c943..3ab50ac 100644 --- a/src/streaming/CMakeLists.txt +++ b/src/streaming/CMakeLists.txt @@ -51,6 +51,7 @@ target_compile_definitions(${tgt} PRIVATE set_target_properties(${tgt} PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>" + POSITION_INDEPENDENT_CODE ON ) install(TARGETS ${tgt} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e308d23..4196c1c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,6 +1,2 @@ -if (${NOTEST}) - message(STATUS "Skipping test targets") -else () - add_subdirectory(unit-tests) - add_subdirectory(integration) -endif () +add_subdirectory(unit-tests) +add_subdirectory(integration) From c160d7c7ab9f1210238f5e8b5283236451bc13c2 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 13:43:44 -0400 Subject: [PATCH 09/28] Don't export the enum values into the module base namespace. --- python/acquire-zarr-py.cpp | 63 ++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/python/acquire-zarr-py.cpp b/python/acquire-zarr-py.cpp index 5635cc8..f794a2b 100644 --- a/python/acquire-zarr-py.cpp +++ b/python/acquire-zarr-py.cpp @@ -289,41 +289,36 @@ PYBIND11_MODULE(acquire_zarr, m) append )pbdoc"; - py::enum_(m, "Version") + py::enum_(m, "ZarrVersion") .value("V2", ZarrVersion_2) - .value("V3", ZarrVersion_3) - .export_values(); - - py::enum_(m, "DType") - .value("DTYPE_UINT8", ZarrDataType_uint8) - .value("DTYPE_UINT16", ZarrDataType_uint16) - .value("DTYPE_UINT32", ZarrDataType_uint32) - .value("DTYPE_UINT64", ZarrDataType_uint64) - .value("DTYPE_INT8", ZarrDataType_int8) - .value("DTYPE_INT16", ZarrDataType_int16) - .value("DTYPE_INT32", ZarrDataType_int32) - .value("DTYPE_INT64", ZarrDataType_int64) - .value("DTYPE_FLOAT32", ZarrDataType_float32) - .value("DTYPE_FLOAT64", ZarrDataType_float64) - .export_values(); - - py::enum_(m, "Compressor") - .value("COMPRESSOR_NONE", ZarrCompressor_None) - .value("COMPRESSOR_BLOSC1", ZarrCompressor_Blosc1) - .export_values(); - - py::enum_(m, "CompressionCodec") - .value("COMPRESSION_NONE", ZarrCompressionCodec_None) - .value("COMPRESSION_BLOSC_LZ4", ZarrCompressionCodec_BloscLZ4) - .value("COMPRESSION_BLOSC_ZSTD", ZarrCompressionCodec_BloscZstd) - .export_values(); - - py::enum_(m, "DimensionType") - .value("DIMENSION_TYPE_SPACE", ZarrDimensionType_Space) - .value("DIMENSION_TYPE_CHANNEL", ZarrDimensionType_Channel) - .value("DIMENSION_TYPE_TIME", ZarrDimensionType_Time) - .value("DIMENSION_TYPE_OTHER", ZarrDimensionType_Other) - .export_values(); + .value("V3", ZarrVersion_3); + + py::enum_(m, "ZarrDataType") + .value("UINT8", ZarrDataType_uint8) + .value("UINT16", ZarrDataType_uint16) + .value("UINT32", ZarrDataType_uint32) + .value("UINT64", ZarrDataType_uint64) + .value("INT8", ZarrDataType_int8) + .value("INT16", ZarrDataType_int16) + .value("INT32", ZarrDataType_int32) + .value("INT64", ZarrDataType_int64) + .value("FLOAT32", ZarrDataType_float32) + .value("FLOAT64", ZarrDataType_float64); + + py::enum_(m, "ZarrCompressor") + .value("NONE", ZarrCompressor_None) + .value("BLOSC1", ZarrCompressor_Blosc1); + + py::enum_(m, "ZarrCompressionCodec") + .value("NONE", ZarrCompressionCodec_None) + .value("BLOSC_LZ4", ZarrCompressionCodec_BloscLZ4) + .value("BLOSC_ZSTD", ZarrCompressionCodec_BloscZstd); + + py::enum_(m, "ZarrDimensionType") + .value("SPACE", ZarrDimensionType_Space) + .value("CHANNEL", ZarrDimensionType_Channel) + .value("TIME", ZarrDimensionType_Time) + .value("OTHER", ZarrDimensionType_Other); py::class_(m, "S3Settings") .def(py::init<>()) From 7e51977a94ba8e8ec56c0837ef1125d23dc49009 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 15:07:24 -0400 Subject: [PATCH 10/28] (wip) some basic tests --- CMakeLists.txt | 3 +- python/CMakeLists.txt | 2 - python/acquire-zarr-py.cpp | 240 +++++++++++++++++++++++++++++----- python/tests/test_settings.py | 84 ++++++++++++ 4 files changed, 296 insertions(+), 33 deletions(-) create mode 100644 python/tests/test_settings.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 98fcb20..42e824d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,6 @@ -cmake_minimum_required(VERSION 3.30) +cmake_minimum_required(VERSION 3.23) project(acquire-zarr) +cmake_policy(SET CMP0057 NEW) cmake_policy(SET CMP0079 NEW) # allows use with targets in other directories enable_testing() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 9a7caed..b353c55 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,6 +1,4 @@ -cmake_minimum_required(VERSION 3.30) project(acquire-zarr-py) -cmake_policy(SET CMP0057 NEW) execute_process(COMMAND python3 -m pybind11 --cmakedir RESULT_VARIABLE pybind11_NOT_FOUND diff --git a/python/acquire-zarr-py.cpp b/python/acquire-zarr-py.cpp index f794a2b..fec3167 100644 --- a/python/acquire-zarr-py.cpp +++ b/python/acquire-zarr-py.cpp @@ -14,6 +14,81 @@ auto ZarrStreamDeleter = [](ZarrStream_s* stream) { ZarrStream_destroy(stream); } }; + +const char* +data_type_to_str(ZarrDataType t) +{ + switch (t) { + case ZarrDataType_uint8: + return "UINT8"; + case ZarrDataType_uint16: + return "UINT16"; + case ZarrDataType_uint32: + return "UINT32"; + case ZarrDataType_uint64: + return "UINT64"; + case ZarrDataType_int8: + return "INT8"; + case ZarrDataType_int16: + return "INT16"; + case ZarrDataType_int32: + return "INT32"; + case ZarrDataType_int64: + return "INT64"; + case ZarrDataType_float32: + return "FLOAT32"; + case ZarrDataType_float64: + return "FLOAT64"; + default: + return "UNKNOWN"; + } +} + +const char* +compressor_to_str(ZarrCompressor c) +{ + switch (c) { + case ZarrCompressor_None: + return "NONE"; + case ZarrCompressor_Blosc1: + return "BLOSC1"; + default: + return "UNKNOWN"; + } +} + +const char* +compression_codec_to_str(ZarrCompressionCodec c) +{ + switch (c) { + case ZarrCompressionCodec_None: + return "NONE"; + case ZarrCompressionCodec_BloscLZ4: + return "BLOSC_LZ4"; + case ZarrCompressionCodec_BloscZstd: + return "BLOSC_ZSTD"; + default: + return "UNKNOWN"; + } +} + +const char* +dimension_type_to_str(ZarrDimensionType t) +{ + switch (t) { + case ZarrDimensionType_Space: + return "SPACE"; + case ZarrDimensionType_Channel: + return "CHANNEL"; + case ZarrDimensionType_Time: + return "TIME"; + case ZarrDimensionType_Other: + return "OTHER"; + default: + return "UNKNOWN"; + } +} + } // namespace class PyZarrS3Settings @@ -161,7 +236,7 @@ class PyZarrStreamSettings std::vector dimensions_; bool multiscale_ = false; ZarrDataType data_type_; - ZarrVersion version_; + ZarrVersion version_{ ZarrVersion_2 }; }; class PyZarrStream @@ -280,6 +355,8 @@ class PyZarrStream PYBIND11_MODULE(acquire_zarr, m) { + using namespace pybind11::literals; + m.doc() = R"pbdoc( Acquire Zarr Writer Python API ----------------------- @@ -294,34 +371,65 @@ PYBIND11_MODULE(acquire_zarr, m) .value("V3", ZarrVersion_3); py::enum_(m, "ZarrDataType") - .value("UINT8", ZarrDataType_uint8) - .value("UINT16", ZarrDataType_uint16) - .value("UINT32", ZarrDataType_uint32) - .value("UINT64", ZarrDataType_uint64) - .value("INT8", ZarrDataType_int8) - .value("INT16", ZarrDataType_int16) - .value("INT32", ZarrDataType_int32) - .value("INT64", ZarrDataType_int64) - .value("FLOAT32", ZarrDataType_float32) - .value("FLOAT64", ZarrDataType_float64); + .value(data_type_to_str(ZarrDataType_uint8), ZarrDataType_uint8) + .value(data_type_to_str(ZarrDataType_uint16), ZarrDataType_uint16) + .value(data_type_to_str(ZarrDataType_uint32), ZarrDataType_uint32) + .value(data_type_to_str(ZarrDataType_uint64), ZarrDataType_uint64) + .value(data_type_to_str(ZarrDataType_int8), ZarrDataType_int8) + .value(data_type_to_str(ZarrDataType_int16), ZarrDataType_int16) + .value(data_type_to_str(ZarrDataType_int32), ZarrDataType_int32) + .value(data_type_to_str(ZarrDataType_int64), ZarrDataType_int64) + .value(data_type_to_str(ZarrDataType_float32), ZarrDataType_float32) + .value(data_type_to_str(ZarrDataType_float64), ZarrDataType_float64); py::enum_(m, "ZarrCompressor") - .value("NONE", ZarrCompressor_None) - .value("BLOSC1", ZarrCompressor_Blosc1); + .value(compressor_to_str(ZarrCompressor_None), ZarrCompressor_None) + .value(compressor_to_str(ZarrCompressor_Blosc1), ZarrCompressor_Blosc1); py::enum_(m, "ZarrCompressionCodec") - .value("NONE", ZarrCompressionCodec_None) - .value("BLOSC_LZ4", ZarrCompressionCodec_BloscLZ4) - .value("BLOSC_ZSTD", ZarrCompressionCodec_BloscZstd); + .value(compression_codec_to_str(ZarrCompressionCodec_None), + ZarrCompressionCodec_None) + .value(compression_codec_to_str(ZarrCompressionCodec_BloscLZ4), + ZarrCompressionCodec_BloscLZ4) + .value(compression_codec_to_str(ZarrCompressionCodec_BloscZstd), + ZarrCompressionCodec_BloscZstd); py::enum_(m, "ZarrDimensionType") - .value("SPACE", ZarrDimensionType_Space) - .value("CHANNEL", ZarrDimensionType_Channel) - .value("TIME", ZarrDimensionType_Time) - .value("OTHER", ZarrDimensionType_Other); - - py::class_(m, "S3Settings") - .def(py::init<>()) + .value(dimension_type_to_str(ZarrDimensionType_Space), + ZarrDimensionType_Space) + .value(dimension_type_to_str(ZarrDimensionType_Channel), + ZarrDimensionType_Channel) + .value(dimension_type_to_str(ZarrDimensionType_Time), + ZarrDimensionType_Time) + .value(dimension_type_to_str(ZarrDimensionType_Other), + ZarrDimensionType_Other); + + py::class_(m, "ZarrS3Settings", py::dynamic_attr()) + .def(py::init([](py::kwargs kwargs) { + PyZarrS3Settings settings; + if (kwargs.contains("endpoint")) + settings.set_endpoint(kwargs["endpoint"].cast()); + if (kwargs.contains("bucket_name")) + settings.set_bucket_name( + kwargs["bucket_name"].cast()); + if (kwargs.contains("access_key_id")) + settings.set_access_key_id( + kwargs["access_key_id"].cast()); + if (kwargs.contains("secret_access_key")) + settings.set_secret_access_key( + kwargs["secret_access_key"].cast()); + return settings; + })) + .def("__repr__", + [](const PyZarrS3Settings& self) { + auto sac = self.secret_access_key().empty() + ? "" + : self.secret_access_key().substr(0, 5) + "..."; + return "ZarrS3Settings(endpoint='" + self.endpoint() + + "', bucket_name='" + self.bucket_name() + + "', access_key_id='" + self.access_key_id() + + "', secret_access_key='" + sac + "')"; + }) .def_property("endpoint", &PyZarrS3Settings::endpoint, &PyZarrS3Settings::set_endpoint) @@ -335,8 +443,30 @@ PYBIND11_MODULE(acquire_zarr, m) &PyZarrS3Settings::secret_access_key, &PyZarrS3Settings::set_secret_access_key); - py::class_(m, "ZarrCompressionSettings") - .def(py::init<>()) + py::class_( + m, "ZarrCompressionSettings", py::dynamic_attr()) + .def(py::init([](py::kwargs kwargs) { + PyZarrCompressionSettings settings; + if (kwargs.contains("compressor")) + settings.set_compressor( + kwargs["compressor"].cast()); + if (kwargs.contains("codec")) + settings.set_codec(kwargs["codec"].cast()); + if (kwargs.contains("level")) + settings.set_level(kwargs["level"].cast()); + if (kwargs.contains("shuffle")) + settings.set_shuffle(kwargs["shuffle"].cast()); + return settings; + })) + .def("__repr__", + [](const PyZarrCompressionSettings& self) { + return "ZarrCompressionSettings(compressor=ZarrCompressor." + + std::string(compressor_to_str(self.compressor())) + + ", codec=ZarrCompressionCodec." + + std::string(compression_codec_to_str(self.codec())) + + ", level=" + std::to_string(self.level()) + + ", shuffle=" + std::to_string(self.shuffle()) + ")"; + }) .def_property("compressor", &PyZarrCompressionSettings::compressor, &PyZarrCompressionSettings::set_compressor) @@ -350,12 +480,27 @@ PYBIND11_MODULE(acquire_zarr, m) &PyZarrCompressionSettings::shuffle, &PyZarrCompressionSettings::set_shuffle); - py::class_(m, "ZarrDimensionProperties") - .def(py::init<>()) + py::class_( + m, "ZarrDimensionProperties", py::dynamic_attr()) + .def(py::init([](py::kwargs kwargs) { + PyZarrDimensionProperties props; + if (kwargs.contains("name")) + props.set_name(kwargs["name"].cast()); + if (kwargs.contains("kind")) + props.set_type(kwargs["kind"].cast()); + if (kwargs.contains("array_size_px")) + props.set_array_size_px(kwargs["array_size_px"].cast()); + if (kwargs.contains("chunk_size_px")) + props.set_chunk_size_px(kwargs["chunk_size_px"].cast()); + if (kwargs.contains("shard_size_chunks")) + props.set_shard_size_chunks( + kwargs["shard_size_chunks"].cast()); + return props; + })) .def_property("name", &PyZarrDimensionProperties::name, &PyZarrDimensionProperties::set_name) - .def_property("type", + .def_property("kind", &PyZarrDimensionProperties::type, &PyZarrDimensionProperties::set_type) .def_property("array_size_px", @@ -368,8 +513,43 @@ PYBIND11_MODULE(acquire_zarr, m) &PyZarrDimensionProperties::shard_size_chunks, &PyZarrDimensionProperties::set_shard_size_chunks); - py::class_(m, "ZarrStreamSettings") - .def(py::init<>()) + py::class_( + m, "ZarrStreamSettings", py::dynamic_attr()) + .def(py::init([](py::kwargs kwargs) { + PyZarrStreamSettings settings; + + if (kwargs.contains("store_path")) + settings.set_store_path(kwargs["store_path"].cast()); + + if (kwargs.contains("custom_metadata")) + settings.set_custom_metadata( + kwargs["custom_metadata"].cast>()); + + if (kwargs.contains("s3")) + settings.set_s3( + kwargs["s3"].cast>()); + + if (kwargs.contains("compression")) + settings.set_compression( + kwargs["compression"] + .cast>()); + + if (kwargs.contains("dimensions")) + settings.set_dimensions( + kwargs["dimensions"] + .cast>()); + + if (kwargs.contains("multiscale")) + settings.set_multiscale(kwargs["multiscale"].cast()); + + if (kwargs.contains("data_type")) + settings.set_data_type(kwargs["data_type"].cast()); + + if (kwargs.contains("version")) + settings.set_version(kwargs["version"].cast()); + + return settings; + })) .def_property("store_path", &PyZarrStreamSettings::store_path, &PyZarrStreamSettings::set_store_path) diff --git a/python/tests/test_settings.py b/python/tests/test_settings.py new file mode 100644 index 0000000..0642095 --- /dev/null +++ b/python/tests/test_settings.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import json +from pathlib import Path + +import numpy as np +import pytest + +import acquire_zarr + + +@pytest.fixture(scope="function") +def settings(): + return acquire_zarr.ZarrStreamSettings() + + +def test_settings_set_store_path(settings): + assert settings.store_path == "" + + this_dir = str(Path(__file__).parent) + settings.store_path = this_dir + + assert settings.store_path == this_dir + + +def test_settings_set_custom_metadata(settings): + assert settings.custom_metadata is None + + metadata = json.dumps({"foo": "bar"}) + settings.custom_metadata = metadata + + assert settings.custom_metadata == metadata + + +def test_set_s3_settings(settings): + assert settings.s3 is None + + s3_settings = acquire_zarr.ZarrS3Settings( + endpoint="foo", + bucket_name="bar", + access_key_id="baz", + secret_access_key="qux", + ) + settings.s3 = s3_settings + + assert settings.s3 is not None + assert settings.s3.endpoint == "foo" + assert settings.s3.bucket_name == "bar" + assert settings.s3.access_key_id == "baz" + assert settings.s3.secret_access_key == "qux" + + +def test_set_compression_settings(settings): + assert settings.compression is None + + compression_settings = acquire_zarr.ZarrCompressionSettings( + compressor=acquire_zarr.ZarrCompressor.BLOSC1, + codec=acquire_zarr.ZarrCompressionCodec.BLOSC_ZSTD, + level=5, + shuffle=2, + ) + + settings.compression = compression_settings + assert settings.compression is not None + assert settings.compression.compressor == acquire_zarr.ZarrCompressor.BLOSC1 + assert settings.compression.codec == acquire_zarr.ZarrCompressionCodec.BLOSC_ZSTD + assert settings.compression.level == 5 + assert settings.compression.shuffle == 2 + + +def test_set_multiscale(settings): + assert settings.multiscale is False + + settings.multiscale = True + + assert settings.multiscale is True + + +def test_set_version(settings): + assert settings.version == acquire_zarr.ZarrVersion.V2 + + settings.version = acquire_zarr.ZarrVersion.V3 + + assert settings.version == acquire_zarr.ZarrVersion.V3 From 8c76e2d8792939871891a7a179ab1717e2c13196 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 15:24:28 -0400 Subject: [PATCH 11/28] Revert CMake minimum version and use cmake_policy. Using builtin `BUILD_TESTING` cmake option. --- CMakeLists.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 020f85b..e508b4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,6 @@ -cmake_minimum_required(VERSION 3.30) +cmake_minimum_required(VERSION 3.23) project(acquire-zarr) +cmake_policy(SET CMP0057 NEW) # allows IN_LIST operator (for pybind11) cmake_policy(SET CMP0079 NEW) # allows use with targets in other directories enable_testing() @@ -19,10 +20,12 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -option(BUILD_TESTS "Build tests" ON) +if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) + include(CTest) +endif() add_subdirectory(src) -if (${BUILD_TESTS}) +if (BUILD_TESTING) add_subdirectory(tests) else () message(STATUS "Skipping test targets") From 60a0bd5fc4e61724a12b4c905c9a552f39f0c9e9 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 15:27:22 -0400 Subject: [PATCH 12/28] Update build.yml --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d67fc44..6e8b629 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -93,12 +93,12 @@ jobs: - name: Build for x64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="x86_64" -DBUILD_TESTS=OFF + cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="x86_64" -DBUILD_TESTING=OFF cmake --build ${{github.workspace}}/build-x64 --config ${{matrix.build_type}} - name: Build for arm64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="arm64" -DBUILD_TESTS=OFF + cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_OSX_ARCHITECTURES="arm64" -DBUILD_TESTING=OFF cmake --build ${{github.workspace}}/build-arm64 --config ${{matrix.build_type}} - name: Create a universal binary From 9c5c6bf34ad59b0989e4b86423f5d2f11466ed48 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Wed, 9 Oct 2024 15:28:03 -0400 Subject: [PATCH 13/28] Update release.yml --- .github/workflows/release.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9f8873a..070250a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -57,7 +57,7 @@ jobs: - name: Package run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=${{matrix.vcpkg_triplet}} -B ${{github.workspace}}/pack -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=OFF + cmake --preset=default -DVCPKG_TARGET_TRIPLET=${{matrix.vcpkg_triplet}} -B ${{github.workspace}}/pack -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF cmake --build ${{github.workspace}}/pack --config Release cpack --config ${{github.workspace}}/pack/CPackConfig.cmake -C Release -G ZIP @@ -93,12 +93,12 @@ jobs: - name: Build for x64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="x86_64" -DBUILD_TESTS=OFF + cmake --preset=default -DVCPKG_TARGET_TRIPLET=x64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-x64 -B ${{github.workspace}}/build-x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="x86_64" -DBUILD_TESTING=OFF cmake --build ${{github.workspace}}/build-x64 --config Release - name: Build for arm64 run: | - cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="arm64" -DBUILD_TESTS=OFF + cmake --preset=default -DVCPKG_TARGET_TRIPLET=arm64-osx -DVCPKG_INSTALLED_DIR=${{github.workspace}}/vcpkg-arm64 -B ${{github.workspace}}/build-arm64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES="arm64" -DBUILD_TESTING=OFF cmake --build ${{github.workspace}}/build-arm64 --config Release - name: Test # don't release if tests are failing From 8faabe5eada7ef3b288d0714d97add25815ef33a Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Thu, 10 Oct 2024 11:30:49 -0400 Subject: [PATCH 14/28] some simple tests --- python/acquire-zarr-py.cpp | 137 ++++++++++++++++++++++------------ python/tests/test_settings.py | 60 +++++++++++++-- 2 files changed, 141 insertions(+), 56 deletions(-) diff --git a/python/acquire-zarr-py.cpp b/python/acquire-zarr-py.cpp index fec3167..ae78506 100644 --- a/python/acquire-zarr-py.cpp +++ b/python/acquire-zarr-py.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "acquire.zarr.h" @@ -115,6 +116,17 @@ class PyZarrS3Settings } const std::string& secret_access_key() const { return secret_access_key_; } + std::string repr() const + { + auto secret_access_key = secret_access_key_.size() < 6 + ? secret_access_key_ + : secret_access_key_.substr(0, 5) + "..."; + + return "S3Settings(endpoint='" + endpoint_ + "', bucket_name='" + + bucket_name_ + "', access_key_id='" + access_key_id_ + + "', secret_access_key='" + secret_access_key + "')"; + } + private: std::string endpoint_; std::string bucket_name_; @@ -140,6 +152,16 @@ class PyZarrCompressionSettings uint8_t shuffle() const { return shuffle_; } void set_shuffle(uint8_t shuffle) { shuffle_ = shuffle; } + std::string repr() const + { + return "CompressionSettings(compressor=Compressor." + + std::string(compressor_to_str(compressor_)) + + ", codec=CompressionCodec." + + std::string(compression_codec_to_str(codec_)) + + ", level=" + std::to_string(level_) + + ", shuffle=" + std::to_string(shuffle_) + ")"; + } + private: ZarrCompressor compressor_; ZarrCompressionCodec codec_; @@ -168,20 +190,34 @@ class PyZarrDimensionProperties uint32_t shard_size_chunks() const { return shard_size_chunks_; } void set_shard_size_chunks(uint32_t size) { shard_size_chunks_ = size; } + std::string repr() const + { + return "Dimension(name='" + name_ + "', kind=DimensionType." + + std::string(dimension_type_to_str(type_)) + + ", array_size_px=" + std::to_string(array_size_px_) + + ", chunk_size_px=" + std::to_string(chunk_size_px_) + + ", shard_size_chunks=" + std::to_string(shard_size_chunks_) + + ")"; + } + private: std::string name_; - ZarrDimensionType type_; - uint32_t array_size_px_; - uint32_t chunk_size_px_; - uint32_t shard_size_chunks_; + ZarrDimensionType type_{ ZarrDimensionType_Space }; + uint32_t array_size_px_{ 0 }; + uint32_t chunk_size_px_{ 0 }; + uint32_t shard_size_chunks_{ 0 }; }; +PYBIND11_MAKE_OPAQUE(std::vector); + class PyZarrStreamSettings { public: PyZarrStreamSettings() = default; ~PyZarrStreamSettings() = default; + std::vector dimensions; + std::string store_path() const { return store_path_; } void set_store_path(const std::string& path) { store_path_ = path; } @@ -210,15 +246,6 @@ class PyZarrStreamSettings compression_settings_ = settings; } - std::vector dimensions() const - { - return dimensions_; - } - void set_dimensions(const std::vector& dims) - { - dimensions_ = dims; - } - bool multiscale() const { return multiscale_; } void set_multiscale(bool multiscale) { multiscale_ = multiscale; } @@ -233,9 +260,8 @@ class PyZarrStreamSettings std::optional custom_metadata_; std::optional s3_settings_; std::optional compression_settings_; - std::vector dimensions_; bool multiscale_ = false; - ZarrDataType data_type_; + ZarrDataType data_type_{ ZarrDataType_uint8 }; ZarrVersion version_{ ZarrVersion_2 }; }; @@ -286,7 +312,7 @@ class PyZarrStream stream_settings.compression_settings = &compression_settings; } - const auto& dims = settings.dimensions(); + const auto& dims = settings.dimensions; std::vector dimension_props; std::vector dimension_names(dims.size()); @@ -366,11 +392,14 @@ PYBIND11_MODULE(acquire_zarr, m) append )pbdoc"; + py::bind_vector>(m, + "VectorDimension"); + py::enum_(m, "ZarrVersion") .value("V2", ZarrVersion_2) .value("V3", ZarrVersion_3); - py::enum_(m, "ZarrDataType") + py::enum_(m, "DataType") .value(data_type_to_str(ZarrDataType_uint8), ZarrDataType_uint8) .value(data_type_to_str(ZarrDataType_uint16), ZarrDataType_uint16) .value(data_type_to_str(ZarrDataType_uint32), ZarrDataType_uint32) @@ -382,11 +411,11 @@ PYBIND11_MODULE(acquire_zarr, m) .value(data_type_to_str(ZarrDataType_float32), ZarrDataType_float32) .value(data_type_to_str(ZarrDataType_float64), ZarrDataType_float64); - py::enum_(m, "ZarrCompressor") + py::enum_(m, "Compressor") .value(compressor_to_str(ZarrCompressor_None), ZarrCompressor_None) .value(compressor_to_str(ZarrCompressor_Blosc1), ZarrCompressor_Blosc1); - py::enum_(m, "ZarrCompressionCodec") + py::enum_(m, "CompressionCodec") .value(compression_codec_to_str(ZarrCompressionCodec_None), ZarrCompressionCodec_None) .value(compression_codec_to_str(ZarrCompressionCodec_BloscLZ4), @@ -394,7 +423,7 @@ PYBIND11_MODULE(acquire_zarr, m) .value(compression_codec_to_str(ZarrCompressionCodec_BloscZstd), ZarrCompressionCodec_BloscZstd); - py::enum_(m, "ZarrDimensionType") + py::enum_(m, "DimensionType") .value(dimension_type_to_str(ZarrDimensionType_Space), ZarrDimensionType_Space) .value(dimension_type_to_str(ZarrDimensionType_Channel), @@ -404,7 +433,7 @@ PYBIND11_MODULE(acquire_zarr, m) .value(dimension_type_to_str(ZarrDimensionType_Other), ZarrDimensionType_Other); - py::class_(m, "ZarrS3Settings", py::dynamic_attr()) + py::class_(m, "S3Settings", py::dynamic_attr()) .def(py::init([](py::kwargs kwargs) { PyZarrS3Settings settings; if (kwargs.contains("endpoint")) @@ -420,16 +449,7 @@ PYBIND11_MODULE(acquire_zarr, m) kwargs["secret_access_key"].cast()); return settings; })) - .def("__repr__", - [](const PyZarrS3Settings& self) { - auto sac = self.secret_access_key().empty() - ? "" - : self.secret_access_key().substr(0, 5) + "..."; - return "ZarrS3Settings(endpoint='" + self.endpoint() + - "', bucket_name='" + self.bucket_name() + - "', access_key_id='" + self.access_key_id() + - "', secret_access_key='" + sac + "')"; - }) + .def("__repr__", [](const PyZarrS3Settings& self) { return self.repr(); }) .def_property("endpoint", &PyZarrS3Settings::endpoint, &PyZarrS3Settings::set_endpoint) @@ -444,7 +464,7 @@ PYBIND11_MODULE(acquire_zarr, m) &PyZarrS3Settings::set_secret_access_key); py::class_( - m, "ZarrCompressionSettings", py::dynamic_attr()) + m, "CompressionSettings", py::dynamic_attr()) .def(py::init([](py::kwargs kwargs) { PyZarrCompressionSettings settings; if (kwargs.contains("compressor")) @@ -459,14 +479,7 @@ PYBIND11_MODULE(acquire_zarr, m) return settings; })) .def("__repr__", - [](const PyZarrCompressionSettings& self) { - return "ZarrCompressionSettings(compressor=ZarrCompressor." + - std::string(compressor_to_str(self.compressor())) + - ", codec=ZarrCompressionCodec." + - std::string(compression_codec_to_str(self.codec())) + - ", level=" + std::to_string(self.level()) + - ", shuffle=" + std::to_string(self.shuffle()) + ")"; - }) + [](const PyZarrCompressionSettings& self) { return self.repr(); }) .def_property("compressor", &PyZarrCompressionSettings::compressor, &PyZarrCompressionSettings::set_compressor) @@ -480,8 +493,7 @@ PYBIND11_MODULE(acquire_zarr, m) &PyZarrCompressionSettings::shuffle, &PyZarrCompressionSettings::set_shuffle); - py::class_( - m, "ZarrDimensionProperties", py::dynamic_attr()) + py::class_(m, "Dimension", py::dynamic_attr()) .def(py::init([](py::kwargs kwargs) { PyZarrDimensionProperties props; if (kwargs.contains("name")) @@ -497,6 +509,8 @@ PYBIND11_MODULE(acquire_zarr, m) kwargs["shard_size_chunks"].cast()); return props; })) + .def("__repr__", + [](const PyZarrDimensionProperties& self) { return self.repr(); }) .def_property("name", &PyZarrDimensionProperties::name, &PyZarrDimensionProperties::set_name) @@ -513,8 +527,7 @@ PYBIND11_MODULE(acquire_zarr, m) &PyZarrDimensionProperties::shard_size_chunks, &PyZarrDimensionProperties::set_shard_size_chunks); - py::class_( - m, "ZarrStreamSettings", py::dynamic_attr()) + py::class_(m, "StreamSettings", py::dynamic_attr()) .def(py::init([](py::kwargs kwargs) { PyZarrStreamSettings settings; @@ -535,9 +548,9 @@ PYBIND11_MODULE(acquire_zarr, m) .cast>()); if (kwargs.contains("dimensions")) - settings.set_dimensions( + settings.dimensions = kwargs["dimensions"] - .cast>()); + .cast>(); if (kwargs.contains("multiscale")) settings.set_multiscale(kwargs["multiscale"].cast()); @@ -550,6 +563,34 @@ PYBIND11_MODULE(acquire_zarr, m) return settings; })) + .def("__repr__", + [](const PyZarrStreamSettings& self) { + std::string repr = + "StreamSettings(store_path='" + self.store_path(); + if (self.custom_metadata().has_value()) { + repr += + ", custom_metadata='" + self.custom_metadata().value(); + } + + if (self.s3().has_value()) { + repr += ", s3=" + self.s3()->repr(); + } + if (self.compression().has_value()) { + repr += ", compression=" + self.compression()->repr(); + } + repr += ", dimensions=["; + for (const auto& dim : self.dimensions) { + repr += dim.repr() + ", "; + } + repr += + "], multiscale=" + std::to_string(self.multiscale()) + + ", data_type=DataType." + + std::string(data_type_to_str(self.data_type())) + + ", version=ZarrVersion." + + std::string(self.version() == ZarrVersion_2 ? "V2" : "V3") + + ")"; + return repr; + }) .def_property("store_path", &PyZarrStreamSettings::store_path, &PyZarrStreamSettings::set_store_path) @@ -598,9 +639,7 @@ PYBIND11_MODULE(acquire_zarr, m) self.set_compression(obj.cast()); } }) - .def_property("dimensions", - &PyZarrStreamSettings::dimensions, - &PyZarrStreamSettings::set_dimensions) + .def_readwrite("dimensions", &PyZarrStreamSettings::dimensions) .def_property("multiscale", &PyZarrStreamSettings::multiscale, &PyZarrStreamSettings::set_multiscale) diff --git a/python/tests/test_settings.py b/python/tests/test_settings.py index 0642095..2ca7afd 100644 --- a/python/tests/test_settings.py +++ b/python/tests/test_settings.py @@ -11,7 +11,7 @@ @pytest.fixture(scope="function") def settings(): - return acquire_zarr.ZarrStreamSettings() + return acquire_zarr.StreamSettings() def test_settings_set_store_path(settings): @@ -35,7 +35,7 @@ def test_settings_set_custom_metadata(settings): def test_set_s3_settings(settings): assert settings.s3 is None - s3_settings = acquire_zarr.ZarrS3Settings( + s3_settings = acquire_zarr.S3Settings( endpoint="foo", bucket_name="bar", access_key_id="baz", @@ -53,21 +53,67 @@ def test_set_s3_settings(settings): def test_set_compression_settings(settings): assert settings.compression is None - compression_settings = acquire_zarr.ZarrCompressionSettings( - compressor=acquire_zarr.ZarrCompressor.BLOSC1, - codec=acquire_zarr.ZarrCompressionCodec.BLOSC_ZSTD, + compression_settings = acquire_zarr.CompressionSettings( + compressor=acquire_zarr.Compressor.BLOSC1, + codec=acquire_zarr.CompressionCodec.BLOSC_ZSTD, level=5, shuffle=2, ) settings.compression = compression_settings assert settings.compression is not None - assert settings.compression.compressor == acquire_zarr.ZarrCompressor.BLOSC1 - assert settings.compression.codec == acquire_zarr.ZarrCompressionCodec.BLOSC_ZSTD + assert settings.compression.compressor == acquire_zarr.Compressor.BLOSC1 + assert settings.compression.codec == acquire_zarr.CompressionCodec.BLOSC_ZSTD assert settings.compression.level == 5 assert settings.compression.shuffle == 2 +def test_set_dimensions(settings): + assert len(settings.dimensions) == 0 + + settings.dimensions.append(acquire_zarr.Dimension( + name="foo", + kind=acquire_zarr.DimensionType.TIME, + array_size_px=1, + chunk_size_px=2, + shard_size_chunks=3, + )) + assert len(settings.dimensions) == 1 + assert settings.dimensions[0].name == "foo" + assert settings.dimensions[0].kind == acquire_zarr.DimensionType.TIME + assert settings.dimensions[0].array_size_px == 1 + assert settings.dimensions[0].chunk_size_px == 2 + assert settings.dimensions[0].shard_size_chunks == 3 + + settings.dimensions.append(acquire_zarr.Dimension( + name="bar", + kind=acquire_zarr.DimensionType.SPACE, + array_size_px=4, + chunk_size_px=5, + shard_size_chunks=6, + )) + assert len(settings.dimensions) == 2 + assert settings.dimensions[1].name == "bar" + assert settings.dimensions[1].kind == acquire_zarr.DimensionType.SPACE + assert settings.dimensions[1].array_size_px == 4 + assert settings.dimensions[1].chunk_size_px == 5 + assert settings.dimensions[1].shard_size_chunks == 6 + + settings.dimensions.append(acquire_zarr.Dimension( + name="baz", + kind=acquire_zarr.DimensionType.OTHER, + array_size_px=7, + chunk_size_px=8, + shard_size_chunks=9, + )) + assert len(settings.dimensions) == 3 + assert settings.dimensions[2].name == "baz" + assert settings.dimensions[2].kind == acquire_zarr.DimensionType.OTHER + assert settings.dimensions[2].array_size_px == 7 + assert settings.dimensions[2].chunk_size_px == 8 + assert settings.dimensions[2].shard_size_chunks == 9 + + def test_set_multiscale(settings): assert settings.multiscale is False From 7e97dd768e550c7ec5eb3d6083e738c364425aed Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Thu, 10 Oct 2024 11:40:02 -0400 Subject: [PATCH 15/28] add python tests to CI --- .github/workflows/{test_pr.yml => test.yml} | 54 ++++++++++++++++++--- pyproject.toml | 3 +- 2 files changed, 50 insertions(+), 7 deletions(-) rename .github/workflows/{test_pr.yml => test.yml} (79%) diff --git a/.github/workflows/test_pr.yml b/.github/workflows/test.yml similarity index 79% rename from .github/workflows/test_pr.yml rename to .github/workflows/test.yml index 395527a..06f0552 100644 --- a/.github/workflows/test_pr.yml +++ b/.github/workflows/test.yml @@ -88,11 +88,6 @@ jobs: submodules: true ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python 3.10 - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - name: Install minio and mcli run: | apt update && apt install -y tmux wget @@ -137,4 +132,51 @@ jobs: ZARR_S3_BUCKET_NAME: ${{ env.MINIO_BUCKET }} ZARR_S3_ACCESS_KEY_ID: ${{ env.MINIO_ACCESS_KEY }} ZARR_S3_SECRET_ACCESS_KEY: ${{ env.MINIO_SECRET_KEY }} - run: ctest -C ${{env.BUILD_TYPE}} -L s3 --output-on-failure \ No newline at end of file + run: ctest -C ${{env.BUILD_TYPE}} -L s3 --output-on-failure + + test_python: + name: Test on ${{ matrix.platform }} + runs-on: ${{ matrix.platform }} + timeout-minutes: 20 + strategy: + fail-fast: false + matrix: + platform: + - "ubuntu-latest" + - "windows-latest" + - "macos-latest" + + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.10.0 + with: + access_token: ${{ github.token }} + + - uses: actions/checkout@v3 + with: + submodules: true + ref: ${{ github.event.pull_request.head.sha }} + + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install vcpkg + run: | + git clone https://github.com/microsoft/vcpkg.git + cd vcpkg && ./bootstrap-vcpkg.sh + echo "VCPKG_ROOT=${{github.workspace}}/vcpkg" >> $GITHUB_ENV + echo "${{github.workspace}}/vcpkg" >> $GITHUB_PATH + ./vcpkg integrate install + shell: bash + + - name: Install dependencies + run: python -m pip install -U pip "pybind11[global]" cmake build numpy + + - name: Build and install Python bindings + run: python -m pip install . + + - name: Run tests + run: python -m pytest -v + \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b47670c..7c5060c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,8 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] minversion = "6.0" -addopts = "-ra -q" +addopts = "-ra -q --color=yes" +log_cli = true # when true, messages are printed immediately testpaths = [ "python/tests", ] \ No newline at end of file From 71aacd8d9bf23a5bc974b1704a4ad3f7a404bdda Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Thu, 10 Oct 2024 13:58:25 -0400 Subject: [PATCH 16/28] (wip): first stream test (crash!) --- pyproject.toml | 33 +- python/acquire_zarr.pyi | 78 ++++ python/tests/test_settings.py | 1 - python/tests/test_stream.py | 682 ++++++++++++++++++++++++++++++++++ 4 files changed, 790 insertions(+), 4 deletions(-) create mode 100644 python/acquire_zarr.pyi create mode 100644 python/tests/test_stream.py diff --git a/pyproject.toml b/pyproject.toml index 7c5060c..a1d708a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,40 @@ [build-system] requires = [ - "setuptools>=42", - "wheel", - "ninja", "cmake>=3.12", + "ninja", "pybind11[global]", + "setuptools>=42", + "wheel", ] build-backend = "setuptools.build_meta" +[project] +name = "acquire-zarr" +requires-python = ">=3.9" +version = "0.0.1" + +[project.optional-dependencies] +testing = [ + "black", + "dask", + "mypy", + "ome-zarr", + "pytest>=7", + "pytest-cov", + "python-dotenv", + "ruff", + "s3fs", + "tifffile", + "zarr", +] + +[tool.black] +target-version = ['py39', 'py310', 'py311'] +line-length = 79 + +[tool.isort] +profile = "black" + [tool.pytest.ini_options] minversion = "6.0" addopts = "-ra -q --color=yes" diff --git a/python/acquire_zarr.pyi b/python/acquire_zarr.pyi new file mode 100644 index 0000000..81520c1 --- /dev/null +++ b/python/acquire_zarr.pyi @@ -0,0 +1,78 @@ +from typing import ( + Any, + ClassVar, + Dict, + Iterator, + List, + Optional, + Tuple, + final, + overload, +) + +class ZarrVersion: + V2: ClassVar[ZarrVersion] + V3: ClassVar[ZarrVersion] + +class DataType: + UINT8: ClassVar[DataType] + UINT16: ClassVar[DataType] + UINT32: ClassVar[DataType] + UINT64: ClassVar[DataType] + INT8: ClassVar[DataType] + INT16: ClassVar[DataType] + INT32: ClassVar[DataType] + INT64: ClassVar[DataType] + FLOAT32: ClassVar[DataType] + FLOAT64: ClassVar[DataType] + +class Compressor: + NONE: ClassVar[Compressor] + BLOSC1: ClassVar[Compressor] + +class CompressionCodec: + NONE: ClassVar[CompressionCodec] + BLOSC_LZ4: ClassVar[CompressionCodec] + BLOSC_ZSTD: ClassVar[CompressionCodec] + +class DimensionType: + SPACE: ClassVar[DimensionType] + CHANNEL: ClassVar[DimensionType] + TIME: ClassVar[DimensionType] + OTHER: ClassVar[DimensionType] + +class S3Settings: + endpoint: str + bucket_name: str + access_key_id: str + secret_access_key: str + +class CompressionSettings: + compressor: Compressor + codec: CompressionCodec + level: int + shuffle: int + +class Dimension: + name: str + kind: DimensionType + array_size_px: int + chunk_size_px: int + shard_size_chunks: int + +class StreamSettings: + store_path: str + custom_metadata: Optional[str] + s3: Optional[S3Settings] + compression: Optional[CompressionSettings] + dimensions: List[Dimension] + multiscale: bool + data_type: DataType + version: ZarrVersion + +class ZarrStream: + def append(self, data: Any) -> None: + ... + + def is_active(self) -> bool: + ... \ No newline at end of file diff --git a/python/tests/test_settings.py b/python/tests/test_settings.py index 2ca7afd..7b097df 100644 --- a/python/tests/test_settings.py +++ b/python/tests/test_settings.py @@ -3,7 +3,6 @@ import json from pathlib import Path -import numpy as np import pytest import acquire_zarr diff --git a/python/tests/test_stream.py b/python/tests/test_stream.py new file mode 100644 index 0000000..6ce4921 --- /dev/null +++ b/python/tests/test_stream.py @@ -0,0 +1,682 @@ +#!/usr/bin/env python3 + +import json +import os +from pathlib import Path +from tempfile import mkdtemp +from typing import Optional + +import dotenv +import pytest +import s3fs +import zarr +from dask import array as da +from numcodecs import blosc as blosc +from ome_zarr.io import parse_url +from ome_zarr.reader import Reader + +import acquire_zarr +from acquire_zarr import StreamSettings, Dimension, DimensionType, ZarrStream + +dotenv.load_dotenv() + + +@pytest.fixture(scope="function") +def settings(): + return StreamSettings() + + +def test_create_stream( + settings: StreamSettings, request: pytest.FixtureRequest +): + settings.store_path = f"{request.node.name}.zarr" + settings.dimensions.extend( + [ + Dimension( + name="t", + kind=DimensionType.TIME, + array_size_px=64, + chunk_size_px=64, + ), + Dimension( + name="y", + kind=DimensionType.SPACE, + array_size_px=64, + chunk_size_px=64, + ), + Dimension( + name="x", + kind=DimensionType.SPACE, + array_size_px=64, + chunk_size_px=64, + ), + ] + ) + stream = ZarrStream(settings) + assert stream + + +""" +def test_set_acquisition_dimensions( + settings: StreamSettings, request: pytest.FixtureRequest +): + dm = settings.device_manager() + props = settings.get_configuration() + props.video[0].camera.identifier = dm.select( + DeviceKind.Camera, ".*empty.*" + ) + props.video[0].camera.settings.shape = (64, 48) + + props.video[0].storage.identifier = dm.select(DeviceKind.Storage, "Zarr") + props.video[0].storage.settings.uri = str( + Path(mkdtemp()) / f"{request.node.name}.zarr" + ) + props.video[0].max_frame_count = 32 + + # configure storage dimensions + dimension_x = acquire.StorageDimension( + name="x", kind="Space", array_size_px=64, chunk_size_px=64 + ) + assert dimension_x.shard_size_chunks == 0 + + dimension_y = acquire.StorageDimension( + name="y", kind="Space", array_size_px=48, chunk_size_px=48 + ) + assert dimension_y.shard_size_chunks == 0 + + dimension_t = acquire.StorageDimension( + name="t", kind="Time", array_size_px=32, chunk_size_px=32 + ) + assert dimension_t.shard_size_chunks == 0 + + props.video[0].storage.settings.acquisition_dimensions = [ + dimension_x, + dimension_y, + dimension_t, + ] + assert len(props.video[0].storage.settings.acquisition_dimensions) == 3 + + # set and test + props = settings.set_configuration(props) + assert len(props.video[0].storage.settings.acquisition_dimensions) == 3 + + assert ( + props.video[0].storage.settings.acquisition_dimensions[0].name + == dimension_x.name + ) + assert ( + props.video[0].storage.settings.acquisition_dimensions[0].kind + == dimension_x.kind + ) + assert ( + props.video[0].storage.settings.acquisition_dimensions[0].array_size_px + == dimension_x.array_size_px + ) + assert ( + props.video[0].storage.settings.acquisition_dimensions[0].chunk_size_px + == dimension_x.chunk_size_px + ) + + assert ( + props.video[0].storage.settings.acquisition_dimensions[2].name + == dimension_t.name + ) + assert ( + props.video[0].storage.settings.acquisition_dimensions[2].kind + == dimension_t.kind + ) + assert ( + props.video[0].storage.settings.acquisition_dimensions[2].array_size_px + == dimension_t.array_size_px + ) + assert ( + props.video[0].storage.settings.acquisition_dimensions[2].chunk_size_px + == dimension_t.chunk_size_px + ) + + +def test_write_external_metadata_to_zarr( + runtime: Runtime, request: pytest.FixtureRequest +): + dm = runtime.device_manager() + props = runtime.get_configuration() + props.video[0].camera.identifier = dm.select( + DeviceKind.Camera, "simulated.*empty.*" + ) + props.video[0].camera.settings.shape = (33, 47) + props.video[0].storage.identifier = dm.select(DeviceKind.Storage, "Zarr") + props.video[0].max_frame_count = 4 + props.video[0].storage.settings.uri = str( + Path(mkdtemp()) / f"{request.node.name}.zarr" + ) + metadata = {"hello": "world"} + props.video[0].storage.settings.external_metadata_json = json.dumps( + metadata + ) + props.video[0].storage.settings.pixel_scale_um = (0.5, 4) + + # configure storage dimensions + dimension_x = acquire.StorageDimension( + name="x", kind="Space", array_size_px=33, chunk_size_px=33 + ) + assert dimension_x.shard_size_chunks == 0 + + dimension_y = acquire.StorageDimension( + name="y", kind="Space", array_size_px=47, chunk_size_px=47 + ) + assert dimension_y.shard_size_chunks == 0 + + dimension_z = acquire.StorageDimension( + name="z", kind="Space", array_size_px=0, chunk_size_px=4 + ) + assert dimension_z.shard_size_chunks == 0 + + props.video[0].storage.settings.acquisition_dimensions = [ + dimension_x, + dimension_y, + dimension_z, + ] + + props = runtime.set_configuration(props) + + nframes = 0 + runtime.start() + while nframes < props.video[0].max_frame_count: + with runtime.get_available_data(0) as packet: + nframes += packet.get_frame_count() + runtime.stop() + + assert props.video[0].storage.settings.uri + store = parse_url(props.video[0].storage.settings.uri) + assert store + reader = Reader(store) + nodes = list(reader()) + + # ome-ngff supports multiple images, in separate directories but we only + # wrote one. + multi_scale_image_node = nodes[0] + + # ome-ngff always stores multi-scale images, but we only have a single + # scale/level. + image_data = multi_scale_image_node.data[0] + assert image_data.shape == ( + props.video[0].max_frame_count, + props.video[0].camera.settings.shape[1], + props.video[0].camera.settings.shape[0], + ) + + multi_scale_image_metadata = multi_scale_image_node.metadata + + axes = multi_scale_image_metadata["axes"] + axis_names = tuple(a["name"] for a in axes) + assert axis_names == ("z", "y", "x") + + axis_types = tuple(a["type"] for a in axes) + assert axis_types == ("space", "space", "space") + + axis_units = tuple(a.get("unit") for a in axes) + assert axis_units == (None, "micrometer", "micrometer") + + # We only have one multi-scale level and one transform. + transform = multi_scale_image_metadata["coordinateTransformations"][0][0] + pixel_scale_um = tuple( + transform["scale"][axis_names.index(axis)] for axis in ("x", "y") + ) + assert pixel_scale_um == props.video[0].storage.settings.pixel_scale_um + + # ome-zarr only reads attributes it recognizes, so use a plain zarr reader + # to read external metadata instead. + group = zarr.open(props.video[0].storage.settings.uri) + assert group["0"].attrs.asdict() == metadata + + +@pytest.mark.parametrize( + ("compressor_name",), + [ + ("zstd",), + ("lz4",), + ], +) +def test_write_compressed_zarr( + runtime: Runtime, request: pytest.FixtureRequest, compressor_name +): + uri = str(Path(mkdtemp()) / f"{request.node.name}.zarr") + uri = uri.replace("[", "_").replace("]", "_") + + dm = runtime.device_manager() + p = runtime.get_configuration() + p.video[0].camera.identifier = dm.select( + DeviceKind.Camera, "simulated.*empty.*" + ) + p.video[0].camera.settings.shape = (64, 48) + p.video[0].camera.settings.exposure_time_us = 1e4 + p.video[0].storage.identifier = dm.select( + DeviceKind.Storage, + f"ZarrBlosc1{compressor_name.capitalize()}ByteShuffle", + ) + p.video[0].max_frame_count = 70 + p.video[0].storage.settings.uri = uri + metadata = {"foo": "bar"} + p.video[0].storage.settings.external_metadata_json = json.dumps(metadata) + + # configure storage dimensions + dimension_x = acquire.StorageDimension( + name="x", kind="Space", array_size_px=64, chunk_size_px=64 + ) + assert dimension_x.shard_size_chunks == 0 + + dimension_y = acquire.StorageDimension( + name="y", kind="Space", array_size_px=48, chunk_size_px=48 + ) + assert dimension_y.shard_size_chunks == 0 + + dimension_c = acquire.StorageDimension( + name="c", kind="Channel", array_size_px=1, chunk_size_px=1 + ) + assert dimension_c.shard_size_chunks == 0 + + dimension_t = acquire.StorageDimension( + name="t", kind="Time", array_size_px=0, chunk_size_px=70 + ) + assert dimension_t.shard_size_chunks == 0 + + p.video[0].storage.settings.acquisition_dimensions = [ + dimension_x, + dimension_y, + dimension_c, + dimension_t, + ] + + runtime.set_configuration(p) + + runtime.start() + runtime.stop() + + # load from Zarr + group = zarr.open(p.video[0].storage.settings.uri) + data = group["0"] + + assert data.compressor.cname == compressor_name + assert data.compressor.clevel == 1 + assert data.compressor.shuffle == blosc.SHUFFLE + + assert data.shape == ( + p.video[0].max_frame_count, + 1, + p.video[0].camera.settings.shape[1], + p.video[0].camera.settings.shape[0], + ) + assert data.attrs.asdict() == metadata + + # load from Dask + data = da.from_zarr(p.video[0].storage.settings.uri, component="0") + assert data.shape == ( + p.video[0].max_frame_count, + 1, + p.video[0].camera.settings.shape[1], + p.video[0].camera.settings.shape[0], + ) + + +@pytest.mark.parametrize( + ("number_of_frames", "expected_number_of_chunks", "compression"), + [ + (64, 4, None), + (64, 4, {"codec": "zstd", "clevel": 1, "shuffle": 1}), + (65, 8, None), # rollover + (65, 8, {"codec": "blosclz", "clevel": 2, "shuffle": 2}), # rollover + ], +) +def test_write_zarr_with_chunking( + runtime: acquire.Runtime, + request: pytest.FixtureRequest, + number_of_frames: int, + expected_number_of_chunks: int, + compression: Optional[dict], +): + dm = runtime.device_manager() + + p = runtime.get_configuration() + p.video[0].camera.identifier = dm.select( + DeviceKind.Camera, "simulated.*empty.*" + ) + p.video[0].camera.settings.shape = (1920, 1080) + p.video[0].camera.settings.exposure_time_us = 1e4 + p.video[0].camera.settings.pixel_type = acquire.SampleType.U8 + p.video[0].storage.identifier = dm.select( + DeviceKind.Storage, + "Zarr", + ) + p.video[0].storage.settings.uri = str( + Path(mkdtemp()) / f"{request.node.name}.zarr" + ) + p.video[0].max_frame_count = number_of_frames + + # configure storage dimensions + dimension_x = acquire.StorageDimension( + name="x", kind="Space", array_size_px=1920, chunk_size_px=960 + ) + assert dimension_x.shard_size_chunks == 0 + + dimension_y = acquire.StorageDimension( + name="y", kind="Space", array_size_px=1080, chunk_size_px=540 + ) + assert dimension_y.shard_size_chunks == 0 + + dimension_t = acquire.StorageDimension( + name="t", kind="Time", array_size_px=0, chunk_size_px=64 + ) + assert dimension_t.shard_size_chunks == 0 + + p.video[0].storage.settings.acquisition_dimensions = [ + dimension_x, + dimension_y, + dimension_t, + ] + + runtime.set_configuration(p) + + runtime.start() + runtime.stop() + + group = zarr.open(p.video[0].storage.settings.uri) + data = group["0"] + + assert data.chunks == (64, 540, 960) + + assert data.shape == ( + number_of_frames, + p.video[0].camera.settings.shape[1], + p.video[0].camera.settings.shape[0], + ) + assert data.nchunks == expected_number_of_chunks + + +def test_write_zarr_multiscale( + runtime: acquire.Runtime, + request: pytest.FixtureRequest, +): + uri = str(Path(mkdtemp()) / f"{request.node.name}.zarr") + uri = uri.replace("[", "_").replace("]", "_") + + dm = runtime.device_manager() + + p = runtime.get_configuration() + p.video[0].camera.identifier = dm.select( + DeviceKind.Camera, "simulated.*empty.*" + ) + p.video[0].camera.settings.shape = (1920, 1080) + p.video[0].camera.settings.exposure_time_us = 1e4 + p.video[0].camera.settings.pixel_type = acquire.SampleType.U8 + p.video[0].storage.identifier = dm.select( + DeviceKind.Storage, + "Zarr", + ) + p.video[0].storage.settings.uri = uri + p.video[0].storage.settings.pixel_scale_um = (1, 1) + p.video[0].max_frame_count = 100 + + # configure storage dimensions + dimension_x = acquire.StorageDimension( + name="x", kind="Space", array_size_px=1920, chunk_size_px=640 + ) + assert dimension_x.shard_size_chunks == 0 + + dimension_y = acquire.StorageDimension( + name="y", kind="Space", array_size_px=1080, chunk_size_px=360 + ) + assert dimension_y.shard_size_chunks == 0 + + dimension_t = acquire.StorageDimension( + name="t", kind="Time", array_size_px=0, chunk_size_px=64 + ) + assert dimension_t.shard_size_chunks == 0 + + p.video[0].storage.settings.acquisition_dimensions = [ + dimension_x, + dimension_y, + dimension_t, + ] + p.video[0].storage.settings.enable_multiscale = True + + runtime.set_configuration(p) + + runtime.start() + runtime.stop() + + reader = Reader(parse_url(uri)) + zgroup = list(reader())[0] + # loads each layer as a dask array from the Zarr dataset + data = [ + da.from_zarr(uri, component=str(i)) for i in range(len(zgroup.data)) + ] + assert len(data) == 3 # 3 layers + + +@pytest.mark.parametrize( + ("number_of_frames", "expected_number_of_chunks", "codec"), + [ + (64, 4, None), + (64, 4, "zstd"), + (65, 8, None), # rollover + (65, 8, "lz4"), # rollover + ], +) +def test_write_zarr_v3( + runtime: acquire.Runtime, + request: pytest.FixtureRequest, + number_of_frames: int, + expected_number_of_chunks: int, + codec: Optional[str], +): + dm = runtime.device_manager() + + p = runtime.get_configuration() + p.video[0].camera.identifier = dm.select( + DeviceKind.Camera, "simulated.*empty.*" + ) + + p.video[0].camera.settings.shape = (1920, 1080) + p.video[0].camera.settings.exposure_time_us = 1e4 + p.video[0].camera.settings.pixel_type = acquire.SampleType.U8 + p.video[0].storage.identifier = dm.select( + DeviceKind.Storage, + f"ZarrV3Blosc1{codec.capitalize()}ByteShuffle" if codec else "ZarrV3", + ) + p.video[0].storage.settings.uri = str( + Path(mkdtemp()) / f"{request.node.name}.zarr" + ) + p.video[0].max_frame_count = number_of_frames + + # configure storage dimensions + dimension_x = acquire.StorageDimension( + name="x", + kind="Space", + array_size_px=1920, + chunk_size_px=960, + shard_size_chunks=2, + ) + + dimension_y = acquire.StorageDimension( + name="y", + kind="Space", + array_size_px=1080, + chunk_size_px=540, + shard_size_chunks=2, + ) + + dimension_t = acquire.StorageDimension( + name="t", + kind="Time", + array_size_px=0, + chunk_size_px=64, + shard_size_chunks=1, + ) + + p.video[0].storage.settings.acquisition_dimensions = [ + dimension_x, + dimension_y, + dimension_t, + ] + + runtime.set_configuration(p) + + runtime.start() + runtime.stop() + + store = zarr.DirectoryStoreV3(p.video[0].storage.settings.uri) + group = zarr.open(store=store, mode="r") + data = group["0"] + + assert data.chunks == (64, 540, 960) + + assert data.shape == ( + number_of_frames, + p.video[0].camera.settings.shape[1], + p.video[0].camera.settings.shape[0], + ) + assert data.nchunks == expected_number_of_chunks + + +def test_metadata_with_trailing_whitespace( + runtime: Runtime, request: pytest.FixtureRequest +): + dm = runtime.device_manager() + p = runtime.get_configuration() + p.video[0].camera.identifier = dm.select( + DeviceKind.Camera, "simulated.*empty.*" + ) + p.video[0].camera.settings.shape = (64, 48) + p.video[0].camera.settings.exposure_time_us = 1e4 + p.video[0].storage.identifier = dm.select(DeviceKind.Storage, "Zarr") + p.video[0].max_frame_count = 70 + p.video[0].storage.settings.uri = str( + Path(mkdtemp()) / f"{request.node.name}.zarr" + ) + metadata = {"foo": "bar"} + p.video[0].storage.settings.external_metadata_json = ( + json.dumps(metadata) + " " + ) + + # configure storage dimensions + dimension_x = acquire.StorageDimension( + name="x", kind="Space", array_size_px=64, chunk_size_px=64 + ) + + dimension_y = acquire.StorageDimension( + name="y", kind="Space", array_size_px=48, chunk_size_px=48 + ) + + dimension_t = acquire.StorageDimension( + name="t", kind="Time", array_size_px=0, chunk_size_px=64 + ) + + p.video[0].storage.settings.acquisition_dimensions = [ + dimension_x, + dimension_y, + dimension_t, + ] + + runtime.set_configuration(p) + + runtime.start() + runtime.stop() + + # load from Zarr + group = zarr.open(p.video[0].storage.settings.uri) + data = group["0"] + + assert data.attrs.asdict() == metadata + + +def test_write_zarr_to_s3(runtime: Runtime, request: pytest.FixtureRequest): + required_env_vars = [ + "ZARR_S3_ENDPOINT", + "ZARR_S3_BUCKET_NAME", + "ZARR_S3_ACCESS_KEY_ID", + "ZARR_S3_SECRET_ACCESS_KEY", + ] + + for var in required_env_vars: + if var not in os.environ: + pytest.skip(f"{var} not set") + + zarr_s3_endpoint = os.environ["ZARR_S3_ENDPOINT"] + zarr_s3_bucket_name = os.environ["ZARR_S3_BUCKET_NAME"] + zarr_s3_access_key_id = os.environ["ZARR_S3_ACCESS_KEY_ID"] + zarr_s3_secret_access_key = os.environ["ZARR_S3_SECRET_ACCESS_KEY"] + + dm = runtime.device_manager() + props = runtime.get_configuration() + video = props.video[0] + + video.camera.identifier = dm.select( + DeviceKind.Camera, "simulated.*empty.*" + ) + video.camera.settings.shape = (1920, 1080) + video.camera.settings.exposure_time_us = 1e4 + video.camera.settings.pixel_type = acquire.SampleType.U8 + + video.storage.identifier = dm.select( + DeviceKind.Storage, + "Zarr", + ) + video.storage.settings.uri = ( + f"{zarr_s3_endpoint}/{zarr_s3_bucket_name}/{request.node.name}.zarr" + ) + video.storage.settings.s3_access_key_id = zarr_s3_access_key_id + video.storage.settings.s3_secret_access_key = zarr_s3_secret_access_key + + video.max_frame_count = 64 + + # configure storage dimensions + dimension_x = acquire.StorageDimension( + name="x", kind="Space", array_size_px=1920, chunk_size_px=960 + ) + assert dimension_x.shard_size_chunks == 0 + + dimension_y = acquire.StorageDimension( + name="y", kind="Space", array_size_px=1080, chunk_size_px=540 + ) + assert dimension_y.shard_size_chunks == 0 + + dimension_t = acquire.StorageDimension( + name="t", kind="Time", array_size_px=0, chunk_size_px=64 + ) + assert dimension_t.shard_size_chunks == 0 + + video.storage.settings.acquisition_dimensions = [ + dimension_x, + dimension_y, + dimension_t, + ] + + runtime.set_configuration(props) + + runtime.start() + runtime.stop() + + s3 = s3fs.S3FileSystem( + key=zarr_s3_access_key_id, + secret=zarr_s3_secret_access_key, + client_kwargs={"endpoint_url": zarr_s3_endpoint}, + ) + store = s3fs.S3Map( + root=f"{zarr_s3_bucket_name}/{request.node.name}.zarr", s3=s3 + ) + cache = zarr.LRUStoreCache(store, max_size=2**28) + group = zarr.group(store=cache) + + data = group["0"] + + assert data.chunks == (64, 540, 960) + assert data.shape == ( + 64, + video.camera.settings.shape[1], + video.camera.settings.shape[0], + ) + assert data.nchunks == 4 + + # cleanup + s3.rm(f"{zarr_s3_bucket_name}/{request.node.name}.zarr", recursive=True) +""" From 4deaecd6ad0f6a120869b5949acb454232b51174 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Thu, 10 Oct 2024 15:23:49 -0400 Subject: [PATCH 17/28] Remove deprecated workflows --- .github/workflows/build.yml | 37 +++++++++++++++-------------------- .github/workflows/release.yml | 22 ++++++++++----------- .github/workflows/test.yml | 29 ++++++++++++--------------- 3 files changed, 39 insertions(+), 49 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a2723f4..57a189a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,9 +4,6 @@ on: push: branches: - "main" - pull_request: # TODO (aliddell): remove this - branches: - - "main" jobs: windows-and-linux-build: @@ -29,12 +26,11 @@ jobs: permissions: actions: write - steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 - with: - access_token: ${{ github.token }} + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + steps: - uses: actions/checkout@v3 with: submodules: true @@ -58,7 +54,7 @@ jobs: cmake --build ${{github.workspace}}/build --config ${{matrix.build_type}} cpack --config ${{github.workspace}}/build/CPackConfig.cmake -C ${{matrix.build_type}} -G ZIP - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: ${{matrix.platform}} ${{matrix.build_type}} binaries path: ${{github.workspace}}/*.zip @@ -75,12 +71,11 @@ jobs: permissions: actions: write - steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 - with: - access_token: ${{ github.token }} + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + steps: - uses: actions/checkout@v3 with: submodules: true @@ -114,7 +109,7 @@ jobs: run: | cpack --config ${{github.workspace}}/build/CPackConfig.cmake -C ${{matrix.build_type}} -G ZIP - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: macos-latest ${{matrix.build_type}} binaries path: ${{github.workspace}}/*.zip @@ -132,12 +127,11 @@ jobs: permissions: actions: write - steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 - with: - access_token: ${{ github.token }} + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + steps: - uses: actions/checkout@v3 with: submodules: true @@ -163,6 +157,7 @@ jobs: run: python -m build - name: Upload wheel - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: ${{matrix.platform}} wheel path: ${{github.workspace}}/dist/*.whl diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 070250a..96da84b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -27,12 +27,11 @@ jobs: permissions: actions: write - steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 - with: - access_token: ${{ github.token }} + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + steps: - uses: actions/checkout@v3 with: submodules: true @@ -61,7 +60,7 @@ jobs: cmake --build ${{github.workspace}}/pack --config Release cpack --config ${{github.workspace}}/pack/CPackConfig.cmake -C Release -G ZIP - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: ${{matrix.platform}} binaries path: ${{github.workspace}}/*.zip @@ -72,12 +71,11 @@ jobs: permissions: actions: write - steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 - with: - access_token: ${{ github.token }} + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + steps: - uses: actions/checkout@v3 with: submodules: true @@ -115,7 +113,7 @@ jobs: run: | cpack --config ${{github.workspace}}/build/CPackConfig.cmake -C Release -G ZIP - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: macos-latest binaries path: ${{github.workspace}}/*.zip diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1704f4f..a723f73 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,12 +33,11 @@ jobs: permissions: actions: write - steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 - with: - access_token: ${{ github.token }} + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + steps: - uses: actions/checkout@v3 with: submodules: true @@ -77,12 +76,11 @@ jobs: MINIO_ACCESS_KEY: acquire MINIO_SECRET_KEY: 12345678 - steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 - with: - access_token: ${{ github.token }} + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + steps: - uses: actions/checkout@v3 with: submodules: true @@ -146,12 +144,11 @@ jobs: - "windows-latest" - "macos-latest" - steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.0 - with: - access_token: ${{ github.token }} + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + steps: - uses: actions/checkout@v3 with: submodules: true @@ -175,7 +172,7 @@ jobs: run: python -m pip install -U pip "pybind11[global]" cmake build numpy pytest - name: Build and install Python bindings - run: python -m pip install . + run: python -m pip install ".[testing]" - name: Run tests run: python -m pytest -v From 30a844ce1bf40d7ec5d4e86c659cb367804270f5 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Fri, 11 Oct 2024 10:00:32 -0400 Subject: [PATCH 18/28] A rectification of names --- .github/workflows/build.yml | 1 + .github/workflows/release.yml | 2 ++ .github/workflows/test.yml | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 57a189a..0c46a95 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,6 +7,7 @@ on: jobs: windows-and-linux-build: + name: Build on ${{ matrix.platform }} with ${{ matrix.build_type }} configuration strategy: matrix: build_type: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 96da84b..5259927 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,6 +11,7 @@ env: jobs: windows-and-linux-build: + name: Build on ${{ matrix.platform }} strategy: matrix: platform: @@ -66,6 +67,7 @@ jobs: path: ${{github.workspace}}/*.zip mac-build: + name: Build on macos-latest runs-on: "macos-latest" permissions: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a723f73..901941d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -133,7 +133,7 @@ jobs: run: ctest -C ${{env.BUILD_TYPE}} -L s3 --output-on-failure test_python: - name: Test on ${{ matrix.platform }} + name: Test Python on ${{ matrix.platform }} runs-on: ${{ matrix.platform }} timeout-minutes: 20 strategy: From 70f4f4785e1ee00828533f8be0b2566d98fc09b2 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Fri, 11 Oct 2024 10:01:37 -0400 Subject: [PATCH 19/28] whitespace --- src/streaming/zarr.stream.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/streaming/zarr.stream.cpp b/src/streaming/zarr.stream.cpp index ccb3d01..33dc17a 100644 --- a/src/streaming/zarr.stream.cpp +++ b/src/streaming/zarr.stream.cpp @@ -587,8 +587,8 @@ ZarrStream_s::create_store_() { std::error_code ec; if (!fs::create_directories(store_path_, ec)) { - set_error_("Failed to create store path '" + - store_path_ + "': " + ec.message()); + set_error_("Failed to create store path '" + store_path_ + + "': " + ec.message()); return false; } } @@ -647,7 +647,6 @@ ZarrStream_s::create_writers_() writers_.push_back(std::make_unique( downsampled_config, thread_pool_, s3_connection_pool_)); } - // scaled_frames_.emplace(level++, std::nullopt); config = std::move(downsampled_config); downsampled_config = {}; @@ -882,7 +881,8 @@ ZarrStream_s::write_multiscale_frames_(const std::byte* data, return; } - std::function scale; + std::function + scale; std::function average2; switch (dtype_) { From b68a634a5ecc483ffe7c7d9970eea733b1c848b9 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Fri, 11 Oct 2024 10:01:50 -0400 Subject: [PATCH 20/28] Fix double free error --- python/acquire-zarr-py.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/acquire-zarr-py.cpp b/python/acquire-zarr-py.cpp index ae78506..2eb6d33 100644 --- a/python/acquire-zarr-py.cpp +++ b/python/acquire-zarr-py.cpp @@ -340,13 +340,6 @@ class PyZarrStream } } - ~PyZarrStream() - { - if (is_active()) { - ZarrStream_destroy(stream_.get()); - } - } - void append(py::array image_data) { if (!is_active()) { From 28b28c6b75ae5d7a1b9dae3480fde785fa53a21a Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Fri, 11 Oct 2024 12:19:47 -0400 Subject: [PATCH 21/28] Slightly improve debug output. --- src/streaming/zarr.stream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/streaming/zarr.stream.cpp b/src/streaming/zarr.stream.cpp index 33dc17a..c31e43d 100644 --- a/src/streaming/zarr.stream.cpp +++ b/src/streaming/zarr.stream.cpp @@ -151,7 +151,7 @@ validate_custom_metadata(const char* metadata) ); if (val.is_discarded()) { - LOG_ERROR("Invalid JSON: ", metadata); + LOG_ERROR("Invalid JSON: '", metadata, "'"); return false; } From 6aabdd3a9d52fb3b810ab3280314caa29eb9f9d6 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Fri, 11 Oct 2024 12:20:02 -0400 Subject: [PATCH 22/28] Add some more streaming tests. --- python/tests/test_stream.py | 892 ++++++++++++------------------------ 1 file changed, 300 insertions(+), 592 deletions(-) diff --git a/python/tests/test_stream.py b/python/tests/test_stream.py index 6ce4921..c96d8cf 100644 --- a/python/tests/test_stream.py +++ b/python/tests/test_stream.py @@ -1,682 +1,390 @@ #!/usr/bin/env python3 +import dotenv + +dotenv.load_dotenv() + import json -import os from pathlib import Path -from tempfile import mkdtemp +import os +import shutil from typing import Optional -import dotenv +os.environ["ZARR_V3_EXPERIMENTAL_API"] = "1" +os.environ["ZARR_V3_SHARDING"] = "1" + +import numpy as np import pytest -import s3fs import zarr -from dask import array as da -from numcodecs import blosc as blosc -from ome_zarr.io import parse_url -from ome_zarr.reader import Reader - -import acquire_zarr -from acquire_zarr import StreamSettings, Dimension, DimensionType, ZarrStream +from numcodecs import blosc +import s3fs -dotenv.load_dotenv() +from acquire_zarr import ( + StreamSettings, + ZarrStream, + Compressor, + CompressionCodec, + CompressionSettings, + S3Settings, + Dimension, + DimensionType, + ZarrVersion, +) @pytest.fixture(scope="function") def settings(): - return StreamSettings() - - -def test_create_stream( - settings: StreamSettings, request: pytest.FixtureRequest -): - settings.store_path = f"{request.node.name}.zarr" - settings.dimensions.extend( + s = StreamSettings() + s.custom_metadata = json.dumps({"foo": "bar"}) + s.dimensions.extend( [ Dimension( name="t", kind=DimensionType.TIME, - array_size_px=64, - chunk_size_px=64, + array_size_px=0, + chunk_size_px=32, + shard_size_chunks=1, ), Dimension( name="y", kind=DimensionType.SPACE, - array_size_px=64, - chunk_size_px=64, + array_size_px=48, + chunk_size_px=16, + shard_size_chunks=1, ), Dimension( name="x", kind=DimensionType.SPACE, array_size_px=64, - chunk_size_px=64, + chunk_size_px=32, + shard_size_chunks=1, ), ] ) - stream = ZarrStream(settings) - assert stream - - -""" -def test_set_acquisition_dimensions( - settings: StreamSettings, request: pytest.FixtureRequest -): - dm = settings.device_manager() - props = settings.get_configuration() - props.video[0].camera.identifier = dm.select( - DeviceKind.Camera, ".*empty.*" - ) - props.video[0].camera.settings.shape = (64, 48) - - props.video[0].storage.identifier = dm.select(DeviceKind.Storage, "Zarr") - props.video[0].storage.settings.uri = str( - Path(mkdtemp()) / f"{request.node.name}.zarr" - ) - props.video[0].max_frame_count = 32 - - # configure storage dimensions - dimension_x = acquire.StorageDimension( - name="x", kind="Space", array_size_px=64, chunk_size_px=64 - ) - assert dimension_x.shard_size_chunks == 0 - - dimension_y = acquire.StorageDimension( - name="y", kind="Space", array_size_px=48, chunk_size_px=48 - ) - assert dimension_y.shard_size_chunks == 0 - - dimension_t = acquire.StorageDimension( - name="t", kind="Time", array_size_px=32, chunk_size_px=32 - ) - assert dimension_t.shard_size_chunks == 0 - - props.video[0].storage.settings.acquisition_dimensions = [ - dimension_x, - dimension_y, - dimension_t, - ] - assert len(props.video[0].storage.settings.acquisition_dimensions) == 3 - - # set and test - props = settings.set_configuration(props) - assert len(props.video[0].storage.settings.acquisition_dimensions) == 3 - - assert ( - props.video[0].storage.settings.acquisition_dimensions[0].name - == dimension_x.name - ) - assert ( - props.video[0].storage.settings.acquisition_dimensions[0].kind - == dimension_x.kind - ) - assert ( - props.video[0].storage.settings.acquisition_dimensions[0].array_size_px - == dimension_x.array_size_px - ) - assert ( - props.video[0].storage.settings.acquisition_dimensions[0].chunk_size_px - == dimension_x.chunk_size_px - ) - - assert ( - props.video[0].storage.settings.acquisition_dimensions[2].name - == dimension_t.name - ) - assert ( - props.video[0].storage.settings.acquisition_dimensions[2].kind - == dimension_t.kind - ) - assert ( - props.video[0].storage.settings.acquisition_dimensions[2].array_size_px - == dimension_t.array_size_px - ) - assert ( - props.video[0].storage.settings.acquisition_dimensions[2].chunk_size_px - == dimension_t.chunk_size_px - ) + return s -def test_write_external_metadata_to_zarr( - runtime: Runtime, request: pytest.FixtureRequest -): - dm = runtime.device_manager() - props = runtime.get_configuration() - props.video[0].camera.identifier = dm.select( - DeviceKind.Camera, "simulated.*empty.*" - ) - props.video[0].camera.settings.shape = (33, 47) - props.video[0].storage.identifier = dm.select(DeviceKind.Storage, "Zarr") - props.video[0].max_frame_count = 4 - props.video[0].storage.settings.uri = str( - Path(mkdtemp()) / f"{request.node.name}.zarr" - ) - metadata = {"hello": "world"} - props.video[0].storage.settings.external_metadata_json = json.dumps( - metadata - ) - props.video[0].storage.settings.pixel_scale_um = (0.5, 4) - # configure storage dimensions - dimension_x = acquire.StorageDimension( - name="x", kind="Space", array_size_px=33, chunk_size_px=33 - ) - assert dimension_x.shard_size_chunks == 0 - - dimension_y = acquire.StorageDimension( - name="y", kind="Space", array_size_px=47, chunk_size_px=47 - ) - assert dimension_y.shard_size_chunks == 0 - - dimension_z = acquire.StorageDimension( - name="z", kind="Space", array_size_px=0, chunk_size_px=4 - ) - assert dimension_z.shard_size_chunks == 0 - - props.video[0].storage.settings.acquisition_dimensions = [ - dimension_x, - dimension_y, - dimension_z, - ] - - props = runtime.set_configuration(props) - - nframes = 0 - runtime.start() - while nframes < props.video[0].max_frame_count: - with runtime.get_available_data(0) as packet: - nframes += packet.get_frame_count() - runtime.stop() - - assert props.video[0].storage.settings.uri - store = parse_url(props.video[0].storage.settings.uri) - assert store - reader = Reader(store) - nodes = list(reader()) - - # ome-ngff supports multiple images, in separate directories but we only - # wrote one. - multi_scale_image_node = nodes[0] - - # ome-ngff always stores multi-scale images, but we only have a single - # scale/level. - image_data = multi_scale_image_node.data[0] - assert image_data.shape == ( - props.video[0].max_frame_count, - props.video[0].camera.settings.shape[1], - props.video[0].camera.settings.shape[0], - ) - - multi_scale_image_metadata = multi_scale_image_node.metadata - - axes = multi_scale_image_metadata["axes"] - axis_names = tuple(a["name"] for a in axes) - assert axis_names == ("z", "y", "x") - - axis_types = tuple(a["type"] for a in axes) - assert axis_types == ("space", "space", "space") - - axis_units = tuple(a.get("unit") for a in axes) - assert axis_units == (None, "micrometer", "micrometer") - - # We only have one multi-scale level and one transform. - transform = multi_scale_image_metadata["coordinateTransformations"][0][0] - pixel_scale_um = tuple( - transform["scale"][axis_names.index(axis)] for axis in ("x", "y") +@pytest.fixture(scope="function") +def store_path(tmp_path): + yield tmp_path + shutil.rmtree(tmp_path) + + +def validate_v2_metadata(store_path: Path): + assert (store_path / ".zattrs").is_file() + with open(store_path / ".zattrs", "r") as fh: + data = json.load(fh) + axes = data["multiscales"][0]["axes"] + assert axes[0]["name"] == "t" + assert axes[0]["type"] == "time" + + assert axes[1]["name"] == "y" + assert axes[1]["type"] == "space" + assert axes[1]["unit"] == "micrometer" + + assert axes[2]["name"] == "x" + assert axes[2]["type"] == "space" + assert axes[2]["unit"] == "micrometer" + + assert (store_path / ".zgroup").is_file() + with open(store_path / ".zgroup", "r") as fh: + data = json.load(fh) + assert data["zarr_format"] == 2 + + assert (store_path / "acquire.json").is_file() + with open(store_path / "acquire.json", "r") as fh: + data = json.load(fh) + assert data["foo"] == "bar" + + assert (store_path / "0").is_dir() + + +def validate_v3_metadata(store_path: Path): + assert (store_path / "zarr.json").is_file() + with open(store_path / "zarr.json", "r") as fh: + data = json.load(fh) + assert data["extensions"] == [] + assert ( + data["metadata_encoding"] + == "https://purl.org/zarr/spec/protocol/core/3.0" + ) + assert ( + data["zarr_format"] + == "https://purl.org/zarr/spec/protocol/core/3.0" + ) + assert data["metadata_key_suffix"] == ".json" + + assert (store_path / "meta").is_dir() + assert (store_path / "meta" / "root.group.json").is_file() + with open(store_path / "meta" / "root.group.json", "r") as fh: + data = json.load(fh) + axes = data["attributes"]["multiscales"][0]["axes"] + assert axes[0]["name"] == "t" + assert axes[0]["type"] == "time" + + assert axes[1]["name"] == "y" + assert axes[1]["type"] == "space" + assert axes[1]["unit"] == "micrometer" + + assert axes[2]["name"] == "x" + assert axes[2]["type"] == "space" + assert axes[2]["unit"] == "micrometer" + + assert (store_path / "meta" / "acquire.json").is_file() + with open(store_path / "meta" / "acquire.json", "r") as fh: + data = json.load(fh) + assert data["foo"] == "bar" + + +def get_directory_store(version: ZarrVersion, store_path: str): + if version == ZarrVersion.V2: + return zarr.DirectoryStore(store_path) + else: + return zarr.DirectoryStoreV3(store_path) + +def make_s3_settings(store_path: str): + if "ZARR_S3_ENDPOINT" not in os.environ or "ZARR_S3_BUCKET_NAME" not in os.environ or "ZARR_S3_ACCESS_KEY_ID" not in os.environ or "ZARR_S3_SECRET_ACCESS_KEY" not in os.environ: + return None + + return S3Settings( + endpoint=os.environ["ZARR_S3_ENDPOINT"], + bucket_name=os.environ["ZARR_S3_BUCKET_NAME"], + access_key_id=os.environ["ZARR_S3_ACCESS_KEY_ID"], + secret_access_key=os.environ["ZARR_S3_SECRET_ACCESS_KEY"], ) - assert pixel_scale_um == props.video[0].storage.settings.pixel_scale_um - - # ome-zarr only reads attributes it recognizes, so use a plain zarr reader - # to read external metadata instead. - group = zarr.open(props.video[0].storage.settings.uri) - assert group["0"].attrs.asdict() == metadata @pytest.mark.parametrize( - ("compressor_name",), + ("version",), [ - ("zstd",), - ("lz4",), + (ZarrVersion.V2,), + (ZarrVersion.V3,), ], ) -def test_write_compressed_zarr( - runtime: Runtime, request: pytest.FixtureRequest, compressor_name +def test_create_stream( + settings: StreamSettings, + store_path: Path, + request: pytest.FixtureRequest, + version: ZarrVersion, ): - uri = str(Path(mkdtemp()) / f"{request.node.name}.zarr") - uri = uri.replace("[", "_").replace("]", "_") - - dm = runtime.device_manager() - p = runtime.get_configuration() - p.video[0].camera.identifier = dm.select( - DeviceKind.Camera, "simulated.*empty.*" - ) - p.video[0].camera.settings.shape = (64, 48) - p.video[0].camera.settings.exposure_time_us = 1e4 - p.video[0].storage.identifier = dm.select( - DeviceKind.Storage, - f"ZarrBlosc1{compressor_name.capitalize()}ByteShuffle", - ) - p.video[0].max_frame_count = 70 - p.video[0].storage.settings.uri = uri - metadata = {"foo": "bar"} - p.video[0].storage.settings.external_metadata_json = json.dumps(metadata) - - # configure storage dimensions - dimension_x = acquire.StorageDimension( - name="x", kind="Space", array_size_px=64, chunk_size_px=64 - ) - assert dimension_x.shard_size_chunks == 0 - - dimension_y = acquire.StorageDimension( - name="y", kind="Space", array_size_px=48, chunk_size_px=48 - ) - assert dimension_y.shard_size_chunks == 0 - - dimension_c = acquire.StorageDimension( - name="c", kind="Channel", array_size_px=1, chunk_size_px=1 - ) - assert dimension_c.shard_size_chunks == 0 - - dimension_t = acquire.StorageDimension( - name="t", kind="Time", array_size_px=0, chunk_size_px=70 - ) - assert dimension_t.shard_size_chunks == 0 - - p.video[0].storage.settings.acquisition_dimensions = [ - dimension_x, - dimension_y, - dimension_c, - dimension_t, - ] + settings.store_path = str(store_path / f"{request.node.name}.zarr") + settings.version = version + stream = ZarrStream(settings) + assert stream - runtime.set_configuration(p) + store_path = Path(settings.store_path) - runtime.start() - runtime.stop() + del stream # close the stream, flush the files - # load from Zarr - group = zarr.open(p.video[0].storage.settings.uri) - data = group["0"] + # check that the stream created the zarr store + assert store_path.is_dir() - assert data.compressor.cname == compressor_name - assert data.compressor.clevel == 1 - assert data.compressor.shuffle == blosc.SHUFFLE + if version == ZarrVersion.V2: + validate_v2_metadata(store_path) - assert data.shape == ( - p.video[0].max_frame_count, - 1, - p.video[0].camera.settings.shape[1], - p.video[0].camera.settings.shape[0], - ) - assert data.attrs.asdict() == metadata + # no data written, so no array metadata + assert not (store_path / "0" / ".zarray").exists() + else: + validate_v3_metadata(store_path) - # load from Dask - data = da.from_zarr(p.video[0].storage.settings.uri, component="0") - assert data.shape == ( - p.video[0].max_frame_count, - 1, - p.video[0].camera.settings.shape[1], - p.video[0].camera.settings.shape[0], - ) + # no data written, so no array metadata + assert not (store_path / "meta" / "0.array.json").exists() @pytest.mark.parametrize( - ("number_of_frames", "expected_number_of_chunks", "compression"), + ( + "version", + "compression_codec", + ), [ - (64, 4, None), - (64, 4, {"codec": "zstd", "clevel": 1, "shuffle": 1}), - (65, 8, None), # rollover - (65, 8, {"codec": "blosclz", "clevel": 2, "shuffle": 2}), # rollover + ( + ZarrVersion.V2, + None, + ), + ( + ZarrVersion.V2, + CompressionCodec.BLOSC_LZ4, + ), + ( + ZarrVersion.V2, + CompressionCodec.BLOSC_ZSTD, + ), + ( + ZarrVersion.V3, + None, + ), + ( + ZarrVersion.V3, + CompressionCodec.BLOSC_LZ4, + ), + ( + ZarrVersion.V3, + CompressionCodec.BLOSC_ZSTD, + ), ], ) -def test_write_zarr_with_chunking( - runtime: acquire.Runtime, +def test_stream_data_to_filesystem( + settings: StreamSettings, + store_path: Path, request: pytest.FixtureRequest, - number_of_frames: int, - expected_number_of_chunks: int, - compression: Optional[dict], + version: ZarrVersion, + compression_codec: Optional[CompressionCodec], ): - dm = runtime.device_manager() + settings.store_path = str(store_path / f"{request.node.name}.zarr") + settings.version = version + if compression_codec is not None: + settings.compression = CompressionSettings( + compressor=Compressor.BLOSC1, + codec=compression_codec, + level=1, + shuffle=1, + ) - p = runtime.get_configuration() - p.video[0].camera.identifier = dm.select( - DeviceKind.Camera, "simulated.*empty.*" - ) - p.video[0].camera.settings.shape = (1920, 1080) - p.video[0].camera.settings.exposure_time_us = 1e4 - p.video[0].camera.settings.pixel_type = acquire.SampleType.U8 - p.video[0].storage.identifier = dm.select( - DeviceKind.Storage, - "Zarr", - ) - p.video[0].storage.settings.uri = str( - Path(mkdtemp()) / f"{request.node.name}.zarr" - ) - p.video[0].max_frame_count = number_of_frames + stream = ZarrStream(settings) + assert stream - # configure storage dimensions - dimension_x = acquire.StorageDimension( - name="x", kind="Space", array_size_px=1920, chunk_size_px=960 + data = np.random.randint( + 0, + 255, + ( + settings.dimensions[0].chunk_size_px, + settings.dimensions[1].array_size_px, + settings.dimensions[2].array_size_px, + ), + dtype=np.uint8, ) - assert dimension_x.shard_size_chunks == 0 + stream.append(data) - dimension_y = acquire.StorageDimension( - name="y", kind="Space", array_size_px=1080, chunk_size_px=540 - ) - assert dimension_y.shard_size_chunks == 0 + del stream # close the stream, flush the files - dimension_t = acquire.StorageDimension( - name="t", kind="Time", array_size_px=0, chunk_size_px=64 + group = zarr.open( + store=get_directory_store(version, settings.store_path), mode="r" ) - assert dimension_t.shard_size_chunks == 0 - - p.video[0].storage.settings.acquisition_dimensions = [ - dimension_x, - dimension_y, - dimension_t, - ] - - runtime.set_configuration(p) - - runtime.start() - runtime.stop() - - group = zarr.open(p.video[0].storage.settings.uri) data = group["0"] - assert data.chunks == (64, 540, 960) - assert data.shape == ( - number_of_frames, - p.video[0].camera.settings.shape[1], - p.video[0].camera.settings.shape[0], + settings.dimensions[0].chunk_size_px, + settings.dimensions[1].array_size_px, + settings.dimensions[2].array_size_px, ) - assert data.nchunks == expected_number_of_chunks - -def test_write_zarr_multiscale( - runtime: acquire.Runtime, - request: pytest.FixtureRequest, -): - uri = str(Path(mkdtemp()) / f"{request.node.name}.zarr") - uri = uri.replace("[", "_").replace("]", "_") - - dm = runtime.device_manager() - - p = runtime.get_configuration() - p.video[0].camera.identifier = dm.select( - DeviceKind.Camera, "simulated.*empty.*" - ) - p.video[0].camera.settings.shape = (1920, 1080) - p.video[0].camera.settings.exposure_time_us = 1e4 - p.video[0].camera.settings.pixel_type = acquire.SampleType.U8 - p.video[0].storage.identifier = dm.select( - DeviceKind.Storage, - "Zarr", - ) - p.video[0].storage.settings.uri = uri - p.video[0].storage.settings.pixel_scale_um = (1, 1) - p.video[0].max_frame_count = 100 - - # configure storage dimensions - dimension_x = acquire.StorageDimension( - name="x", kind="Space", array_size_px=1920, chunk_size_px=640 - ) - assert dimension_x.shard_size_chunks == 0 - - dimension_y = acquire.StorageDimension( - name="y", kind="Space", array_size_px=1080, chunk_size_px=360 - ) - assert dimension_y.shard_size_chunks == 0 - - dimension_t = acquire.StorageDimension( - name="t", kind="Time", array_size_px=0, chunk_size_px=64 - ) - assert dimension_t.shard_size_chunks == 0 - - p.video[0].storage.settings.acquisition_dimensions = [ - dimension_x, - dimension_y, - dimension_t, - ] - p.video[0].storage.settings.enable_multiscale = True - - runtime.set_configuration(p) - - runtime.start() - runtime.stop() - - reader = Reader(parse_url(uri)) - zgroup = list(reader())[0] - # loads each layer as a dask array from the Zarr dataset - data = [ - da.from_zarr(uri, component=str(i)) for i in range(len(zgroup.data)) - ] - assert len(data) == 3 # 3 layers + if compression_codec is not None: + cname = ( + "lz4" + if compression_codec == CompressionCodec.BLOSC_LZ4 + else "zstd" + ) + assert data.compressor.cname == cname + assert data.compressor.clevel == 1 + assert data.compressor.shuffle == blosc.SHUFFLE + else: + assert data.compressor is None @pytest.mark.parametrize( - ("number_of_frames", "expected_number_of_chunks", "codec"), + ( + "version", + "compression_codec", + ), [ - (64, 4, None), - (64, 4, "zstd"), - (65, 8, None), # rollover - (65, 8, "lz4"), # rollover + ( + ZarrVersion.V2, + None, + ), + ( + ZarrVersion.V2, + CompressionCodec.BLOSC_LZ4, + ), + ( + ZarrVersion.V2, + CompressionCodec.BLOSC_ZSTD, + ), + ( + ZarrVersion.V3, + None, + ), + ( + ZarrVersion.V3, + CompressionCodec.BLOSC_LZ4, + ), + ( + ZarrVersion.V3, + CompressionCodec.BLOSC_ZSTD, + ), ], ) -def test_write_zarr_v3( - runtime: acquire.Runtime, +@pytest.mark.skip(reason="Temporary; needs debugging") +def test_stream_data_to_s3( + settings: StreamSettings, + store_path: Path, request: pytest.FixtureRequest, - number_of_frames: int, - expected_number_of_chunks: int, - codec: Optional[str], + version: ZarrVersion, + compression_codec: Optional[CompressionCodec], ): - dm = runtime.device_manager() - - p = runtime.get_configuration() - p.video[0].camera.identifier = dm.select( - DeviceKind.Camera, "simulated.*empty.*" - ) - - p.video[0].camera.settings.shape = (1920, 1080) - p.video[0].camera.settings.exposure_time_us = 1e4 - p.video[0].camera.settings.pixel_type = acquire.SampleType.U8 - p.video[0].storage.identifier = dm.select( - DeviceKind.Storage, - f"ZarrV3Blosc1{codec.capitalize()}ByteShuffle" if codec else "ZarrV3", - ) - p.video[0].storage.settings.uri = str( - Path(mkdtemp()) / f"{request.node.name}.zarr" - ) - p.video[0].max_frame_count = number_of_frames - - # configure storage dimensions - dimension_x = acquire.StorageDimension( - name="x", - kind="Space", - array_size_px=1920, - chunk_size_px=960, - shard_size_chunks=2, - ) - - dimension_y = acquire.StorageDimension( - name="y", - kind="Space", - array_size_px=1080, - chunk_size_px=540, - shard_size_chunks=2, - ) - - dimension_t = acquire.StorageDimension( - name="t", - kind="Time", - array_size_px=0, - chunk_size_px=64, - shard_size_chunks=1, - ) - - p.video[0].storage.settings.acquisition_dimensions = [ - dimension_x, - dimension_y, - dimension_t, - ] - - runtime.set_configuration(p) - - runtime.start() - runtime.stop() - - store = zarr.DirectoryStoreV3(p.video[0].storage.settings.uri) - group = zarr.open(store=store, mode="r") - data = group["0"] - - assert data.chunks == (64, 540, 960) + s3_settings = make_s3_settings(store_path) + if s3_settings is None: + pytest.skip("S3 settings not set") + + settings.store_path = str(store_path / f"{request.node.name}.zarr") + settings.version = version + settings.s3 = s3_settings + if compression_codec is not None: + settings.compression = CompressionSettings( + compressor=Compressor.BLOSC1, + codec=compression_codec, + level=1, + shuffle=1, + ) - assert data.shape == ( - number_of_frames, - p.video[0].camera.settings.shape[1], - p.video[0].camera.settings.shape[0], - ) - assert data.nchunks == expected_number_of_chunks - - -def test_metadata_with_trailing_whitespace( - runtime: Runtime, request: pytest.FixtureRequest -): - dm = runtime.device_manager() - p = runtime.get_configuration() - p.video[0].camera.identifier = dm.select( - DeviceKind.Camera, "simulated.*empty.*" - ) - p.video[0].camera.settings.shape = (64, 48) - p.video[0].camera.settings.exposure_time_us = 1e4 - p.video[0].storage.identifier = dm.select(DeviceKind.Storage, "Zarr") - p.video[0].max_frame_count = 70 - p.video[0].storage.settings.uri = str( - Path(mkdtemp()) / f"{request.node.name}.zarr" - ) - metadata = {"foo": "bar"} - p.video[0].storage.settings.external_metadata_json = ( - json.dumps(metadata) + " " - ) - - # configure storage dimensions - dimension_x = acquire.StorageDimension( - name="x", kind="Space", array_size_px=64, chunk_size_px=64 - ) - - dimension_y = acquire.StorageDimension( - name="y", kind="Space", array_size_px=48, chunk_size_px=48 - ) - - dimension_t = acquire.StorageDimension( - name="t", kind="Time", array_size_px=0, chunk_size_px=64 - ) - - p.video[0].storage.settings.acquisition_dimensions = [ - dimension_x, - dimension_y, - dimension_t, - ] - - runtime.set_configuration(p) - - runtime.start() - runtime.stop() - - # load from Zarr - group = zarr.open(p.video[0].storage.settings.uri) - data = group["0"] - - assert data.attrs.asdict() == metadata - - -def test_write_zarr_to_s3(runtime: Runtime, request: pytest.FixtureRequest): - required_env_vars = [ - "ZARR_S3_ENDPOINT", - "ZARR_S3_BUCKET_NAME", - "ZARR_S3_ACCESS_KEY_ID", - "ZARR_S3_SECRET_ACCESS_KEY", - ] - - for var in required_env_vars: - if var not in os.environ: - pytest.skip(f"{var} not set") - - zarr_s3_endpoint = os.environ["ZARR_S3_ENDPOINT"] - zarr_s3_bucket_name = os.environ["ZARR_S3_BUCKET_NAME"] - zarr_s3_access_key_id = os.environ["ZARR_S3_ACCESS_KEY_ID"] - zarr_s3_secret_access_key = os.environ["ZARR_S3_SECRET_ACCESS_KEY"] - - dm = runtime.device_manager() - props = runtime.get_configuration() - video = props.video[0] - - video.camera.identifier = dm.select( - DeviceKind.Camera, "simulated.*empty.*" - ) - video.camera.settings.shape = (1920, 1080) - video.camera.settings.exposure_time_us = 1e4 - video.camera.settings.pixel_type = acquire.SampleType.U8 - - video.storage.identifier = dm.select( - DeviceKind.Storage, - "Zarr", - ) - video.storage.settings.uri = ( - f"{zarr_s3_endpoint}/{zarr_s3_bucket_name}/{request.node.name}.zarr" - ) - video.storage.settings.s3_access_key_id = zarr_s3_access_key_id - video.storage.settings.s3_secret_access_key = zarr_s3_secret_access_key - - video.max_frame_count = 64 - - # configure storage dimensions - dimension_x = acquire.StorageDimension( - name="x", kind="Space", array_size_px=1920, chunk_size_px=960 - ) - assert dimension_x.shard_size_chunks == 0 - - dimension_y = acquire.StorageDimension( - name="y", kind="Space", array_size_px=1080, chunk_size_px=540 - ) - assert dimension_y.shard_size_chunks == 0 + stream = ZarrStream(settings) + assert stream - dimension_t = acquire.StorageDimension( - name="t", kind="Time", array_size_px=0, chunk_size_px=64 + data = np.random.randint( + -255, + 255, + ( + settings.dimensions[0].chunk_size_px, + settings.dimensions[1].array_size_px, + settings.dimensions[2].array_size_px, + ), + dtype=np.int16, ) - assert dimension_t.shard_size_chunks == 0 - - video.storage.settings.acquisition_dimensions = [ - dimension_x, - dimension_y, - dimension_t, - ] + stream.append(data) - runtime.set_configuration(props) - - runtime.start() - runtime.stop() + del stream # close the stream, flush the files s3 = s3fs.S3FileSystem( - key=zarr_s3_access_key_id, - secret=zarr_s3_secret_access_key, - client_kwargs={"endpoint_url": zarr_s3_endpoint}, + key=settings.s3.access_key_id, + secret=settings.s3_secret_access_key, + client_kwargs={"endpoint_url": settings.s3_endpoint}, ) store = s3fs.S3Map( - root=f"{zarr_s3_bucket_name}/{request.node.name}.zarr", s3=s3 + root=f"{s3_settings.bucket_name}/{settings.store_path}", s3=s3 ) cache = zarr.LRUStoreCache(store, max_size=2**28) group = zarr.group(store=cache) data = group["0"] - assert data.chunks == (64, 540, 960) assert data.shape == ( - 64, - video.camera.settings.shape[1], - video.camera.settings.shape[0], - ) - assert data.nchunks == 4 + settings.dimensions[0].chunk_size_px, + settings.dimensions[1].array_size_px, + settings.dimensions[2].array_size_px, + ) + + if compression_codec is not None: + cname = ( + "lz4" + if compression_codec == CompressionCodec.BLOSC_LZ4 + else "zstd" + ) + assert data.compressor.cname == cname + assert data.compressor.clevel == 1 + assert data.compressor.shuffle == blosc.SHUFFLE + else: + assert data.compressor is None # cleanup - s3.rm(f"{zarr_s3_bucket_name}/{request.node.name}.zarr", recursive=True) -""" + s3.rm(store.root, recursive=True) + \ No newline at end of file From 2969c6addf19c807e647d571b80b70ab938c0636 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Mon, 14 Oct 2024 10:52:06 -0400 Subject: [PATCH 23/28] concurrency? --- .github/workflows/test.yml | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 85b81cc..4557686 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,6 +11,10 @@ on: env: BUILD_TYPE: Release +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + jobs: test: name: Test on ${{ matrix.platform }} @@ -33,10 +37,6 @@ jobs: permissions: actions: write - concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} - steps: - uses: actions/checkout@v3 with: @@ -76,10 +76,6 @@ jobs: MINIO_ACCESS_KEY: acquire MINIO_SECRET_KEY: 12345678 - concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} - steps: - uses: actions/checkout@v3 with: @@ -144,10 +140,6 @@ jobs: - "windows-latest" - "macos-latest" - concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} - steps: - uses: actions/checkout@v3 with: From 8e8aba80b5a55ca76b818ba7000c00c680945744 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Mon, 14 Oct 2024 12:44:12 -0400 Subject: [PATCH 24/28] Explicit nullopt --- python/acquire-zarr-py.cpp | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/python/acquire-zarr-py.cpp b/python/acquire-zarr-py.cpp index 2eb6d33..b671cb3 100644 --- a/python/acquire-zarr-py.cpp +++ b/python/acquire-zarr-py.cpp @@ -257,9 +257,11 @@ class PyZarrStreamSettings private: std::string store_path_; - std::optional custom_metadata_; - std::optional s3_settings_; - std::optional compression_settings_; + std::optional custom_metadata_{ std::nullopt }; + std::optional s3_settings_{ std::nullopt }; + std::optional compression_settings_{ + std::nullopt + }; bool multiscale_ = false; ZarrDataType data_type_{ ZarrDataType_uint8 }; ZarrVersion version_{ ZarrVersion_2 }; @@ -527,18 +529,23 @@ PYBIND11_MODULE(acquire_zarr, m) if (kwargs.contains("store_path")) settings.set_store_path(kwargs["store_path"].cast()); - if (kwargs.contains("custom_metadata")) - settings.set_custom_metadata( - kwargs["custom_metadata"].cast>()); - - if (kwargs.contains("s3")) - settings.set_s3( - kwargs["s3"].cast>()); - - if (kwargs.contains("compression")) - settings.set_compression( - kwargs["compression"] - .cast>()); + if (kwargs.contains("custom_metadata") && + !kwargs["custom_metadata"].is_none()) { + auto cm = kwargs["custom_metadata"].cast(); + settings.set_custom_metadata(cm); + } + + if (kwargs.contains("s3") && !kwargs["s3"].is_none()) { + auto s3 = kwargs["s3"].cast(); + settings.set_s3(s3); + } + + if (kwargs.contains("compression") && + !kwargs["compression"].is_none()) { + auto compression = + kwargs["compression"].cast(); + settings.set_compression(compression); + } if (kwargs.contains("dimensions")) settings.dimensions = From 08e95ed26417693af632ee970bc55179d240d75c Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Thu, 24 Oct 2024 17:30:12 -0400 Subject: [PATCH 25/28] Get it working ok on Windows --- python/acquire-zarr-py.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/python/acquire-zarr-py.cpp b/python/acquire-zarr-py.cpp index b671cb3..3683d4f 100644 --- a/python/acquire-zarr-py.cpp +++ b/python/acquire-zarr-py.cpp @@ -290,11 +290,11 @@ class PyZarrStream auto store_path = settings.store_path(); stream_settings.store_path = store_path.c_str(); - auto metadata = settings.custom_metadata(); - stream_settings.custom_metadata = - settings.custom_metadata().has_value() - ? settings.custom_metadata()->c_str() - : nullptr; + std::string metadata; + if (settings.custom_metadata()) { + metadata = settings.custom_metadata().value(); + stream_settings.custom_metadata = metadata.c_str(); + } if (settings.s3().has_value()) { s3_settings.endpoint = settings.s3()->endpoint().c_str(); @@ -566,10 +566,10 @@ PYBIND11_MODULE(acquire_zarr, m) .def("__repr__", [](const PyZarrStreamSettings& self) { std::string repr = - "StreamSettings(store_path='" + self.store_path(); + "StreamSettings(store_path='" + self.store_path() + "'"; if (self.custom_metadata().has_value()) { - repr += - ", custom_metadata='" + self.custom_metadata().value(); + repr += ", custom_metadata='" + + self.custom_metadata().value() + "'"; } if (self.s3().has_value()) { @@ -582,9 +582,10 @@ PYBIND11_MODULE(acquire_zarr, m) for (const auto& dim : self.dimensions) { repr += dim.repr() + ", "; } + + std::string multiscale = self.multiscale() ? "True" : "False"; repr += - "], multiscale=" + std::to_string(self.multiscale()) + - ", data_type=DataType." + + "], multiscale=" + multiscale + ", data_type=DataType." + std::string(data_type_to_str(self.data_type())) + ", version=ZarrVersion." + std::string(self.version() == ZarrVersion_2 ? "V2" : "V3") + From 597e49c90bd2c3e82c48a9205a599e3acf93bd28 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Fri, 25 Oct 2024 10:29:44 -0400 Subject: [PATCH 26/28] remove stub --- python/acquire_zarr.pyi | 78 ----------------------------------------- 1 file changed, 78 deletions(-) delete mode 100644 python/acquire_zarr.pyi diff --git a/python/acquire_zarr.pyi b/python/acquire_zarr.pyi deleted file mode 100644 index 81520c1..0000000 --- a/python/acquire_zarr.pyi +++ /dev/null @@ -1,78 +0,0 @@ -from typing import ( - Any, - ClassVar, - Dict, - Iterator, - List, - Optional, - Tuple, - final, - overload, -) - -class ZarrVersion: - V2: ClassVar[ZarrVersion] - V3: ClassVar[ZarrVersion] - -class DataType: - UINT8: ClassVar[DataType] - UINT16: ClassVar[DataType] - UINT32: ClassVar[DataType] - UINT64: ClassVar[DataType] - INT8: ClassVar[DataType] - INT16: ClassVar[DataType] - INT32: ClassVar[DataType] - INT64: ClassVar[DataType] - FLOAT32: ClassVar[DataType] - FLOAT64: ClassVar[DataType] - -class Compressor: - NONE: ClassVar[Compressor] - BLOSC1: ClassVar[Compressor] - -class CompressionCodec: - NONE: ClassVar[CompressionCodec] - BLOSC_LZ4: ClassVar[CompressionCodec] - BLOSC_ZSTD: ClassVar[CompressionCodec] - -class DimensionType: - SPACE: ClassVar[DimensionType] - CHANNEL: ClassVar[DimensionType] - TIME: ClassVar[DimensionType] - OTHER: ClassVar[DimensionType] - -class S3Settings: - endpoint: str - bucket_name: str - access_key_id: str - secret_access_key: str - -class CompressionSettings: - compressor: Compressor - codec: CompressionCodec - level: int - shuffle: int - -class Dimension: - name: str - kind: DimensionType - array_size_px: int - chunk_size_px: int - shard_size_chunks: int - -class StreamSettings: - store_path: str - custom_metadata: Optional[str] - s3: Optional[S3Settings] - compression: Optional[CompressionSettings] - dimensions: List[Dimension] - multiscale: bool - data_type: DataType - version: ZarrVersion - -class ZarrStream: - def append(self, data: Any) -> None: - ... - - def is_active(self) -> bool: - ... \ No newline at end of file From eee52c1554fc560719dc2d2d484dca4edd910fa0 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Mon, 28 Oct 2024 12:14:32 -0400 Subject: [PATCH 27/28] Use PROJECT_SOURCE_DIR instead of CMAKE_SOURCE_DIR --- python/CMakeLists.txt | 2 +- src/streaming/CMakeLists.txt | 6 +++--- tests/integration/CMakeLists.txt | 4 ++-- tests/unit-tests/CMakeLists.txt | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b353c55..c569451 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -18,7 +18,7 @@ find_package(pybind11 REQUIRED) pybind11_add_module(acquire_zarr acquire-zarr-py.cpp) -target_include_directories(acquire_zarr PRIVATE ${CMAKE_SOURCE_DIR}/include) +target_include_directories(acquire_zarr PRIVATE ${PROJECT_SOURCE_DIR}/include) target_link_libraries(acquire_zarr PRIVATE acquire-zarr) set_target_properties(acquire_zarr PROPERTIES diff --git a/src/streaming/CMakeLists.txt b/src/streaming/CMakeLists.txt index 3ab50ac..cfa734c 100644 --- a/src/streaming/CMakeLists.txt +++ b/src/streaming/CMakeLists.txt @@ -33,10 +33,10 @@ add_library(${tgt} target_include_directories(${tgt} PUBLIC - $ + $ PRIVATE $ - $ + $ ) target_link_libraries(${tgt} PRIVATE @@ -60,6 +60,6 @@ install(TARGETS ${tgt} ) # Install public header files -install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/ +install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/ DESTINATION include ) \ No newline at end of file diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index bb50e1c..006c490 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -19,8 +19,8 @@ foreach (name ${tests}) MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>" ) target_include_directories(${tgt} PRIVATE - ${CMAKE_SOURCE_DIR}/include - ${CMAKE_SOURCE_DIR}/src/logger + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/src/logger ) target_link_libraries(${tgt} PRIVATE acquire-logger diff --git a/tests/unit-tests/CMakeLists.txt b/tests/unit-tests/CMakeLists.txt index ee9d165..aefbc34 100644 --- a/tests/unit-tests/CMakeLists.txt +++ b/tests/unit-tests/CMakeLists.txt @@ -34,9 +34,9 @@ foreach (name ${tests}) MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>" ) target_include_directories(${tgt} PRIVATE - ${CMAKE_SOURCE_DIR}/include - ${CMAKE_SOURCE_DIR}/src/logger - ${CMAKE_SOURCE_DIR}/src/streaming + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/src/logger + ${PROJECT_SOURCE_DIR}/src/streaming ) target_link_libraries(${tgt} PRIVATE acquire-logger From d6af9b6a7785b0c8847d3280669b04ba436c1d90 Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Mon, 28 Oct 2024 15:17:28 -0400 Subject: [PATCH 28/28] Respond to PR comments --- pyproject.toml | 2 +- src/streaming/zarr.stream.cpp | 26 +++++++++----------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a1d708a..a9f6537 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ testing = [ ] [tool.black] -target-version = ['py39', 'py310', 'py311'] +target-version = ['py39', 'py310', 'py311', 'py312'] line-length = 79 [tool.isort] diff --git a/src/streaming/zarr.stream.cpp b/src/streaming/zarr.stream.cpp index c31e43d..1f1f8cb 100644 --- a/src/streaming/zarr.stream.cpp +++ b/src/streaming/zarr.stream.cpp @@ -29,8 +29,7 @@ is_compressed_acquisition(const struct ZarrStreamSettings_s* settings) return nullptr != settings->compression_settings; } -[[nodiscard]] -bool +[[nodiscard]] bool validate_s3_settings(const ZarrS3Settings* settings) { if (zarr::is_empty_string(settings->endpoint, "S3 endpoint is empty")) { @@ -57,8 +56,7 @@ validate_s3_settings(const ZarrS3Settings* settings) return true; } -[[nodiscard]] -bool +[[nodiscard]] bool validate_filesystem_store_path(std::string_view data_root) { fs::path path(data_root); @@ -89,8 +87,7 @@ validate_filesystem_store_path(std::string_view data_root) return true; } -[[nodiscard]] -bool +[[nodiscard]] bool validate_compression_settings(const ZarrCompressionSettings* settings) { if (settings->compressor >= ZarrCompressorCount) { @@ -135,8 +132,7 @@ validate_compression_settings(const ZarrCompressionSettings* settings) return true; } -[[nodiscard]] -bool +[[nodiscard]] bool validate_custom_metadata(const char* metadata) { if (metadata == nullptr || !*metadata) { @@ -158,8 +154,7 @@ validate_custom_metadata(const char* metadata) return true; } -[[nodiscard]] -bool +[[nodiscard]] bool validate_dimension(const ZarrDimensionProperties* dimension, ZarrVersion version, bool is_append) @@ -191,8 +186,7 @@ validate_dimension(const ZarrDimensionProperties* dimension, return true; } -[[nodiscard]] -bool +[[nodiscard]] bool validate_settings(const struct ZarrStreamSettings_s* settings) { if (!settings) { @@ -292,8 +286,7 @@ dimension_type_to_string(ZarrDimensionType type) } template -[[nodiscard]] -std::byte* +[[nodiscard]] std::byte* scale_image(const std::byte* const src, size_t& bytes_of_src, size_t& width, @@ -881,8 +874,7 @@ ZarrStream_s::write_multiscale_frames_(const std::byte* data, return; } - std::function - scale; + std::function scale; std::function average2; switch (dtype_) { @@ -1005,4 +997,4 @@ finalize_stream(struct ZarrStream_s* stream) } return true; -} \ No newline at end of file +}