From ac80e3a80110d80316d44aaf7616fe785ebd05a0 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 5 Dec 2024 10:03:10 +0100 Subject: [PATCH 1/5] bump-kernel --- CMakeLists.txt | 2 +- src/include/delta_kernel_ffi.hpp | 314 ++++++++++++++++++++++++------- 2 files changed, 251 insertions(+), 65 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 50df657..1113da8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,7 +141,7 @@ ExternalProject_Add( # the c++ headers. Currently, when bumping the kernel version, the produced # header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying # the fix - GIT_TAG v0.4.0 + GIT_TAG v0.5.0 # Prints the env variables passed to the cargo build to the terminal, useful # in debugging because passing them through CMake is an error-prone mess CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp index 6f1401e..3b6a615 100644 --- a/src/include/delta_kernel_ffi.hpp +++ b/src/include/delta_kernel_ffi.hpp @@ -3,8 +3,8 @@ #include #include #include -#include #include +#include namespace ffi { @@ -40,6 +40,7 @@ enum class KernelError { MalformedJsonError, MissingMetadataError, MissingProtocolError, + InvalidProtocolError, MissingMetadataAndProtocolError, ParseError, JoinFailureError, @@ -52,6 +53,12 @@ enum class KernelError { InternalError, InvalidExpression, InvalidLogPath, + InvalidCommitInfo, + FileAlreadyExists, + MissingCommitInfo, + UnsupportedError, + ParseIntervalError, + ChangeDataFeedUnsupported, }; struct CStringMap; @@ -73,6 +80,8 @@ struct ExclusiveFileReadResultIterator; struct KernelExpressionVisitorState; +struct SharedExpression; + struct SharedExternEngine; struct SharedGlobalScanState; @@ -179,19 +188,20 @@ struct ExternResult { /// Intentionally not Copy, Clone, Send, nor Sync. /// /// Whoever instantiates the struct must ensure it does not outlive the data it points to. 
The -/// compiler cannot help us here, because raw pointers don't have lifetimes. To reduce the risk of -/// accidental misuse, it is recommended to only instantiate this struct as a function arg, by -/// converting a string slice `Into` a `KernelStringSlice`. That way, the borrowed reference at call -/// site protects the `KernelStringSlice` until the function returns. Meanwhile, the callee should -/// assume that the slice is only valid until the function returns, and must not retain any -/// references to the slice or its data that could outlive the function call. -/// -/// ``` -/// # use delta_kernel_ffi::KernelStringSlice; -/// fn wants_slice(slice: KernelStringSlice) { } -/// let msg = String::from("hello"); -/// wants_slice(msg.into()); +/// compiler cannot help us here, because raw pointers don't have lifetimes. A good rule of thumb is +/// to always use the [`kernel_string_slice`] macro to create string slices, and to avoid returning +/// a string slice from a code block or function (since the move risks over-extending its lifetime): +/// +/// ```ignore +/// # // Ignored because this code is pub(crate) and doc tests cannot compile it +/// let dangling_slice = { +/// let tmp = String::from("tmp"); +/// kernel_string_slice!(tmp) +/// } /// ``` +/// +/// Meanwhile, the callee must assume that the slice is only valid until the function returns, and +/// must not retain any references to the slice or its data that might outlive the function call. struct KernelStringSlice { const char *ptr; uintptr_t len; @@ -205,22 +215,6 @@ using NullableCvoid = void *; /// function is that `kernel_str` is _only_ valid until the return from this function using AllocateStringFn = NullableCvoid (*)(KernelStringSlice kernel_str); -struct FileMeta { - KernelStringSlice path; - int64_t last_modified; - uintptr_t size; -}; - -/// Model iterators. This allows an engine to specify iteration however it likes, and we simply wrap -/// the engine functions. 
The engine retains ownership of the iterator. -struct EngineIterator { - void *data; - /// A function that should advance the iterator and return the next time from the data - /// If the iterator is complete, it should return null. It should be safe to - /// call `get_next()` multiple times if it returns null. - const void *(*get_next)(void *data); -}; - /// ABI-compatible struct for ArrowArray from C Data Interface /// See /// @@ -278,6 +272,182 @@ struct ArrowFFIData { }; #endif +struct FileMeta { + KernelStringSlice path; + int64_t last_modified; + uintptr_t size; +}; + +/// Model iterators. This allows an engine to specify iteration however it likes, and we simply wrap +/// the engine functions. The engine retains ownership of the iterator. +struct EngineIterator { + void *data; + /// A function that should advance the iterator and return the next time from the data + /// If the iterator is complete, it should return null. It should be safe to + /// call `get_next()` multiple times if it returns null. + const void *(*get_next)(void *data); +}; + +template +using VisitLiteralFn = void (*)(void *data, uintptr_t sibling_list_id, T value); + +using VisitVariadicFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + +using VisitUnaryFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + +using VisitBinaryOpFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + +/// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own +/// representation of a kernel expression. +/// +/// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a +/// particular size. Once allocated the engine returns an `id`, which can be any integer identifier +/// ([`usize`]) the engine wants, and will be passed back to the engine to identify the list in the +/// future. 
+/// +/// Every expression the kernel visits belongs to some list of "sibling" elements. The schema +/// itself is a list of schema elements, and every complex type (struct expression, array, variadic, etc) +/// contains a list of "child" elements. +/// 1. Before visiting any complex expression type, the kernel asks the engine to allocate a list to +/// hold its children +/// 2. When visiting any expression element, the kernel passes its parent's "child list" as the +/// "sibling list" the element should be appended to: +/// - For a struct literal, first visit each struct field and visit each value +/// - For a struct expression, visit each sub expression. +/// - For an array literal, visit each of the elements. +/// - For a variadic `and` or `or` expression, visit each sub-expression. +/// - For a binary operator expression, visit the left and right operands. +/// - For a unary `is null` or `not` expression, visit the sub-expression. +/// 3. When visiting a complex expression, the kernel also passes the "child list" containing +/// that element's (already-visited) children. +/// 4. The [`visit_expression`] method returns the id of the list of top-level columns +/// +/// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed +/// to visitor methods +/// TODO: Visit type information in struct field and null. This will likely involve using the schema +/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant +/// issue: https://github.com/delta-io/delta-kernel-rs/issues/412 +struct EngineExpressionVisitor { + /// An opaque engine state pointer + void *data; + /// Creates a new expression list, optionally reserving capacity up front + uintptr_t (*make_field_list)(void *data, uintptr_t reserve); + /// Visit a 32bit `integer` belonging to the list identified by `sibling_list_id`. 
+ VisitLiteralFn visit_literal_int; + /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_long; + /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_short; + /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_byte; + /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_float; + /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_double; + /// Visit a `string` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_string; + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_bool; + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. + /// The timestamp is microsecond precision and adjusted to UTC. + VisitLiteralFn visit_literal_timestamp; + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. + /// The timestamp is microsecond precision with no timezone. + VisitLiteralFn visit_literal_timestamp_ntz; + /// Visit a 32bit integer `date` representing days since UNIX epoch 1970-01-01. The `date` belongs + /// to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_date; + /// Visit binary data at the `buffer` with length `len` belonging to the list identified by + /// `sibling_list_id`. + void (*visit_literal_binary)(void *data, uintptr_t sibling_list_id, const uint8_t *buffer, uintptr_t len); + /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer + /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 + /// bits in `value_ls`. The `decimal` belongs to the list identified by `sibling_list_id`.
+ void (*visit_literal_decimal)(void *data, uintptr_t sibling_list_id, uint64_t value_ms, uint64_t value_ls, + uint8_t precision, uint8_t scale); + /// Visit a struct literal belonging to the list identified by `sibling_list_id`. + /// The field names of the struct are in a list identified by `child_field_list_id`. + /// The values of the struct are in a list identified by `child_value_list_id`. + void (*visit_literal_struct)(void *data, uintptr_t sibling_list_id, uintptr_t child_field_list_id, + uintptr_t child_value_list_id); + /// Visit an array literal belonging to the list identified by `sibling_list_id`. + /// The values of the array are in a list identified by `child_list_id`. + void (*visit_literal_array)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + /// Visits a null value belonging to the list identified by `sibling_list_id`. + void (*visit_literal_null)(void *data, uintptr_t sibling_list_id); + /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the `and` expression are in a list identified by `child_list_id` + VisitVariadicFn visit_and; + /// Visits an `or` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the `or` expression are in a list identified by `child_list_id` + VisitVariadicFn visit_or; + /// Visits a `not` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + VisitUnaryFn visit_not; + /// Visits a `is_null` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + VisitUnaryFn visit_is_null; + /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`.
+ /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_lt; + /// Visits the `LessThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_le; + /// Visits the `GreaterThan` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_gt; + /// Visits the `GreaterThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_ge; + /// Visits the `Equal` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_eq; + /// Visits the `NotEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_ne; + /// Visits the `Distinct` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_distinct; + /// Visits the `In` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_in; + /// Visits the `NotIn` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_not_in; + /// Visits the `Add` binary operator belonging to the list identified by `sibling_list_id`. 
+ /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_add; + /// Visits the `Minus` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_minus; + /// Visits the `Multiply` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_multiply; + /// Visits the `Divide` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_divide; + /// Visits the `column` belonging to the list identified by `sibling_list_id`. + void (*visit_column)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visits a `StructExpression` belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the `StructExpression` are in a list identified by `child_list_id` + void (*visit_struct_expr)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); +}; + +// This trickery is from https://github.com/mozilla/cbindgen/issues/402#issuecomment-578680163 +struct im_an_unused_struct_that_tricks_msvc_into_compilation { + ExternResult field; + ExternResult field2; + ExternResult field3; + ExternResult> field4; + ExternResult> field5; + ExternResult field6; + ExternResult field7; + ExternResult> field8; + ExternResult> field9; + ExternResult> field10; + ExternResult field11; +}; + /// A predicate that can be used to skip data when scanning. 
/// /// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, @@ -305,21 +475,6 @@ struct Stats { using CScanCallback = void (*)(NullableCvoid engine_context, KernelStringSlice path, int64_t size, const Stats *stats, const DvInfo *dv_info, const CStringMap *partition_map); -// This trickery is from https://github.com/mozilla/cbindgen/issues/402#issuecomment-578680163 -struct im_an_unused_struct_that_tricks_msvc_into_compilation { - ExternResult field; - ExternResult field2; - ExternResult field3; - ExternResult> field4; - ExternResult> field5; - ExternResult field6; - ExternResult field7; - ExternResult> field8; - ExternResult> field9; - ExternResult> field10; - ExternResult field11; -}; - /// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own /// representation of a schema from a particular schema within kernel. /// @@ -498,6 +653,32 @@ bool string_slice_next(Handle data, NullableCvoid engine_co /// Caller is responsible for (at most once) passing a valid pointer to a [`StringSliceIterator`] void free_string_slice_data(Handle data); +/// Get the number of rows in an engine data +/// +/// # Safety +/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData` +uintptr_t engine_data_length(Handle *data); + +/// Allow an engine to "unwrap" an [`ExclusiveEngineData`] into the raw pointer for the case it wants +/// to use its own engine data format +/// +/// # Safety +/// +/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData`. The Engine must +/// ensure the handle outlives the returned pointer. +void *get_raw_engine_data(Handle data); + +#if defined(DEFINE_DEFAULT_ENGINE) +/// Get an [`ArrowFFIData`] to allow binding to the arrow [C Data +/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and +/// the schema. 
If this function returns an `Ok` variant the _engine_ must free the returned struct. +/// +/// # Safety +/// data_handle must be a valid ExclusiveEngineData as read by the +/// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`. +ExternResult get_raw_arrow_data(Handle data, Handle engine); +#endif + /// Call the engine back with the next `EngingeData` batch read by Parquet/Json handler. The /// _engine_ "owns" the data that is passed into the `engine_visitor`, since it is allocated by the /// `Engine` being used for log-replay. If the engine wants the kernel to free this data, it _must_ @@ -565,38 +746,31 @@ uintptr_t visit_expression_literal_double(KernelExpressionVisitorState *state, d uintptr_t visit_expression_literal_bool(KernelExpressionVisitorState *state, bool value); -/// Get the number of rows in an engine data +/// Free the memory the passed SharedExpression /// /// # Safety -/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData` -uintptr_t engine_data_length(Handle *data); +/// Engine is responsible for passing a valid SharedExpression +void free_kernel_predicate(Handle data); -/// Allow an engine to "unwrap" an [`ExclusiveEngineData`] into the raw pointer for the case it wants -/// to use its own engine data format -/// -/// # Safety +/// Visit the expression of the passed [`SharedExpression`] Handle using the provided `visitor`. +/// See the documentation of [`EngineExpressionVisitor`] for a description of how this visitor +/// works. /// -/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData`. The Engine must -/// ensure the handle outlives the returned pointer. -void *get_raw_engine_data(Handle data); - -#if defined(DEFINE_DEFAULT_ENGINE) -/// Get an [`ArrowFFIData`] to allow binding to the arrow [C Data -/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and -/// the schema. 
+/// This method returns the id that the engine generated for the top level expression /// /// # Safety -/// data_handle must be a valid ExclusiveEngineData as read by the -/// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`. -ExternResult get_raw_arrow_data(Handle data, Handle engine); -#endif +/// +/// The caller must pass a valid SharedExpression Handle and expression visitor +uintptr_t visit_expression(const Handle *expression, EngineExpressionVisitor *visitor); /// Drops a scan. /// # Safety /// Caller is responsible for passing a [valid][Handle#Validity] scan handle. void free_scan(Handle scan); -/// Get a [`Scan`] over the table specified by the passed snapshot. +/// Get a [`Scan`] over the table specified by the passed snapshot. It is the responsibility of the +/// _engine_ to free this scan when complete by calling [`free_scan`]. +/// /// # Safety /// /// Caller is responsible for passing a valid snapshot pointer, and engine pointer @@ -650,6 +824,10 @@ void free_global_scan_state(Handle state); ExternResult> kernel_scan_data_init(Handle engine, Handle scan); +/// Call the provided `engine_visitor` on the next scan data item. The visitor will be provided with +/// a selection vector and engine data. It is the responsibility of the _engine_ to free these when +/// it is finished by calling [`free_bool_slice`] and [`free_engine_data`] respectively. +/// /// # Safety /// /// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by @@ -706,6 +884,14 @@ void visit_scan_data(Handle data, KernelBoolSlice selection /// Caller is responsible for passing a valid snapshot handle and schema visitor. uintptr_t visit_schema(Handle snapshot, EngineSchemaVisitor *visitor); +/// Constructs a kernel expression that is passed back as a SharedExpression handle. The expected +/// output expression can be found in `ffi/tests/test_expression_visitor/expected.txt`. 
+/// +/// # Safety +/// The caller is responsible for freeing the returned memory, either by calling +/// [`free_kernel_predicate`], or [`Handle::drop_handle`] +Handle get_testing_kernel_expression(); + } // extern "C" } // namespace ffi From 582202e5c83b8a4cc73590fb01f6dd87c1fab89a Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 5 Dec 2024 10:10:50 +0100 Subject: [PATCH 2/5] fix enum util --- src/delta_utils.cpp | 69 ++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index 1a8ff04..ae42676 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -107,37 +107,48 @@ ffi::EngineError *DuckDBEngineError::AllocateError(ffi::KernelError etype, ffi:: string DuckDBEngineError::KernelErrorEnumToString(ffi::KernelError err) { const char *KERNEL_ERROR_ENUM_STRINGS[] = { "UnknownError", - "FFIError", - "ArrowError", - "EngineDataTypeError", - "ExtractError", - "GenericError", - "IOErrorError", - "ParquetError", - "ObjectStoreError", - "ObjectStorePathError", - "Reqwest", - "FileNotFoundError", - "MissingColumnError", - "UnexpectedColumnTypeError", - "MissingDataError", - "MissingVersionError", - "DeletionVectorError", - "InvalidUrlError", - "MalformedJsonError", - "MissingMetadataError", - "MissingProtocolError", - "MissingMetadataAndProtocolError", - "ParseError", - "JoinFailureError", - "Utf8Error", - "ParseIntError", - "InvalidColumnMappingMode", - "InvalidTableLocation", - "InvalidDecimalError", + "FFIError", + "ArrowError", + "EngineDataTypeError", + "ExtractError", + "GenericError", + "IOErrorError", + "ParquetError", + "ObjectStoreError", + "ObjectStorePathError", + "ReqwestError", + "FileNotFoundError", + "MissingColumnError", + "UnexpectedColumnTypeError", + "MissingDataError", + "MissingVersionError", + "DeletionVectorError", + "InvalidUrlError", + "MalformedJsonError", + "MissingMetadataError", + "MissingProtocolError", + "InvalidProtocolError", +
"MissingMetadataAndProtocolError", + "ParseError", + "JoinFailureError", + "Utf8Error", + "ParseIntError", + "InvalidColumnMappingModeError", + "InvalidTableLocationError", + "InvalidDecimalError", + "InvalidStructDataError", + "InternalError", + "InvalidExpression", + "InvalidLogPath", + "InvalidCommitInfo", + "FileAlreadyExists", + "MissingCommitInfo", + "UnsupportedError", + "ParseIntervalError", + "ChangeDataFeedUnsupported" }; - static_assert(sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *) - 1 == (int)ffi::KernelError::InvalidDecimalError, + static_assert(sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *) - 1 == (int)ffi::KernelError::ChangeDataFeedUnsupported, "KernelErrorEnumStrings mismatched with kernel"); if ((int)err < sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *)) { From 14e2e4580c6e5ff65e093d18adf39bc29bb4905c Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 5 Dec 2024 10:13:16 +0100 Subject: [PATCH 3/5] checkout v4 --- .github/workflows/CloudTesting.yml | 2 +- .github/workflows/LocalTesting.yml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CloudTesting.yml b/.github/workflows/CloudTesting.yml index 93c627a..593694e 100644 --- a/.github/workflows/CloudTesting.yml +++ b/.github/workflows/CloudTesting.yml @@ -26,7 +26,7 @@ jobs: sudo apt-get update -y -qq sudo apt-get install -y -qq ninja-build make gcc-multilib g++-multilib zip unzip build-essential checkinstall curl libz-dev openssh-client - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: 'true' diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index b3a897d..e8ff44b 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -24,7 +24,7 @@ jobs: ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: 'true' @@ -90,7 +90,7 @@ jobs: 
VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: 'true' @@ -179,7 +179,7 @@ jobs: VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: 'true' From 307049a6e7b4104dc594fdffc2e0abd795305c3b Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 5 Dec 2024 10:19:40 +0100 Subject: [PATCH 4/5] fix old manylinux based ci job --- .github/workflows/LocalTesting.yml | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index e8ff44b..515bbfb 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -14,7 +14,6 @@ jobs: azurite-tests-linux: name: Azurite (local azure test server) tests (Linux) runs-on: ubuntu-latest - container: 'quay.io/pypa/manylinux2014_x86_64' env: VCPKG_TARGET_TRIPLET: 'x64-linux' GEN: Ninja @@ -29,27 +28,33 @@ jobs: fetch-depth: 0 submodules: 'true' + - name: Install Ninja + shell: bash + run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build + + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@main + with: + key: ${{ github.job }} + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + + - uses: actions/setup-node@v4 + - name: install Azure test service run: | - yum install -y nodejs npm npm install -g azurite echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo - yum install -y azure-cli - - - name: Setup ManyLinux2014 - run: | - ./duckdb/scripts/setup_manylinux2014.sh general aws-cli 
ccache ssh python_alias openssl + sudo apt-get install -y azure-cli - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here) run: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y echo "$HOME/.cargo/bin" >> $GITHUB_PATH - - name: Setup vcpkg - uses: lukka/run-vcpkg@v11.1 - with: - vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 - - name: Handle OpenSSL dependency for rust build run: | echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/x64-linux" >> $GITHUB_ENV From 526f5f3373dc064243cb938f6f99fe22b9c735ca Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 5 Dec 2024 10:27:54 +0100 Subject: [PATCH 5/5] remove old line of azurite initialization --- .github/workflows/LocalTesting.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index 515bbfb..34457eb 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -47,7 +47,6 @@ jobs: - name: install Azure test service run: | npm install -g azurite - echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo sudo apt-get install -y azure-cli - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here)