From 7bfd11933db0eedde644418c01254b76f3cad202 Mon Sep 17 00:00:00 2001 From: Zach Schuermann Date: Mon, 25 Nov 2024 15:14:18 -0800 Subject: [PATCH] [ffi][prefactor] add `kernel_utils` header in read_table example (#539) moved out some pieces of `read_table.c` and put into `kernel_utils.h/c`. just some small README updates otherwise. --- ffi/README.md | 10 +++- ffi/examples/read-table/CMakeLists.txt | 7 ++- ffi/examples/read-table/README.md | 23 ++++++++- ffi/examples/read-table/arrow.c | 1 + ffi/examples/read-table/kernel_utils.c | 70 ++++++++++++++++++++++++++ ffi/examples/read-table/kernel_utils.h | 24 +++++++++ ffi/examples/read-table/read_table.c | 66 +----------------------- ffi/examples/read-table/read_table.h | 19 ------- ffi/examples/read-table/schema.h | 1 + 9 files changed, 134 insertions(+), 87 deletions(-) create mode 100644 ffi/examples/read-table/kernel_utils.c create mode 100644 ffi/examples/read-table/kernel_utils.h diff --git a/ffi/README.md b/ffi/README.md index 7c53839ed..6106b685f 100644 --- a/ffi/README.md +++ b/ffi/README.md @@ -25,6 +25,14 @@ make ./read_table ../../../../kernel/tests/data/table-with-dv-small ``` +Note there are two configurations that can currently be configured in cmake: +```bash +# turn on VERBOSE mode (default is off) - print more diagnostics +$ cmake -DVERBOSE=yes .. +# turn off PRINT_DATA (default is on) - see below +$ cmake -DPRINT_DATA=no .. +``` + By default this has a dependency on [`arrow-glib`](https://github.com/apache/arrow/blob/main/c_glib/README.md). You can read install instructions for your platform [here](https://arrow.apache.org/install/). @@ -45,4 +53,4 @@ By default the VSCode C/C++ Extension does not use any defines flags. 
You can op "DEFINE_DEFAULT_ENGINE", "DEFINE_SYNC_ENGINE" ] -``` +``` \ No newline at end of file diff --git a/ffi/examples/read-table/CMakeLists.txt b/ffi/examples/read-table/CMakeLists.txt index 6533bf73e..22461b513 100644 --- a/ffi/examples/read-table/CMakeLists.txt +++ b/ffi/examples/read-table/CMakeLists.txt @@ -1,7 +1,8 @@ cmake_minimum_required(VERSION 3.12) project(read_table) option(PRINT_DATA "Print out the table data. Requires arrow-glib" ON) -add_executable(read_table read_table.c arrow.c) +option(VERBOSE "Enable for more diagnostics messages." OFF) +add_executable(read_table read_table.c arrow.c kernel_utils.c) target_compile_definitions(read_table PUBLIC DEFINE_DEFAULT_ENGINE) target_include_directories(read_table PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") target_link_directories(read_table PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/debug") @@ -29,6 +30,10 @@ else() target_link_options(read_table PRIVATE -g -fsanitize=address) endif() +if(VERBOSE) + target_compile_definitions(read_table PUBLIC VERBOSE) +endif(VERBOSE) + if(PRINT_DATA) include(FindPkgConfig) pkg_check_modules(GLIB REQUIRED glib-2.0) diff --git a/ffi/examples/read-table/README.md b/ffi/examples/read-table/README.md index abe7b86bc..11bef7f66 100644 --- a/ffi/examples/read-table/README.md +++ b/ffi/examples/read-table/README.md @@ -7,6 +7,22 @@ Simple example to show how to read and dump the data of a table using kernel's c This example is built with [cmake]. Instructions below assume you start in the directory containing this README. +Note that prior to building these examples you must build `delta_kernel_ffi` (see [the FFI readme](../../README.md) for details). 
TLDR: +```bash +# from repo root +$ cargo build -p delta_kernel_ffi [--release] [--features default-engine] +# from ffi/ dir +$ cargo build [--release] [--features default-engine] +``` + +There are two configurations that can currently be configured in cmake: +```bash +# turn on VERBOSE mode (default is off) - print more diagnostics +$ cmake -DVERBOSE=yes .. +# turn off PRINT_DATA (default is on) - see below +$ cmake -DPRINT_DATA=no .. +``` + ## Linux / MacOS Most likely something like this should work: @@ -37,8 +53,13 @@ This example uses the `arrow-glib (c)` component from arrow to print out data. T _installing_ that component which can be non-trivial. Please see [here](https://arrow.apache.org/install/) to find installation instructions for your system. +For macOS and homebrew this should be as easy as: +``` +brew install apache-arrow-glib +``` + If you don't want to have to install this, you can run `ccmake ..` (`cmake-gui.exe ..` on windows) from the `build` directory, and turn `OFF`/uncheckmark `PRINT_DATA`. Then "configure" and "generate" and follow the above instructions again. -[cmake]: https://cmake.org/ +[cmake]: https://cmake.org/ \ No newline at end of file diff --git a/ffi/examples/read-table/arrow.c b/ffi/examples/read-table/arrow.c index 0aeedc134..d58a2fa2d 100644 --- a/ffi/examples/read-table/arrow.c +++ b/ffi/examples/read-table/arrow.c @@ -1,4 +1,5 @@ #include "arrow.h" +#include "kernel_utils.h" #include #include diff --git a/ffi/examples/read-table/kernel_utils.c b/ffi/examples/read-table/kernel_utils.c new file mode 100644 index 000000000..64262414a --- /dev/null +++ b/ffi/examples/read-table/kernel_utils.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include "kernel_utils.h" + +// some diagnostic functions +void print_diag(char* fmt, ...) 
+{ +#ifdef VERBOSE + va_list args; + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); +#else + (void)(fmt); +#endif +} + +// Print out an error message, plus the code and kernel message of an error +void print_error(const char* msg, Error* err) +{ + printf("[ERROR] %s\n", msg); + printf(" Kernel Code: %i\n", err->etype.etype); + printf(" Kernel Msg: %s\n", err->msg); +} + +// free an error +void free_error(Error* error) +{ + free(error->msg); + free(error); +} + +// kernel will call this to allocate our errors. This can be used to create an "engine native" type +// error +EngineError* allocate_error(KernelError etype, const KernelStringSlice msg) +{ + Error* error = malloc(sizeof(Error)); + error->etype.etype = etype; + char* charmsg = allocate_string(msg); + error->msg = charmsg; + return (EngineError*)error; +} + +#ifdef WIN32 // windows doesn't have strndup +char *strndup(const char *s, size_t n) { + size_t len = strnlen(s, n); + char *p = malloc(len + 1); + if (p) { + memcpy(p, s, len); + p[len] = '\0'; + } + return p; +} +#endif + +// utility to turn a slice into a char* +void* allocate_string(const KernelStringSlice slice) +{ + return strndup(slice.ptr, slice.len); +} + +// utility function to convert key/val into slices and set them on a builder +void set_builder_opt(EngineBuilder* engine_builder, char* key, char* val) +{ + KernelStringSlice key_slice = { key, strlen(key) }; + KernelStringSlice val_slice = { val, strlen(val) }; + set_builder_option(engine_builder, key_slice, val_slice); +} + diff --git a/ffi/examples/read-table/kernel_utils.h b/ffi/examples/read-table/kernel_utils.h new file mode 100644 index 000000000..c6e60b960 --- /dev/null +++ b/ffi/examples/read-table/kernel_utils.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +// This is how we represent our errors. 
The kernel will ask us to construct this struct whenever it +// encounters an error, and then return the constructed EngineError to us +typedef struct Error +{ + struct EngineError etype; + char* msg; +} Error; + +void print_diag(char* fmt, ...); +// Print out an error message, plus the code and kernel message of an error +void print_error(const char* msg, Error* err); +// free an error +void free_error(Error* error); +// create a char* from a KernelStringSlice +void* allocate_string(const KernelStringSlice slice); +// kernel will call this to allocate our errors. This can be used to create an "engine native" type +// error +EngineError* allocate_error(KernelError etype, const KernelStringSlice msg); +// utility function to convert key/val into slices and set them on a builder +void set_builder_opt(EngineBuilder* engine_builder, char* key, char* val); diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index b40f7a0da..2c8738cd1 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -5,34 +5,7 @@ #include "arrow.h" #include "read_table.h" #include "schema.h" - -// some diagnostic functions -void print_diag(char* fmt, ...) 
-{ -#ifdef VERBOSE - va_list args; - va_start(args, fmt); - vprintf(fmt, args); - va_end(args); -#else - (void)(fmt); -#endif -} - -// Print out an error message, plus the code and kernel message of an error -void print_error(const char* msg, Error* err) -{ - printf("[ERROR] %s\n", msg); - printf(" Kernel Code: %i\n", err->etype.etype); - printf(" Kernel Msg: %s\n", err->msg); -} - -// free an error -void free_error(Error* error) -{ - free(error->msg); - free(error); -} +#include "kernel_utils.h" // Print the content of a selection vector if `VERBOSE` is defined in read_table.h void print_selection_vector(const char* indent, const KernelBoolSlice* selection_vec) @@ -68,43 +41,6 @@ void print_partition_info(struct EngineContext* context, const CStringMap* parti #endif } -// kernel will call this to allocate our errors. This can be used to create an "engine native" type -// error -EngineError* allocate_error(KernelError etype, const KernelStringSlice msg) -{ - Error* error = malloc(sizeof(Error)); - error->etype.etype = etype; - char* charmsg = allocate_string(msg); - error->msg = charmsg; - return (EngineError*)error; -} - -#ifdef WIN32 // windows doesn't have strndup -char *strndup(const char *s, size_t n) { - size_t len = strnlen(s, n); - char *p = malloc(len + 1); - if (p) { - memcpy(p, s, len); - p[len] = '\0'; - } - return p; -} -#endif - -// utility to turn a slice into a char* -void* allocate_string(const KernelStringSlice slice) -{ - return strndup(slice.ptr, slice.len); -} - -// utility function to convert key/val into slices and set them on a builder -void set_builder_opt(EngineBuilder* engine_builder, char* key, char* val) -{ - KernelStringSlice key_slice = { key, strlen(key) }; - KernelStringSlice val_slice = { val, strlen(val) }; - set_builder_option(engine_builder, key_slice, val_slice); -} - // Kernel will call this function for each file that should be scanned. 
The arguments include enough // context to constuct the correct logical data from the physically read parquet void scan_row_callback( diff --git a/ffi/examples/read-table/read_table.h b/ffi/examples/read-table/read_table.h index d3fe5ff9a..28d9c72dc 100644 --- a/ffi/examples/read-table/read_table.h +++ b/ffi/examples/read-table/read_table.h @@ -1,8 +1,5 @@ #pragma once -// uncomment below for more diagnotic messages -// #define VERBOSE - #include // A list of partition column names @@ -26,19 +23,3 @@ struct EngineContext struct ArrowContext* arrow_context; #endif }; - -// This is how we represent our errors. The kernel will ask us to contruct this struct whenever it -// enounters an error, and then return the contructed EngineError to us -typedef struct Error -{ - struct EngineError etype; - char* msg; -} Error; - -void print_diag(char* fmt, ...); -// Print out an error message, plus the code and kernel message of an error -void print_error(const char* msg, Error* err); -// free an error -void free_error(Error* error); -// create a char* from a KernelStringSlice -void* allocate_string(const KernelStringSlice slice); diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index cdef2f215..21fd40a2e 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -1,5 +1,6 @@ #include "delta_kernel_ffi.h" #include "read_table.h" +#include "kernel_utils.h" #include /**