diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index d2810c39f723c..4d2f8b0ed3f40 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -75,6 +75,7 @@ set(SRC_FILES hash_utils.cc interval_holder.cc llvm_generator.cc + llvm_ir_store.cc llvm_types.cc literal_holder.cc projector.cc diff --git a/cpp/src/gandiva/cmake/GenerateBitcode.cmake b/cpp/src/gandiva/cmake/GenerateBitcode.cmake new file mode 100644 index 0000000000000..40155956e5d1a --- /dev/null +++ b/cpp/src/gandiva/cmake/GenerateBitcode.cmake @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Compile each source file in PRECOMPILED_SRC_LIST to LLVM bitcode with clang. +# One NAME.bc file per source is written to CMAKE_CURRENT_BINARY_DIR, and the +# list of generated files is stored in the caller's variable named by +# OUTPUT_VAR. +function(generate_bitcode PRECOMPILED_SRC_LIST OUTPUT_VAR) + set(LOCAL_BC_FILES "") + + if(MSVC) + # clang pretends to be a particular version of MSVC. The standard + # library uses newer C++ features, so we have to pass a matching -std + # version (C++17 here) to get the IR compilation to work. + # See https://cmake.org/cmake/help/latest/variable/MSVC_VERSION.html + # for MSVC_VERSION and Visual Studio version. 
+ # Check the lower bound first; LESS 1930 also matches every version below 1920. + if(MSVC_VERSION LESS 1920) + set(FMS_COMPATIBILITY 19.10) + elseif(MSVC_VERSION LESS 1930) + set(FMS_COMPATIBILITY 19.20) + else() + message(FATAL_ERROR "Unsupported MSVC_VERSION=${MSVC_VERSION}") + endif() + set(PLATFORM_CLANG_OPTIONS -std=c++17 -fms-compatibility + -fms-compatibility-version=${FMS_COMPATIBILITY}) + else() + set(PLATFORM_CLANG_OPTIONS -std=c++17) + endif() + + foreach(SRC_FILE ${PRECOMPILED_SRC_LIST}) + get_filename_component(SRC_BASE ${SRC_FILE} NAME_WE) + get_filename_component(ABSOLUTE_SRC ${SRC_FILE} ABSOLUTE) + set(BC_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SRC_BASE}.bc) + set(PRECOMPILE_COMMAND) + if(CMAKE_OSX_SYSROOT) + list(APPEND + PRECOMPILE_COMMAND + ${CMAKE_COMMAND} + -E + env + SDKROOT=${CMAKE_OSX_SYSROOT}) + endif() + list(APPEND + PRECOMPILE_COMMAND + ${CLANG_EXECUTABLE} + ${PLATFORM_CLANG_OPTIONS} + -DGANDIVA_IR + -DNDEBUG # DCHECK macros not implemented in precompiled code + -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols + -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols + -fno-use-cxa-atexit # Workaround for unresolved __dso_handle + -emit-llvm + -O3 + -c + ${ABSOLUTE_SRC} + -o + ${BC_FILE} + ${ARROW_GANDIVA_PC_CXX_FLAGS} + -I${CMAKE_SOURCE_DIR}/src + -I${ARROW_BINARY_DIR}/src) + + if(NOT ARROW_USE_NATIVE_INT128) + foreach(boost_include_dir ${Boost_INCLUDE_DIRS}) + list(APPEND PRECOMPILE_COMMAND -I${boost_include_dir}) + endforeach() + endif() + add_custom_command(OUTPUT ${BC_FILE} + COMMAND ${PRECOMPILE_COMMAND} + DEPENDS ${SRC_FILE}) + list(APPEND LOCAL_BC_FILES ${BC_FILE}) + endforeach() + set(${OUTPUT_VAR} "${LOCAL_BC_FILES}" PARENT_SCOPE) +endfunction() diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index b6c78da89d575..2b4121ee7af29 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -22,6 +22,7 @@ #endif #include "gandiva/engine.h" +#include "gandiva/llvm_ir_store.h" #include #include @@ -152,6 +153,7 @@ Status 
Engine::LoadFunctionIRs() { if (!functions_loaded_) { ARROW_RETURN_NOT_OK(LoadPreCompiledIR()); ARROW_RETURN_NOT_OK(DecimalIR::AddFunctions(this)); + ARROW_RETURN_NOT_OK(LoadExternalPreCompiledIR()); functions_loaded_ = true; } return Status::OK(); @@ -248,11 +250,11 @@ Status Engine::LoadPreCompiledIR() { Status::CodeGenError("Could not load module from IR: ", buffer_or_error.getError().message())); - std::unique_ptr buffer = std::move(buffer_or_error.get()); + auto buffer = std::move(buffer_or_error.get()); /// Parse the IR module. - llvm::Expected> module_or_error = - llvm::getOwningLazyBitcodeModule(std::move(buffer), *context()); + auto module_or_error = llvm::getOwningLazyBitcodeModule(std::move(buffer), *context()); + if (!module_or_error) { // NOTE: llvm::handleAllErrors() fails linking with RTTI-disabled LLVM builds // (ARROW-5148) @@ -261,7 +263,8 @@ Status Engine::LoadPreCompiledIR() { stream << module_or_error.takeError(); return Status::CodeGenError(stream.str()); } - std::unique_ptr ir_module = std::move(module_or_error.get()); + + auto ir_module = std::move(module_or_error.get()); // set dataLayout SetDataLayout(ir_module.get()); @@ -274,6 +277,27 @@ Status Engine::LoadPreCompiledIR() { return Status::OK(); } +Status Engine::LoadExternalPreCompiledIR() { + auto const& buffers = LLVMIRStore::Get()->GetIRBuffers(); + for (auto const& buffer : buffers) { + auto module_or_error = llvm::parseBitcodeFile(buffer->getMemBufferRef(), *context()); + if (!module_or_error) { + std::string str; + llvm::raw_string_ostream stream(str); + stream << module_or_error.takeError(); + return Status::CodeGenError("Failed to parse bitcode file, error: " + stream.str()); + } + auto ir_module = std::move(module_or_error.get()); + + ARROW_RETURN_IF(llvm::verifyModule(*ir_module, &llvm::errs()), + Status::CodeGenError("verify of IR Module failed")); + ARROW_RETURN_IF(llvm::Linker::linkModules(*module_, std::move(ir_module)), + Status::CodeGenError("failed to link IR Modules")); 
+ } + + return Status::OK(); +} + // Get rid of all functions that don't need to be compiled. // This helps in reducing the overall compilation time. This pass is trivial, // and is always done since the number of functions in gandiva is very high. diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index a4d6a5fd1a758..f547527baa327 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -93,6 +93,9 @@ class GANDIVA_EXPORT Engine { /// the main module. Status LoadPreCompiledIR(); + // load external pre-compiled IR modules from LLVMIRStore + Status LoadExternalPreCompiledIR(); + // Create and add mappings for cpp functions that can be accessed from LLVM. void AddGlobalMappings(); diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 67b7b404b325c..47a85a79fb0e9 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -66,7 +66,7 @@ SignatureMap FunctionRegistry::InitPCMap() { pc_registry_.insert(std::end(pc_registry_), v6.begin(), v6.end()); for (auto& elem : pc_registry_) { for (auto& func_signature : elem.signatures()) { - map.insert(std::make_pair(&(func_signature), &elem)); + map.emplace(&func_signature, &elem); } } @@ -79,4 +79,13 @@ const NativeFunction* FunctionRegistry::LookupSignature( return got == pc_registry_map_.end() ? 
nullptr : got->second; } +Status FunctionRegistry::AddFunction(NativeFunction func) { + pc_registry_.emplace_back(std::move(func)); // growth can move existing entries, + pc_registry_map_.clear(); // so drop the now-stale pointers and re-index them all + for (auto const& fn : pc_registry_) { + for (auto const& sig : fn.signatures()) pc_registry_map_.emplace(&sig, &fn); + } + return arrow::Status::OK(); +} + } // namespace gandiva diff --git a/cpp/src/gandiva/function_registry.h b/cpp/src/gandiva/function_registry.h index d9256326019c6..fcba481c9cf16 100644 --- a/cpp/src/gandiva/function_registry.h +++ b/cpp/src/gandiva/function_registry.h @@ -33,6 +33,10 @@ class GANDIVA_EXPORT FunctionRegistry { /// Lookup a pre-compiled function by its signature. const NativeFunction* LookupSignature(const FunctionSignature& signature) const; + /// Append a function to the registry and index its signatures so that + /// LookupSignature() can find them. + static Status AddFunction(NativeFunction func); + iterator begin() const; iterator end() const; iterator back() const; diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index 04f9b854b1d29..22996d049de90 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -87,6 +87,7 @@ class GANDIVA_EXPORT LLVMGenerator { FRIEND_TEST(TestLLVMGenerator, VerifyPCFunctions); FRIEND_TEST(TestLLVMGenerator, TestAdd); FRIEND_TEST(TestLLVMGenerator, TestNullInternal); + FRIEND_TEST(TestLLVMGenerator, VerifyExtendedPCFunctions); llvm::LLVMContext* context() { return engine_->context(); } llvm::IRBuilder<>* ir_builder() { return engine_->ir_builder(); } diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc index 028893b0b4594..33f858bb0321d 100644 --- a/cpp/src/gandiva/llvm_generator_test.cc +++ b/cpp/src/gandiva/llvm_generator_test.cc @@ -17,6 +17,7 @@ #include "gandiva/llvm_generator.h" +#include #include #include @@ -27,6 +28,7 @@ #include "gandiva/expression.h" #include "gandiva/func_descriptor.h" #include "gandiva/function_registry.h" +#include "gandiva/llvm_ir_store.h" #include "gandiva/tests/test_util.h" namespace gandiva { @@ 
-115,4 +117,14 @@ TEST_F(TestLLVMGenerator, TestAdd) { EXPECT_EQ(out_bitmap, 0ULL); } +TEST_F(TestLLVMGenerator, VerifyExtendedPCFunctions) { + ARROW_EXPECT_OK(LoadTestLLVMIR()); + std::unique_ptr generator; + ASSERT_OK(LLVMGenerator::Make(TestConfiguration(), false, &generator)); + + llvm::Module* module = generator->module(); + ASSERT_OK(generator->engine_->LoadFunctionIRs()); + EXPECT_NE(module->getFunction("multiply_by_two_int32"), nullptr); +} + } // namespace gandiva diff --git a/cpp/src/gandiva/llvm_ir_store.cc b/cpp/src/gandiva/llvm_ir_store.cc new file mode 100644 index 0000000000000..830edf346d93f --- /dev/null +++ b/cpp/src/gandiva/llvm_ir_store.cc @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "gandiva/llvm_ir_store.h" + +#include + +namespace gandiva { + +LLVMIRStore* LLVMIRStore::Get() { + static auto* singleton = new LLVMIRStore(); + return singleton; +} + +Status LLVMIRStore::Add(const std::string& bitcode_file_path) { + llvm::ErrorOr> buffer_or_error = + llvm::MemoryBuffer::getFile(bitcode_file_path); + + ARROW_RETURN_IF(!buffer_or_error, + Status::CodeGenError("Could not load module from IR file: ", + bitcode_file_path + " Error: " + + buffer_or_error.getError().message())); + + auto buffer = std::move(buffer_or_error.get()); + memory_buffers_.push_back(std::move(buffer)); + return Status::OK(); +} + +Status LLVMIRStore::Add(std::unique_ptr buffer) { + memory_buffers_.push_back(std::move(buffer)); + return Status::OK(); +} + +std::vector>& LLVMIRStore::GetIRBuffers() { + return memory_buffers_; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/llvm_ir_store.h b/cpp/src/gandiva/llvm_ir_store.h new file mode 100644 index 0000000000000..fdb11e6cf1085 --- /dev/null +++ b/cpp/src/gandiva/llvm_ir_store.h @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include + +namespace gandiva { +using arrow::Status; + +class GANDIVA_EXPORT LLVMIRStore { + public: + LLVMIRStore(const LLVMIRStore&) = delete; + LLVMIRStore& operator=(LLVMIRStore const&) = delete; + + static LLVMIRStore* Get(); + + // add an LLVM IR for a given bitcode file path + Status Add(const std::string& bitcode_file_path); + Status Add(std::unique_ptr buffer); + + std::vector>& GetIRBuffers(); + + private: + LLVMIRStore() = default; + std::vector> memory_buffers_; +}; +} // namespace gandiva diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt index 4ca5cc655b2a7..7a7ea33bb6d26 100644 --- a/cpp/src/gandiva/precompiled/CMakeLists.txt +++ b/cpp/src/gandiva/precompiled/CMakeLists.txt @@ -17,6 +17,8 @@ project(gandiva) +include(../cmake/GenerateBitcode.cmake) + set(PRECOMPILED_SRCS arithmetic_ops.cc bitmap.cc @@ -30,68 +32,7 @@ set(PRECOMPILED_SRCS timestamp_arithmetic.cc ../../arrow/util/basic_decimal.cc) -if(MSVC) - # clang pretends to be a particular version of MSVC. Thestandard - # library uses C++14 features, so we have to use that -std version - # to get the IR compilation to work. - # See https://cmake.org/cmake/help/latest/variable/MSVC_VERSION.html - # for MSVC_VERSION and Visual Studio version. - if(MSVC_VERSION LESS 1930) - set(FMS_COMPATIBILITY 19.20) - elseif(MSVC_VERSION LESS 1920) - set(FMS_COMPATIBILITY 19.10) - else() - message(FATAL_ERROR "Unsupported MSVC_VERSION=${MSVC_VERSION}") - endif() - set(PLATFORM_CLANG_OPTIONS -std=c++17 -fms-compatibility - -fms-compatibility-version=${FMS_COMPATIBILITY}) -else() - set(PLATFORM_CLANG_OPTIONS -std=c++17) -endif() - -# Create bitcode for each of the source files. 
-foreach(SRC_FILE ${PRECOMPILED_SRCS}) - get_filename_component(SRC_BASE ${SRC_FILE} NAME_WE) - get_filename_component(ABSOLUTE_SRC ${SRC_FILE} ABSOLUTE) - set(BC_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SRC_BASE}.bc) - set(PRECOMPILE_COMMAND) - if(CMAKE_OSX_SYSROOT) - list(APPEND - PRECOMPILE_COMMAND - ${CMAKE_COMMAND} - -E - env - SDKROOT=${CMAKE_OSX_SYSROOT}) - endif() - list(APPEND - PRECOMPILE_COMMAND - ${CLANG_EXECUTABLE} - ${PLATFORM_CLANG_OPTIONS} - -DGANDIVA_IR - -DNDEBUG # DCHECK macros not implemented in precompiled code - -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols - -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols - -fno-use-cxa-atexit # Workaround for unresolved __dso_handle - -emit-llvm - -O3 - -c - ${ABSOLUTE_SRC} - -o - ${BC_FILE} - ${ARROW_GANDIVA_PC_CXX_FLAGS} - -I${CMAKE_SOURCE_DIR}/src - -I${ARROW_BINARY_DIR}/src) - - if(NOT ARROW_USE_NATIVE_INT128) - foreach(boost_include_dir ${Boost_INCLUDE_DIRS}) - list(APPEND PRECOMPILE_COMMAND -I${boost_include_dir}) - endforeach() - endif() - add_custom_command(OUTPUT ${BC_FILE} - COMMAND ${PRECOMPILE_COMMAND} - DEPENDS ${SRC_FILE}) - list(APPEND BC_FILES ${BC_FILE}) -endforeach() +generate_bitcode("${PRECOMPILED_SRCS}" BC_FILES) # link all of the bitcode files into a single bitcode file. 
add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_BC_PATH} diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt index 5fa2da16c632f..a63162ff7a99c 100644 --- a/cpp/src/gandiva/tests/CMakeLists.txt +++ b/cpp/src/gandiva/tests/CMakeLists.txt @@ -40,3 +40,5 @@ if(ARROW_BUILD_STATIC) EXTRA_LINK_LIBS gandiva_static) endif() + +add_subdirectory(external_functions) diff --git a/cpp/src/gandiva/tests/external_functions/CMakeLists.txt b/cpp/src/gandiva/tests/external_functions/CMakeLists.txt new file mode 100644 index 0000000000000..642576b65f4db --- /dev/null +++ b/cpp/src/gandiva/tests/external_functions/CMakeLists.txt @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +if(NO_TESTS) + return() +endif() +# +## copy the testing data into the build directory +add_custom_target(extension-tests-data + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR}) + +include(../../cmake/GenerateBitcode.cmake) +set(TEST_PRECOMPILED_SRCS multiply_by_two.cc) +generate_bitcode("${TEST_PRECOMPILED_SRCS}" TEST_BC_FILES) +add_custom_target(extension-tests ALL DEPENDS extension-tests-data ${TEST_BC_FILES}) +# +## set the GANDIVA_EXTENSION_TEST_DIR macro so that the tests can pass regardless where they are run from +## corresponding extension test data files and bitcode will be copied/generated +set(TARGETS_DEPENDING_ON_TEST_BITCODE_FILES gandiva-internals-test gandiva-projector-test gandiva-projector-test-static) +foreach(TARGET ${TARGETS_DEPENDING_ON_TEST_BITCODE_FILES}) + if(TARGET ${TARGET}) + add_dependencies(${TARGET} extension-tests) + target_compile_definitions(${TARGET} + PRIVATE -DGANDIVA_EXTENSION_TEST_DIR="${CMAKE_CURRENT_BINARY_DIR}") + endif() +endforeach() + +add_dependencies(gandiva-tests extension-tests) diff --git a/cpp/src/gandiva/tests/external_functions/multiply_by_two.cc b/cpp/src/gandiva/tests/external_functions/multiply_by_two.cc new file mode 100644 index 0000000000000..cc7e2b0f8267f --- /dev/null +++ b/cpp/src/gandiva/tests/external_functions/multiply_by_two.cc @@ -0,0 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "multiply_by_two.h" // NOLINT + +int64_t multiply_by_two_int32(int32_t value) { return static_cast<int64_t>(value) * 2; } diff --git a/cpp/src/gandiva/tests/external_functions/multiply_by_two.h b/cpp/src/gandiva/tests/external_functions/multiply_by_two.h new file mode 100644 index 0000000000000..b8aec5185457b --- /dev/null +++ b/cpp/src/gandiva/tests/external_functions/multiply_by_two.h @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include <cstdint> + +extern "C" { +int64_t multiply_by_two_int32(int32_t value); +} diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index 462fae64393fd..740a8905286a3 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -26,6 +26,7 @@ #include #include "arrow/memory_pool.h" +#include "gandiva/function_registry.h" #include "gandiva/literal_holder.h" #include "gandiva/node.h" #include "gandiva/tests/test_util.h" @@ -3582,4 +3583,37 @@ TEST_F(TestProjector, TestSqrtFloat64) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } +// Registers multiply_by_two(int32) -> int64, implemented by multiply_by_two_int32. +Status RegisterTestExternalFunction() { + NativeFunction multiply_by_two_func( + "multiply_by_two", {}, {arrow::int32()}, arrow::int64(), + ResultNullableType::kResultNullIfNull, "multiply_by_two_int32"); + return FunctionRegistry::AddFunction(multiply_by_two_func); +} + +TEST_F(TestProjector, TestExtendedFunctions) { + ARROW_EXPECT_OK(RegisterTestExternalFunction()); + ARROW_EXPECT_OK(LoadTestLLVMIR()); + + auto in_field = field("in", arrow::int32()); + auto schema = arrow::schema({in_field}); + auto out_field = field("out", arrow::int64()); + // the multiply_by_two function is only available in the external function's IR bitcode + auto multiply = + TreeExprBuilder::MakeExpression("multiply_by_two", {in_field}, out_field); + + std::shared_ptr<Projector> projector; + // Stop here on failure: the projector pointer is dereferenced below. + ASSERT_OK(Projector::Make(schema, {multiply}, TestConfiguration(), &projector)); + + int num_records = 4; + auto array = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, true}); + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array}); + auto out = MakeArrowArrayInt64({2, 4, 6, 8}, {true, true, true, true}); + + arrow::ArrayVector outs; + ASSERT_OK(projector->Evaluate(*in_batch, pool_, &outs)); + EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); +} + } // namespace gandiva diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h index 
99df90769e0ad..fa0bb43326aba 100644 --- a/cpp/src/gandiva/tests/test_util.h +++ b/cpp/src/gandiva/tests/test_util.h @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include #include @@ -24,6 +25,7 @@ #include "arrow/testing/gtest_util.h" #include "gandiva/arrow.h" #include "gandiva/configuration.h" +#include "gandiva/llvm_ir_store.h" #pragma once @@ -101,4 +103,17 @@ static inline std::shared_ptr TestConfiguration() { return builder.DefaultConfiguration(); } +#ifndef GANDIVA_EXTENSION_TEST_DIR +#define GANDIVA_EXTENSION_TEST_DIR "." +#endif + +static inline Status LoadTestLLVMIR() { + auto* llvm_ir_store = LLVMIRStore::Get(); + if (llvm_ir_store->GetIRBuffers().empty()) { + std::filesystem::path base(GANDIVA_EXTENSION_TEST_DIR); + std::filesystem::path ir_file = base / "multiply_by_two.bc"; + return llvm_ir_store->Add(ir_file.string()); + } + return arrow::Status::OK(); +} } // namespace gandiva