Skip to content

Commit

Permalink
Add external function registry support to gandiva. A new AddFunction …
Browse files Browse the repository at this point in the history
…is added to FunctionRegistry so that function metadata can be registered, and external pre-compiled LLVM bitcode/IR buffer can be added so that they can be used for building LLVM module.
  • Loading branch information
niyue committed Oct 8, 2023
1 parent e1d7459 commit 458e4ac
Show file tree
Hide file tree
Showing 17 changed files with 378 additions and 67 deletions.
1 change: 1 addition & 0 deletions cpp/src/gandiva/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ set(SRC_FILES
hash_utils.cc
interval_holder.cc
llvm_generator.cc
llvm_ir_store.cc
llvm_types.cc
literal_holder.cc
projector.cc
Expand Down
84 changes: 84 additions & 0 deletions cpp/src/gandiva/cmake/GenerateBitcode.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Compile each source file in PRECOMPILED_SRC_LIST to an LLVM bitcode file.
#
# Arguments:
#   PRECOMPILED_SRC_LIST  list of source files to compile with clang
#   OUTPUT_VAR            name of a variable that is set in the caller's scope
#                         to the list of generated bitcode files
#
# Each output is written to ${CMAKE_CURRENT_BINARY_DIR}/<source base name>.bc.
function(generate_bitcode PRECOMPILED_SRC_LIST OUTPUT_VAR)
  set(LOCAL_BC_FILES "")

  if(MSVC)
    # clang pretends to be a particular version of MSVC, so pass the
    # compatibility version that corresponds to the MSVC toolset in use.
    # See https://cmake.org/cmake/help/latest/variable/MSVC_VERSION.html
    # for MSVC_VERSION and Visual Studio version.
    #
    # The narrower bound has to be tested first: every value below 1920 is
    # also below 1930, so testing 1930 first makes the 19.10 branch
    # unreachable.
    if(MSVC_VERSION LESS 1920)
      set(FMS_COMPATIBILITY 19.10)
    elseif(MSVC_VERSION LESS 1930)
      set(FMS_COMPATIBILITY 19.20)
    else()
      message(FATAL_ERROR "Unsupported MSVC_VERSION=${MSVC_VERSION}")
    endif()
    set(PLATFORM_CLANG_OPTIONS -std=c++17 -fms-compatibility
                               -fms-compatibility-version=${FMS_COMPATIBILITY})
  else()
    set(PLATFORM_CLANG_OPTIONS -std=c++17)
  endif()

  foreach(SRC_FILE ${PRECOMPILED_SRC_LIST})
    get_filename_component(SRC_BASE ${SRC_FILE} NAME_WE)
    get_filename_component(ABSOLUTE_SRC ${SRC_FILE} ABSOLUTE)
    set(BC_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SRC_BASE}.bc)
    set(PRECOMPILE_COMMAND)
    if(CMAKE_OSX_SYSROOT)
      # Run clang through "cmake -E env" so that SDKROOT is set for it.
      list(APPEND
           PRECOMPILE_COMMAND
           ${CMAKE_COMMAND}
           -E
           env
           SDKROOT=${CMAKE_OSX_SYSROOT})
    endif()
    list(APPEND
         PRECOMPILE_COMMAND
         ${CLANG_EXECUTABLE}
         ${PLATFORM_CLANG_OPTIONS}
         -DGANDIVA_IR
         -DNDEBUG # DCHECK macros not implemented in precompiled code
         -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols
         -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols
         -fno-use-cxa-atexit # Workaround for unresolved __dso_handle
         -emit-llvm
         -O3
         -c
         ${ABSOLUTE_SRC}
         -o
         ${BC_FILE}
         ${ARROW_GANDIVA_PC_CXX_FLAGS}
         -I${CMAKE_SOURCE_DIR}/src
         -I${ARROW_BINARY_DIR}/src)

    if(NOT ARROW_USE_NATIVE_INT128)
      # Without native int128 support the Boost headers are put on the include path.
      foreach(boost_include_dir ${Boost_INCLUDE_DIRS})
        list(APPEND PRECOMPILE_COMMAND -I${boost_include_dir})
      endforeach()
    endif()
    add_custom_command(OUTPUT ${BC_FILE}
                       COMMAND ${PRECOMPILE_COMMAND}
                       DEPENDS ${SRC_FILE})
    list(APPEND LOCAL_BC_FILES ${BC_FILE})
  endforeach()

  # Hand the collected bitcode file list back to the caller.
  set(${OUTPUT_VAR}
      "${LOCAL_BC_FILES}"
      PARENT_SCOPE)
endfunction()
32 changes: 28 additions & 4 deletions cpp/src/gandiva/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#endif

#include "gandiva/engine.h"
#include "gandiva/llvm_ir_store.h"

#include <iostream>
#include <memory>
Expand Down Expand Up @@ -152,6 +153,7 @@ Status Engine::LoadFunctionIRs() {
if (!functions_loaded_) {
ARROW_RETURN_NOT_OK(LoadPreCompiledIR());
ARROW_RETURN_NOT_OK(DecimalIR::AddFunctions(this));
ARROW_RETURN_NOT_OK(LoadExternalPreCompiledIR());
functions_loaded_ = true;
}
return Status::OK();
Expand Down Expand Up @@ -248,11 +250,11 @@ Status Engine::LoadPreCompiledIR() {
Status::CodeGenError("Could not load module from IR: ",
buffer_or_error.getError().message()));

std::unique_ptr<llvm::MemoryBuffer> buffer = std::move(buffer_or_error.get());
auto buffer = std::move(buffer_or_error.get());

/// Parse the IR module.
llvm::Expected<std::unique_ptr<llvm::Module>> module_or_error =
llvm::getOwningLazyBitcodeModule(std::move(buffer), *context());
auto module_or_error = llvm::getOwningLazyBitcodeModule(std::move(buffer), *context());

if (!module_or_error) {
// NOTE: llvm::handleAllErrors() fails linking with RTTI-disabled LLVM builds
// (ARROW-5148)
Expand All @@ -261,7 +263,8 @@ Status Engine::LoadPreCompiledIR() {
stream << module_or_error.takeError();
return Status::CodeGenError(stream.str());
}
std::unique_ptr<llvm::Module> ir_module = std::move(module_or_error.get());

auto ir_module = std::move(module_or_error.get());

// set dataLayout
SetDataLayout(ir_module.get());
Expand All @@ -274,6 +277,27 @@ Status Engine::LoadPreCompiledIR() {
return Status::OK();
}

/// Parse every bitcode buffer registered in LLVMIRStore, verify the resulting
/// module and link it into this engine's main module.
///
/// \return Status::OK() when every buffer was linked, otherwise a CodeGenError
/// for the first buffer that failed to parse, verify or link.
Status Engine::LoadExternalPreCompiledIR() {
  auto const& buffers = LLVMIRStore::Get()->GetIRBuffers();
  for (auto const& buffer : buffers) {
    auto module_or_error = llvm::parseBitcodeFile(buffer->getMemBufferRef(), *context());
    if (!module_or_error) {
      // Stream the llvm::Error into a string so it can be reported in the Status.
      std::string str;
      llvm::raw_string_ostream stream(str);
      stream << module_or_error.takeError();
      return Status::CodeGenError("Failed to parse bitcode file, error: " + stream.str());
    }
    auto ir_module = std::move(module_or_error.get());

    // Set the data layout on the external module before verifying and linking,
    // exactly as LoadPreCompiledIR() does for the built-in pre-compiled module.
    SetDataLayout(ir_module.get());

    ARROW_RETURN_IF(llvm::verifyModule(*ir_module, &llvm::errs()),
                    Status::CodeGenError("verify of IR Module failed"));
    ARROW_RETURN_IF(llvm::Linker::linkModules(*module_, std::move(ir_module)),
                    Status::CodeGenError("failed to link IR Modules"));
  }

  return Status::OK();
}

// Get rid of all functions that don't need to be compiled.
// This helps in reducing the overall compilation time. This pass is trivial,
// and is always done since the number of functions in gandiva is very high.
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/gandiva/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ class GANDIVA_EXPORT Engine {
/// the main module.
Status LoadPreCompiledIR();

/// Load the external pre-compiled IR buffers registered in LLVMIRStore:
/// each buffer is parsed as bitcode, verified and linked into the main module.
/// Returns a CodeGenError status if parsing, verification or linking fails.
Status LoadExternalPreCompiledIR();

// Create and add mappings for cpp functions that can be accessed from LLVM.
void AddGlobalMappings();

Expand Down
11 changes: 10 additions & 1 deletion cpp/src/gandiva/function_registry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ SignatureMap FunctionRegistry::InitPCMap() {
pc_registry_.insert(std::end(pc_registry_), v6.begin(), v6.end());
for (auto& elem : pc_registry_) {
for (auto& func_signature : elem.signatures()) {
map.insert(std::make_pair(&(func_signature), &elem));
map.emplace(&func_signature, &elem);
}
}

Expand All @@ -79,4 +79,13 @@ const NativeFunction* FunctionRegistry::LookupSignature(
return got == pc_registry_map_.end() ? nullptr : got->second;
}

/// Register an additional native function and make all of its signatures
/// resolvable through LookupSignature().
///
/// \param func the function metadata to register; it is moved into the registry.
/// \return Status::OK().
///
/// NOTE(review): pointers previously returned by LookupSignature() point into
/// pc_registry_ and may no longer be valid after this call if the container
/// relocated its elements -- confirm that callers do not cache them across
/// registrations.
Status FunctionRegistry::AddFunction(NativeFunction func) {
  pc_registry_.emplace_back(std::move(func));

  // pc_registry_map_ stores raw pointers to the registry elements and to the
  // signatures they own. Appending to pc_registry_ may relocate the existing
  // elements, which would leave every previously inserted entry dangling, so
  // the index is rebuilt from scratch (same procedure as InitPCMap()) instead
  // of only inserting the entries for the new function.
  pc_registry_map_.clear();
  for (auto const& registered_func : pc_registry_) {
    for (auto const& func_signature : registered_func.signatures()) {
      pc_registry_map_.emplace(&func_signature, &registered_func);
    }
  }
  return arrow::Status::OK();
}

} // namespace gandiva
2 changes: 2 additions & 0 deletions cpp/src/gandiva/function_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class GANDIVA_EXPORT FunctionRegistry {
/// Lookup a pre-compiled function by its signature.
const NativeFunction* LookupSignature(const FunctionSignature& signature) const;

/// Register an additional function: `func` is appended to the registry and
/// each of its signatures is added to the map used by LookupSignature().
static Status AddFunction(NativeFunction func);

iterator begin() const;
iterator end() const;
iterator back() const;
Expand Down
1 change: 1 addition & 0 deletions cpp/src/gandiva/llvm_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class GANDIVA_EXPORT LLVMGenerator {
FRIEND_TEST(TestLLVMGenerator, VerifyPCFunctions);
FRIEND_TEST(TestLLVMGenerator, TestAdd);
FRIEND_TEST(TestLLVMGenerator, TestNullInternal);
FRIEND_TEST(TestLLVMGenerator, VerifyExtendedPCFunctions);

llvm::LLVMContext* context() { return engine_->context(); }
llvm::IRBuilder<>* ir_builder() { return engine_->ir_builder(); }
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/gandiva/llvm_generator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "gandiva/llvm_generator.h"

#include <filesystem>
#include <memory>
#include <vector>

Expand All @@ -27,6 +28,7 @@
#include "gandiva/expression.h"
#include "gandiva/func_descriptor.h"
#include "gandiva/function_registry.h"
#include "gandiva/llvm_ir_store.h"
#include "gandiva/tests/test_util.h"

namespace gandiva {
Expand Down Expand Up @@ -115,4 +117,14 @@ TEST_F(TestLLVMGenerator, TestAdd) {
EXPECT_EQ(out_bitmap, 0ULL);
}

// Checks that a function coming from externally registered IR is present in
// the generator's module once the function IRs have been loaded.
TEST_F(TestLLVMGenerator, VerifyExtendedPCFunctions) {
  // Loading the test IR is a precondition for everything below, so a failure
  // must stop the test here instead of surfacing later as a missing function.
  ASSERT_OK(LoadTestLLVMIR());
  std::unique_ptr<LLVMGenerator> generator;
  ASSERT_OK(LLVMGenerator::Make(TestConfiguration(), false, &generator));

  llvm::Module* module = generator->module();
  ASSERT_OK(generator->engine_->LoadFunctionIRs());
  EXPECT_NE(module->getFunction("multiply_by_two_int32"), nullptr);
}

} // namespace gandiva
52 changes: 52 additions & 0 deletions cpp/src/gandiva/llvm_ir_store.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "gandiva/llvm_ir_store.h"

#include <llvm/Bitcode/BitcodeReader.h>

namespace gandiva {

/// Return the shared LLVMIRStore instance, creating it on the first call.
LLVMIRStore* LLVMIRStore::Get() {
  // The instance is heap-allocated once and handed out as a raw pointer; it is
  // not deleted anywhere in this file.
  static LLVMIRStore* const instance = new LLVMIRStore();
  return instance;
}

/// Read the bitcode file at `bitcode_file_path` into a memory buffer and
/// append that buffer to the store.
///
/// \return a CodeGenError naming the path and the underlying error when the
/// file cannot be read, Status::OK() otherwise.
Status LLVMIRStore::Add(const std::string& bitcode_file_path) {
  auto file_contents = llvm::MemoryBuffer::getFile(bitcode_file_path);

  ARROW_RETURN_IF(!file_contents,
                  Status::CodeGenError("Could not load module from IR file: ",
                                       bitcode_file_path + " Error: " +
                                           file_contents.getError().message()));

  // Ownership of the loaded buffer moves into the store.
  memory_buffers_.push_back(std::move(file_contents.get()));
  return Status::OK();
}

/// Take ownership of an already loaded IR/bitcode buffer and append it to the
/// store.
///
/// \param buffer the buffer to store; must not be null.
/// \return Status::Invalid if `buffer` is null, Status::OK() otherwise.
Status LLVMIRStore::Add(std::unique_ptr<llvm::MemoryBuffer> buffer) {
  // Stored buffers are dereferenced unconditionally when the engine parses
  // them, so a null buffer is rejected here rather than crashing later.
  ARROW_RETURN_IF(buffer == nullptr,
                  Status::Invalid("LLVM IR buffer to add must not be null"));
  memory_buffers_.push_back(std::move(buffer));
  return Status::OK();
}

/// Return a mutable reference to the vector of stored IR buffers. The buffers
/// stay owned by the store.
std::vector<std::unique_ptr<llvm::MemoryBuffer>>& LLVMIRStore::GetIRBuffers() {
return memory_buffers_;
}

} // namespace gandiva
46 changes: 46 additions & 0 deletions cpp/src/gandiva/llvm_ir_store.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <arrow/status.h>
#include <gandiva/visibility.h>
#include <llvm/Support/MemoryBuffer.h>
#include <memory>
#include <vector>

namespace gandiva {
using arrow::Status;

class GANDIVA_EXPORT LLVMIRStore {
public:
LLVMIRStore(const LLVMIRStore&) = delete;
LLVMIRStore& operator=(LLVMIRStore const&) = delete;

static LLVMIRStore* Get();

// add an LLVM IR for a given bitcode file path
Status Add(const std::string& bitcode_file_path);
Status Add(std::unique_ptr<llvm::MemoryBuffer> buffer);

std::vector<std::unique_ptr<llvm::MemoryBuffer>>& GetIRBuffers();

private:
LLVMIRStore() = default;
std::vector<std::unique_ptr<llvm::MemoryBuffer>> memory_buffers_;
};
} // namespace gandiva
Loading

0 comments on commit 458e4ac

Please sign in to comment.