diff --git a/.github/actions/prepare_git_user/action.yml b/.github/actions/prepare_git_user/action.yml new file mode 100644 index 000000000..c44f1bd9d --- /dev/null +++ b/.github/actions/prepare_git_user/action.yml @@ -0,0 +1,10 @@ +name: 'Setup Git User for Applying Patches' +description: 'Setup bot user so git doesnt complain when patching' + # See this thread for more details https://github.community/t/github-actions-bot-email-address/17204/5 +runs: + using: "composite" + steps: + - shell: bash + run: | + git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a2c35f5da..33fffd41e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,7 @@ jobs: - uses: actions/checkout@v2 with: fetch-depth: 0 + - uses: ./.github/actions/prepare_git_user - name: Build with build script shell: bash run: | @@ -48,6 +49,11 @@ jobs: export VCPKG_ROOT=$(pwd)/../lifting-bits-downloads/vcpkg_${{ matrix.image.name }}-${{ matrix.image.tag }}_llvm-${{ matrix.llvm }}_amd64 export INSTALL_DIR=$(pwd)/remill-preset-install ./scripts/build-preset.sh release + + - name: Install Python Test Deps + shell: bash + run: | + pip3 install --user ./scripts/diff_tester_export_insns - name: Run tests shell: bash working-directory: remill-build @@ -102,6 +108,7 @@ jobs: - uses: actions/checkout@v2 with: fetch-depth: 0 + - uses: ./.github/actions/prepare_git_user - name: Build with build script shell: bash run: | @@ -114,6 +121,10 @@ jobs: export VCPKG_ROOT=$(pwd)/../lifting-bits-downloads/vcpkg_${{ matrix.os}}_llvm-${{ matrix.llvm }}_xcode-13.0_amd64 export INSTALL_DIR=$(pwd)/remill-preset-install ./scripts/build-preset.sh release + - name: Install Python Test Deps + shell: bash + run: | + pip3 install --user ./scripts/diff_tester_export_insns - name: Run tests shell: bash working-directory: remill-build diff --git 
a/.gitignore b/.gitignore index e331c6e90..cb5905b53 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,6 @@ cmake-build-debug cmake-build-release compile_commands.json -bin/* third_party/* build/* diff --git a/CMakeLists.txt b/CMakeLists.txt index a1858504a..febd65800 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -20,7 +20,7 @@ include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ccache.cmake") project(remill C CXX ASM) include(GNUInstallDirs) - +include(FetchContent) include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/settings.cmake") include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/utils.cmake") include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/options.cmake") @@ -72,6 +72,7 @@ include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/BCCompiler.cmake") # Go find only the static libraries of LLVM, and link against those. foreach(LLVM_LIB IN LISTS LLVM_AVAILABLE_LIBS) get_target_property(LLVM_LIB_TYPE ${LLVM_LIB} TYPE) + if(LLVM_LIB_TYPE STREQUAL "STATIC_LIBRARY") list(APPEND LLVM_LIBRARIES "${LLVM_LIB}") endif() @@ -88,7 +89,7 @@ target_link_libraries(thirdparty_llvm INTERFACE # Microsoft Z3 with LLVM. 
Not exactly used in remill, but LLVM doesn't link # against it correctly # NOTE: If changing this, also replicate in remillConfig file -if (LLVM_WITH_Z3) +if(LLVM_WITH_Z3) find_package(Z3 CONFIG REQUIRED 4.7.1) get_target_property(LLVMSupport_LIBS LLVMSupport INTERFACE_LINK_LIBRARIES) list(REMOVE_ITEM LLVMSupport_LIBS Z3) @@ -120,15 +121,32 @@ target_link_libraries(thirdparty_gflags INTERFACE gflags ) +set(sleigh_ENABLE_TESTS OFF) +set(sleigh_ADDITIONAL_PATCHES "${CMAKE_CURRENT_SOURCE_DIR}/patches/sleigh/x86-ia.patch;${CMAKE_CURRENT_SOURCE_DIR}/patches/sleigh/arm-thumb.patch") + +# GHIDRA SLEIGH +FetchContent_Declare(sleigh + GIT_REPOSITORY https://github.com/lifting-bits/sleigh.git + GIT_TAG 5ee2f2c16250a6529108e6a6fff89e0f147502d2 +) + +FetchContent_MakeAvailable(sleigh) +add_library(thirdparty_sleigh INTERFACE) +target_link_libraries(thirdparty_sleigh INTERFACE + sleigh::sla + sleigh::decomp + sleigh::support +) + # Windows SDK add_library(thirdparty_win32 INTERFACE) + if(DEFINED WIN32) target_link_libraries(thirdparty_win32 INTERFACE - "Kernel32.lib" + "Kernel32.lib" ) endif() - # For Linux builds, group LLVM libraries into a single group # that avoids frustrating library ordering issues. if(UNIX AND NOT APPLE) @@ -150,7 +168,6 @@ option(REMILL_BUILD_SPARC32_RUNTIME "Build the Runtime for SPARC32. Turn this of # # add everything as public. 
- add_library(remill_settings INTERFACE) target_include_directories(remill_settings INTERFACE @@ -235,7 +252,7 @@ target_compile_definitions(remill_settings INTERFACE "REMILL_BUILD_SEMANTICS_DIR_SPARC64=\"${REMILL_BUILD_SEMANTICS_DIR_SPARC64}\"" ) -set(THIRDPARTY_LIBRARY_LIST thirdparty_llvm thirdparty_xed thirdparty_glog thirdparty_gflags) +set(THIRDPARTY_LIBRARY_LIST thirdparty_llvm thirdparty_xed thirdparty_glog thirdparty_gflags thirdparty_sleigh) target_link_libraries(remill_settings INTERFACE ${THIRDPARTY_LIBRARY_LIST} ) @@ -259,7 +276,6 @@ target_link_libraries(remill INTERFACE # # Also install clang, libllvm and llvm-link # - set(INSTALLED_CLANG_NAME "remill-clang-${REMILL_LLVM_VERSION}${CMAKE_EXECUTABLE_SUFFIX}") set(INSTALLED_LLVMLINK_NAME "remill-llvm-link-${REMILL_LLVM_VERSION}${CMAKE_EXECUTABLE_SUFFIX}") @@ -269,15 +285,18 @@ InstallExternalTarget("ext_llvmlink" "${LLVMLINK_PATH}" "BIN" "${INSTALLED_LLVML GetTargetTree(THIRDPARTY_LIBRARIES ${THIRDPARTY_LIBRARY_LIST}) GetPublicIncludeFolders(THIRDPARTY_INCLUDE_DIRECTORIES ${THIRDPARTY_LIBRARIES}) + foreach(THIRDPARTY_LIB IN LISTS THIRDPARTY_LIBRARIES) string(SUBSTRING "${THIRDPARTY_LIB}" 0 1 THIRDPARTY_LIB_PREFIX) + if(TARGET ${THIRDPARTY_LIB}) get_target_property(THIRDPARTY_LIB_TYPE ${THIRDPARTY_LIB} TYPE) + if(THIRDPARTY_LIB_TYPE STREQUAL "STATIC_LIBRARY" OR THIRDPARTY_LIB_TYPE STREQUAL "SHARED_LIBRARY") list(APPEND THIRDPARTY_LIBRARY_FILES "$${}") endif() elseif("${THIRDPARTY_LIB_PREFIX}" STREQUAL "$${}") - # E.g. $ + # E.g. 
$ else() list(APPEND THIRDPARTY_LIBRARY_FILES "${THIRDPARTY_LIB}") endif() @@ -288,31 +307,33 @@ list(REMOVE_DUPLICATES THIRDPARTY_LIBRARY_FILES) # # additional targets # - add_custom_target(semantics) +# shared JIT tools +add_subdirectory(test_runner_lib) + # tools add_subdirectory(bin) if(REMILL_ENABLE_INSTALL_TARGET) install(TARGETS remill EXPORT remillTargets) - + install(TARGETS remill_settings ${THIRDPARTY_LIBRARY_LIST} EXPORT remillTargets ) - + # First do the basic substitutions. configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/remillConfig.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/remillConfig.cmake" @ONLY ) - + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/remillConfig.cmake" - "${CMAKE_CURRENT_LIST_DIR}/cmake/vcpkg_helper.cmake" + "${CMAKE_CURRENT_LIST_DIR}/cmake/vcpkg_helper.cmake" DESTINATION "${REMILL_INSTALL_LIB_DIR}/cmake/remill" ) - + install(DIRECTORY "${REMILL_INCLUDE_DIR}/remill/" DESTINATION "${REMILL_INSTALL_INCLUDE_DIR}/remill" ) @@ -322,7 +343,7 @@ if(REMILL_ENABLE_INSTALL_TARGET) endif() # tests -if (REMILL_ENABLE_TESTING) +if(REMILL_ENABLE_TESTING) # Tests require enabling exports on binaries # https://cmake.org/cmake/help/latest/variable/CMAKE_ENABLE_EXPORTS.html#variable:CMAKE_ENABLE_EXPORTS set(CMAKE_ENABLE_EXPORTS ON) @@ -330,6 +351,11 @@ if (REMILL_ENABLE_TESTING) find_package(Threads REQUIRED) add_custom_target(test_dependencies) + if(REMILL_ENABLE_TESTING_SLEIGH_THUMB) + message(STATUS "thumb tests enabled") + add_subdirectory(tests/Thumb) + endif() + if(REMILL_ENABLE_TESTING_X86) message(STATUS "X86 tests enabled") add_subdirectory(tests/X86) diff --git a/Dockerfile b/Dockerfile index 48fe12cba..295a8f065 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,10 +30,15 @@ ARG LLVM_VERSION WORKDIR /remill COPY ./ ./ + +RUN git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com" && git config --global user.name "github-actions[bot]" + RUN ./scripts/build.sh \ - --llvm-version ${LLVM_VERSION} \ - --prefix 
/opt/trailofbits \ - --extra-cmake-args "-DCMAKE_BUILD_TYPE=Release" + --llvm-version ${LLVM_VERSION} \ + --prefix /opt/trailofbits \ + --extra-cmake-args "-DCMAKE_BUILD_TYPE=Release" + +RUN pip3 install ./scripts/diff_tester_export_insns RUN cd remill-build && \ cmake --build . --target test_dependencies -- -j $(nproc) && \ diff --git a/bin/CMakeLists.txt b/bin/CMakeLists.txt index 1a6a62a8e..75477acd8 100644 --- a/bin/CMakeLists.txt +++ b/bin/CMakeLists.txt @@ -1,10 +1,10 @@ -# Copyright (c) 2017 Trail of Bits, Inc. +# Copyright (c) 2022 Trail of Bits, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -14,3 +14,6 @@ add_subdirectory(lift) +if(REMILL_ENABLE_DIFFERENTIAL_TESTING) + add_subdirectory(differential_tester_x86) +endif() \ No newline at end of file diff --git a/bin/differential_tester_x86/CMakeLists.txt b/bin/differential_tester_x86/CMakeLists.txt new file mode 100644 index 000000000..10c4fa760 --- /dev/null +++ b/bin/differential_tester_x86/CMakeLists.txt @@ -0,0 +1,35 @@ +# Copyright (c) 2022 Trail of Bits, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +find_package(Python COMPONENTS Interpreter) +add_executable( + lift-and-compare + LiftAndCompare.cpp + Whitelist.cpp + Whitelist.h +) + +target_link_libraries( + lift-and-compare + PRIVATE + remill + thirdparty_glog + test-runner +) + +set_property(TARGET lift-and-compare PROPERTY ENABLE_EXPORTS ON) +set_property(TARGET lift-and-compare PROPERTY POSITION_INDEPENDENT_CODE ON) +enable_testing() + +add_test(NAME "small_diff_test" COMMAND "${Python_EXECUTABLE}" ${REMILL_SOURCE_DIR}/scripts/diff_tester_export_insns/diff_tester_export_insns/ci_runner.py --required_success_rate 1.0 --difftester_bin ${CMAKE_BINARY_DIR}/bin/differential_tester_x86/lift-and-compare --workdir ${CMAKE_BINARY_DIR} ${REMILL_SOURCE_DIR}/bin/differential_tester_x86/data/small_test/ --whitelist_file ${REMILL_SOURCE_DIR}/bin/differential_tester_x86/whitelist.json) \ No newline at end of file diff --git a/bin/differential_tester_x86/LiftAndCompare.cpp b/bin/differential_tester_x86/LiftAndCompare.cpp new file mode 100644 index 000000000..2909ba85b --- /dev/null +++ b/bin/differential_tester_x86/LiftAndCompare.cpp @@ -0,0 +1,469 @@ +/* + * Copyright (c) 2022 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "Whitelist.h" +#include "gtest/gtest.h" + + +DEFINE_string(target_insn_file, "", "Path to input test cases"); +DEFINE_uint64(num_iterations, 2, "number of iterations per test case"); +DEFINE_string(repro_file, "", "File to output failing test cases"); +DEFINE_string(whitelist, "", "File listing instruction states not to check"); +DEFINE_bool(should_dump_functions, false, "Dump each function version"); +DEFINE_bool(stop_on_fail, false, "Stop on first failure"); + + +struct InstructionFunction { + llvm::Function *llvm_function; + std::string isel_name; +}; + +class DiffModule { + private: + std::unique_ptr mod; + + std::tuple functions_to_compare; + + public: + DiffModule(std::unique_ptr mod_, llvm::Function *f1_, + llvm::Function *f2_, std::string f1_insn_name_, + std::string f2_insn_name_) + : mod(std::move(mod_)), + functions_to_compare({{f1_, f1_insn_name_}, {f2_, f2_insn_name_}}) {} + + llvm::Module *GetModule() { + return this->mod.get(); + } + + template + InstructionFunction GetF() const { + return std::get(this->functions_to_compare); + } +}; + +class DifferentialModuleBuilder { + private: + std::unique_ptr context; + std::shared_ptr semantics_module; + + test_runner::LiftingTester l1; + test_runner::LiftingTester l2; + DifferentialModuleBuilder(std::unique_ptr context_, + std::shared_ptr semantics_module_, + + test_runner::LiftingTester l1_, + test_runner::LiftingTester l2_) + : context(std::move(context_)), + semantics_module(std::move(semantics_module_)), + l1(std::move(l1_)), + l2(std::move(l2_)) {} + + public: + static DifferentialModuleBuilder + Create(remill::OSName os_name_1, remill::ArchName arch_name_1, + remill::OSName os_name_2, remill::ArchName arch_name_2) { + // it is 
expected that compatible arches share a semantics module. + std::unique_ptr context = + std::make_unique(); + context->enableOpaquePointers(); + auto tmp_arch = remill::Arch::Build(context.get(), os_name_1, arch_name_1); + std::shared_ptr semantics_module = + remill::LoadArchSemantics(tmp_arch.get()); + tmp_arch->PrepareModule(semantics_module.get()); + auto l1 = + test_runner::LiftingTester(semantics_module, os_name_1, arch_name_1); + auto l2 = + test_runner::LiftingTester(semantics_module, os_name_2, arch_name_2); + return DifferentialModuleBuilder(std::move(context), + std::move(semantics_module), std::move(l1), + std::move(l2)); + } + + public: + std::optional build(std::string_view fname_f1, + std::string_view fname_f2, + std::string_view bytes, uint64_t address) { + auto module = std::make_unique("", *this->context); + auto maybe_f1 = this->l1.LiftInstructionFunction(fname_f1, bytes, address); + auto maybe_f2 = this->l2.LiftInstructionFunction(fname_f2, bytes, address); + + if (!maybe_f1.has_value() || !maybe_f2.has_value()) { + return std::nullopt; + } + + auto f1_and_name = *maybe_f1; + auto f2_and_name = *maybe_f2; + + auto f1 = f1_and_name.first; + auto f2 = f2_and_name.first; + + for (auto x : {f1, f2}) { + llvm::verifyFunction(*x, &llvm::errs()); + } + + + auto tst = f1->getParent(); + + CHECK(remill::VerifyModule(tst)); + + auto cloned = llvm::CloneModule(*tst); + + if (auto maybe_message = remill::VerifyModuleMsg(cloned.get())) { + LOG(FATAL) << *maybe_message; + } + + remill::OptimizeBareModule(cloned); + + auto new_f1 = + test_runner::CopyFunctionIntoNewModule(module.get(), f1, cloned); + auto new_f2 = + test_runner::CopyFunctionIntoNewModule(module.get(), f2, cloned); + + + return DiffModule(std::move(module), new_f1, new_f2, + f1_and_name.second.function, f2_and_name.second.function); + } +}; + +using random_bytes_engine = + std::independent_bits_engine; + + +std::string PrintState(X86State *state) { + return ""; +} + +struct DiffTestResult { + 
std::string init_state_dump; + std::string struct_dump1; + std::string struct_dump2; + bool are_equal; +}; + + +class ComparisonRunner { + private: + random_bytes_engine rbe; + llvm::support::endianness endian; + + + public: + ComparisonRunner(llvm::support::endianness endian_) : endian(endian_) {} + + private: + template + void addRegTo(llvm::json::Object &obj, std::string name, T value) { + obj[name] = value; + } + + + std::string DumpState(X86State *st) { + + llvm::json::Object mapper; + llvm::json::Object gpr; + addRegTo(gpr, "eax", st->gpr.rax.dword); + addRegTo(gpr, "ebx", st->gpr.rbx.dword); + addRegTo(gpr, "ecx", st->gpr.rcx.dword); + addRegTo(gpr, "edx", st->gpr.rdx.dword); + addRegTo(gpr, "eip", st->gpr.rip.dword); + addRegTo(gpr, "esp", st->gpr.rsp.dword); + addRegTo(gpr, "esi", st->gpr.rsi.dword); + addRegTo(gpr, "edi", st->gpr.rdi.dword); + + + llvm::json::Object flags; + addRegTo(flags, "zf", st->aflag.zf); + addRegTo(flags, "of", st->aflag.of); + addRegTo(flags, "pf", st->aflag.pf); + addRegTo(flags, "cf", st->aflag.cf); + addRegTo(flags, "df", st->aflag.df); + addRegTo(flags, "sf", st->aflag.sf); + addRegTo(flags, "af", st->aflag.af); + + + mapper["gpr"] = std::move(gpr); + mapper["flags"] = std::move(flags); + std::string res; + llvm::json::Value v(std::move(mapper)); + llvm::raw_string_ostream ss(res); + ss << v; + + return ss.str(); + } + + public: + DiffTestResult + SingleCmpRun(size_t insn_length, llvm::Function *f1, llvm::Function *f2, + const std::vector &whitelist, + std::string_view isel_name) { + + X86State func1_state{}; + test_runner::RandomizeState(func1_state, this->rbe); + func1_state.addr.ds_base.dword = 0; + func1_state.addr.ss_base.dword = 0; + func1_state.addr.es_base.dword = 0; + func1_state.addr.cs_base.dword = 0; + func1_state.aflag.af = test_runner::random_boolean_flag(this->rbe); + func1_state.aflag.cf = test_runner::random_boolean_flag(this->rbe); + func1_state.aflag.df = test_runner::random_boolean_flag(this->rbe); + 
func1_state.aflag.of = test_runner::random_boolean_flag(this->rbe); + func1_state.aflag.pf = test_runner::random_boolean_flag(this->rbe); + func1_state.aflag.sf = test_runner::random_boolean_flag(this->rbe); + func1_state.aflag.zf = test_runner::random_boolean_flag(this->rbe); + + if (isel_name.rfind("REP_") != std::string::npos) { + LOG(INFO) << "setting ecx to 1"; + func1_state.gpr.rcx.dword = 1; + } + + X86State func2_state{}; + + auto init_state = this->DumpState(&func1_state); + + std::memcpy(&func2_state, &func1_state, sizeof(X86State)); + + auto mem_handler = + std::make_unique(this->endian); + auto pc_fetch = [](X86State *st) { return st->gpr.rip.qword; }; + test_runner::ExecuteLiftedFunction( + f1, insn_length, &func1_state, mem_handler.get(), pc_fetch); + auto second_handler = std::make_unique( + this->endian, mem_handler->GetUninitializedReads()); + test_runner::ExecuteLiftedFunction( + f2, insn_length, &func2_state, second_handler.get(), pc_fetch); + + + auto memory_state_eq = + mem_handler->GetMemory() == second_handler->GetMemory(); + + // NOTE(Ian): Here we log differences in instructions that arise from a different memory interaction. 
+ if (!memory_state_eq) { + LOG(ERROR) << "Memory state differs"; + LOG(ERROR) << mem_handler->DumpState(); + LOG(ERROR) << second_handler->DumpState(); + } + + for (const auto &it : whitelist) { + it.ApplyToInsn(isel_name, &func1_state); + it.ApplyToInsn(isel_name, &func2_state); + } + + auto are_equal = + std::memcmp(&func1_state, &func2_state, sizeof(X86State)) == 0 && + memory_state_eq; + + + return {init_state, this->DumpState(&func1_state), + this->DumpState(&func2_state), are_equal}; + } +}; + +struct TestCase { + uint64_t addr; + std::string bytes; +}; + +namespace llvm::json { +bool fromJSON(const Value &E, TestCase &Out, Path P) { + auto byte_string = E.getAsString(); + if (!byte_string.hasValue()) { + P.report("Expected hex string of instruction bytes"); + return false; + } + + auto bytes = llvm::fromHex(byte_string.getValue()); + + Out.bytes = bytes; + // Should maybe do something else here? + Out.addr = 0xdeadbe00; + return true; +} +}; // namespace llvm::json + + +std::string test_case_name(std::string_view prefix, uint64_t test_cast_ctr) { + std::stringstream ss; + ss << prefix << "comp_func" << test_cast_ctr; + return ss.str(); +} + +// Returns true when test case succeeds +bool runTestCase(const TestCase &tc, DifferentialModuleBuilder &diffbuilder, + const std::vector &whitelist, + uint64_t ctr) { + LOG(INFO) << "Starting testcase: " << llvm::toHex(tc.bytes); + auto diff_mod = diffbuilder.build( + test_case_name("f1", ctr), test_case_name("f2", ctr), tc.bytes, tc.addr); + + if (!diff_mod.has_value()) { + LOG(ERROR) << "Failed to lift " << std::hex << tc.addr << ": " + << llvm::toHex(tc.bytes); + + if (FLAGS_stop_on_fail) { + LOG(FATAL) << "Failed to lift an insn"; + } + return false; + } + + auto end = diff_mod->GetModule()->getDataLayout().isBigEndian() + ? 
llvm::support::endianness::big + : llvm::support::endianness::little; + ComparisonRunner comp_runner(end); + + if (FLAGS_should_dump_functions) { + LOG(INFO) << remill::LLVMThingToString(diff_mod->GetF<0>().llvm_function); + LOG(INFO) << remill::LLVMThingToString(diff_mod->GetF<1>().llvm_function); + } + + for (uint64_t i = 0; i < FLAGS_num_iterations; i++) { + auto tc_result = comp_runner.SingleCmpRun( + tc.bytes.size(), diff_mod->GetF<0>().llvm_function, + diff_mod->GetF<1>().llvm_function, whitelist, + diff_mod->GetF<0>().isel_name); + + if (!tc_result.are_equal) { + LOG(ERROR) << "Difference in instruction" << std::hex << tc.addr << ": " + << llvm::toHex(tc.bytes); + LOG(INFO) << "Init state: " << tc_result.init_state_dump << std::endl; + LOG(INFO) << tc_result.struct_dump1 << std::endl; + LOG(INFO) << tc_result.struct_dump2 << std::endl; + return false; + } + } + + return true; +} + + +int main(int argc, char **argv) { + google::ParseCommandLineFlags(&argc, &argv, true); + google::InitGoogleLogging(argv[0]); + + + if (FLAGS_target_insn_file.empty()) { + LOG(FATAL) << "Must provide a test case file"; + } + + auto maybe_buff = llvm::MemoryBuffer::getFileOrSTDIN(FLAGS_target_insn_file); + + if (maybe_buff.getError()) { + LOG(FATAL) << "Failed to read file with: " + << maybe_buff.getError().message(); + } + + auto maybe_json = llvm::json::parse(maybe_buff.get()->getBuffer()); + if (auto E = maybe_json.takeError()) { + LOG(FATAL) << "Failed to parse json: " << llvm::toString(std::move(E)); + } + + + std::vector testcases; + llvm::json::Path::Root root; + llvm::json::Path pth(root); + + if (!llvm::json::fromJSON(maybe_json.get(), testcases, pth)) { + LOG(FATAL) << "Failed to parse testcases"; + } + + std::vector whitelist; + + if (!FLAGS_whitelist.empty()) { + LOG(INFO) << "Reading whitelist"; + auto maybe_whitelist_buff = + llvm::MemoryBuffer::getFileOrSTDIN(FLAGS_whitelist); + if (maybe_whitelist_buff.getError()) { + LOG(FATAL) << "Failed to read whitelist file 
with: " + << maybe_whitelist_buff.getError().message(); + } + + auto maybe_whitelist_json = + llvm::json::parse>( + maybe_whitelist_buff.get()->getBuffer()); + if (auto E = maybe_whitelist_json.takeError()) { + LOG(FATAL) << "Failed to parse whitelist json: " + << llvm::toString(std::move(E)); + } + + whitelist = maybe_whitelist_json.get(); + } else { + LOG(ERROR) << "Not using a whitelist"; + } + + DifferentialModuleBuilder diffbuilder = DifferentialModuleBuilder::Create( + remill::OSName::kOSLinux, remill::ArchName::kArchX86, + remill::OSName::kOSLinux, remill::ArchName::kArchX86_SLEIGH); + uint64_t ctr = 0; + + std::vector failed_testcases; + auto succeeded_tot = true; + for (auto tc : testcases) { + auto tc_succeeded = runTestCase(tc, diffbuilder, whitelist, ++ctr); + if (!tc_succeeded) { + succeeded_tot = false; + failed_testcases.push_back(tc); + } + + if (!FLAGS_repro_file.empty() && !tc_succeeded) { + std::error_code ec; + llvm::raw_fd_ostream o(FLAGS_repro_file, ec); + if (ec) { + LOG(FATAL) << ec.message(); + } + + llvm::json::Array arr; + for (auto tc : failed_testcases) { + arr.push_back(llvm::toHex(tc.bytes)); + } + + llvm::json::operator<<(o, llvm::json::Value(std::move(arr))); + } + + if (!succeeded_tot && FLAGS_stop_on_fail) { + return 2; + } + } + + + return succeeded_tot ? 0 : 2; +} diff --git a/bin/differential_tester_x86/README.md b/bin/differential_tester_x86/README.md new file mode 100644 index 000000000..112d358f2 --- /dev/null +++ b/bin/differential_tester_x86/README.md @@ -0,0 +1 @@ +The checked in whitelist.json covers the known sleigh bugs that we currently are not handling \ No newline at end of file diff --git a/bin/differential_tester_x86/Whitelist.cpp b/bin/differential_tester_x86/Whitelist.cpp new file mode 100644 index 000000000..e964c63e0 --- /dev/null +++ b/bin/differential_tester_x86/Whitelist.cpp @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2022 Trail of Bits, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Whitelist.h" + +#include +#include + +#include +#include +#include + +namespace { +const static std::unordered_map< + std::string, std::function> + accessors = { + {"gpr", + [](X86State *state, std::string_view target) { + uint32_t *target_ptr = + [state](std::string_view target) -> uint32_t * { + if (target == "rip") { + return &state->gpr.rip.dword; + } + + if (target == "rax") { + return &state->gpr.rax.dword; + } + + return nullptr; + }(target); + + + if (!target_ptr) { + std::string s(target); + throw std::runtime_error(std::string("Unknown reg: ") + s); + } + + *target_ptr = 0; + }}, + {"aflags", [](X86State *state, std::string_view target) { + uint8_t *target_ptr = [state](std::string_view target) -> uint8_t * { + if (target == "af") { + return &state->aflag.af; + } + + if (target == "zf") { + return &state->aflag.zf; + } + + if (target == "of") { + return &state->aflag.of; + } + + return nullptr; + }(target); + + + if (!target_ptr) { + std::string s(target); + throw std::runtime_error(std::string("Unknown reg: ") + s); + } + + *target_ptr = 0; + }}}; +} + + +void Accessor::ApplyOverride(X86State *state) const { + if (accessors.find(this->section) == accessors.end()) { + throw std::runtime_error(std::string("Couldnt find section ") + + std::string(this->section)); + } + + accessors.find(this->section)->second(state, this->target_name); +} + +bool Accessor::fromJSON(const 
llvm::json::Value &E, llvm::json::Path P) { + std::vector section_names; + if (!llvm::json::fromJSON(E, section_names, P.field("state_target"))) { + return false; + } + + if (section_names.size() != 2) { + P.field("state_target") + .report( + "Currently only supports access paths of the form [section, target_var]"); + return false; + } + + this->section = section_names[0]; + LOG(INFO) << "Section is: " << this->section; + this->target_name = section_names[1]; + return true; +} + + +bool WhiteListInstruction::fromJSON(const llvm::json::Value &E, + llvm::json::Path P) { + auto maybe_obj = E.getAsObject(); + if (!maybe_obj) { + P.report("Should be an object"); + return false; + } + + auto maybe_isel_name = maybe_obj->find("isel_name"); + if (maybe_isel_name == maybe_obj->end()) { + P.report("Should have isel_name object"); + return false; + } + + std::string isel_name = ""; + if (!llvm::json::fromJSON(maybe_isel_name->second, isel_name, + P.field("isel_name"))) { + return false; + } + + + auto maybe_state_target = maybe_obj->find("state_target"); + if (maybe_state_target == maybe_obj->end()) { + P.report("Should have state target path"); + return false; + } + + if (!llvm::json::fromJSON(maybe_state_target->second, + this->target_state_portion, + P.field("state_target"))) { + return false; + } + + this->target_isel_prefix = isel_name; + + return true; +} + +void WhiteListInstruction::ApplyToInsn(std::string_view isel_name, + X86State *state) const { + if (isel_name.rfind(target_isel_prefix, 0) == 0) { + this->target_state_portion.ApplyOverride(state); + } +} + + +namespace llvm::json { +bool fromJSON(const Value &E, Accessor &Out, Path P) { + return Out.fromJSON(E, P); +} +bool fromJSON(const Value &E, WhiteListInstruction &Out, Path P) { + return Out.fromJSON(E, P); +} +} // namespace llvm::json \ No newline at end of file diff --git a/bin/differential_tester_x86/Whitelist.h b/bin/differential_tester_x86/Whitelist.h new file mode 100644 index 000000000..14ba2bba3 --- 
/dev/null +++ b/bin/differential_tester_x86/Whitelist.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +class Accessor { + + std::string section; + std::string target_name; + + public: + void ApplyOverride(X86State *state) const; + + bool fromJSON(const llvm::json::Value &E, llvm::json::Path P); +}; + + +class WhiteListInstruction { + private: + std::string target_isel_prefix; + Accessor target_state_portion; + + + public: + bool fromJSON(const llvm::json::Value &E, llvm::json::Path P); + + void ApplyToInsn(std::string_view isel_name, X86State *state) const; +}; + +namespace llvm::json { +bool fromJSON(const Value &E, Accessor &Out, Path P); +bool fromJSON(const Value &E, WhiteListInstruction &Out, Path P); +} // namespace llvm::json \ No newline at end of file diff --git a/bin/differential_tester_x86/data/small_test/insn_file_0.json b/bin/differential_tester_x86/data/small_test/insn_file_0.json new file mode 100644 index 000000000..e119067a3 --- /dev/null +++ b/bin/differential_tester_x86/data/small_test/insn_file_0.json @@ -0,0 +1,532 @@ +[ + "81e300ffff07", + "e847f5ffff", + "8b0d88c00408", + "75d6", + "ff2540c00408", + "ff2560c00408", + "e8d0fbffff", + "e8c6fcffff", + "e87efaffff", + "6848a00408", + "c1e81f", + "8b83fcffffff", + "c744241000000000", + "8d8310deffff", + "0fb6442409", + "0fb67c2438", + "89460c", + "ff2544c00408", + "8d442410", + "8908", + 
"68d1a50408", + "5d", + "e87bfeffff", + "ff2538c00408", + "6a03", + "b800000000", + "6800000000", + "e847f6ffff", + "7418", + "e899080000", + "e8dc010000", + "ff94b50cffffff", + "894804", + "8b0d7cc00408", + "e821f5ffff", + "89442434", + "e883f4ffff", + "687ea50408", + "0f44fb", + "55", + "c7460800000000", + "0fb644240e", + "90", + "e806f7ffff", + "68cba20408", + "e879fdffff", + "a384c00408", + "66c746090000", + "66c740010000", + "ebd3", + "e812f4ffff", + "83bc24f000000001", + "e85efeffff", + "c6400601", + "c7460400000000", + "e8c7f5ffff", + "89442414", + "e8c1f5ffff", + "83c40c", + "a38cc00408", + "0fb644240b", + "83c404", + "83c440", + "e80b060000", + "683fa00408", + "8b442440", + "8b0d94c00408", + "83ec04", + "8d442408", + "c9", + "89eb", + "80780a00", + "ff2528c00408", + "0fb644240a", + "8b742408", + "81c367210000", + "e9e0ffffff", + "ff742454", + "0fb654240c", + "803d6cc0040800", + "807e0200", + "c3", + "8b442410", + "a3a4c00408", + "0fb7d2", + "e801fcffff", + "85c9", + "741c", + "01d0", + "81c3f32f0000", + "d3e0", + "ff2548c00408", + "ff74246c", + "81fb00f1fe00", + "83c414", + "a37cc00408", + "ff742444", + "e867feffff", + "fec0", + "ba99a00408", + "68c4a00408", + "f646040c", + "ff37", + "e837f9ffff", + "31f6", + "8b0da4c00408", + "8b4c2404", + "e8dbf8ffff", + "e859060000", + "e836faffff", + "eb8a", + "b001", + "e83a060000", + "0fb6442408", + "0f88a1010000", + "c744241400ffff00", + "e8adf8ffff", + "6a01", + "807e0100", + "83c408", + "eb0e", + "e857f6ffff", + "e86b000000", + "68b80b0000", + "0f8eea000000", + "a3a8c00408", + "8b0d84c00408", + "c744240c00ffff00", + "8b0da8c00408", + "e807f6ffff", + "c1fb02", + "31c0", + "a374c00408", + "d1f8", + "6a04", + "c6400400", + "7429", + "50", + "8d7c2444", + "e95b030000", + "ff2485e4a60408", + "6873a30408", + "bb9ba00408", + "89df", + "ffd2", + "c20400", + "8b442424", + "83ec10", + "8d44241c", + "e895fdffff", + "8b0d9cc00408", + "6847a20408", + "6a65", + "8b44240c", + "ff2510c00408", + "e872f4ffff", + "68dda60408", + 
"85d2", + "ff253cc00408", + "8944241c", + "8d442414", + "e868f8ffff", + "09448c58", + "e823fcffff", + "c0e904", + "8944242c", + "8a460b", + "803e00", + "ff2554c00408", + "e896f8ffff", + "e810080000", + "752f", + "0fb64c240d", + "8d411f", + "8944243c", + "e82affffff", + "e808f6ffff", + "68f6a40408", + "8b0d78c00408", + "807e0300", + "84c0", + "ffd0", + "c1f805", + "ff251cc00408", + "83ec18", + "0f44c3", + "5f", + "7425", + "8b0d90c00408", + "0fb65603", + "80780900", + "83c41c", + "8b0db8c00408", + "fc", + "ff742420", + "e8aaf9ffff", + "6808a00408", + "a3acc00408", + "e887f6ffff", + "2b4610", + "0fb6c0", + "689ba10408", + "fec8", + "c6400301", + "ff255cc00408", + "0f954005", + "7471", + "39f3", + "f30f1efb", + "e8f2f8ffff", + "0f85f6fdffff", + "683da00408", + "81c5e6210000", + "83f903", + "83f901", + "8b0db0c00408", + "41", + "8d44244c", + "740c", + "b83da00408", + "b86cc00408", + "8d9d10ffffff", + "7417", + "8d542458", + "bf0a000000", + "e8c8f6ffff", + "0f944601", + "83c424", + "ff2508c00408", + "a39cc00408", + "c60000", + "683aa00408", + "57", + "80780400", + "b801000000", + "e823f8ffff", + "e826fdffff", + "7428", + "68fba30408", + "893c24", + "68aca00408", + "e8c2feffff", + "e893f3ffff", + "89442424", + "75d1", + "5e", + "e83ef3ffff", + "0f44d3", + "a398c00408", + "e8aaf3ffff", + "e865f9ffff", + "e8cafbffff", + "0fb64e02", + "75d3", + "e809f9ffff", + "b899a00408", + "e8e3010000", + "e877080000", + "eb10", + "8d850cffffff", + "741b", + "8d6c2454", + "a378c00408", + "89c2", + "0f85d2fdffff", + "83c430", + "684da50408", + "b999a00408", + "e857f3ffff", + "7413", + "8b1c24", + "e84ef9ffff", + "5b", + "b9d0a60408", + "eb18", + "8b7c2424", + "8b0dbcc00408", + "a390c00408", + "68caa30408", + "8b3424", + "89d7", + "8b0d74c00408", + "66c746090001", + "81c34c2e0000", + "ff254cc00408", + "6a06", + "6835a00408", + "8b742460", + "e8c1f9ffff", + "e8c4f8ffff", + "8b742424", + "53", + "75d8", + "6833890000", + "89442430", + "a3b4c00408", + "68d6a60408", + "e80cf8ffff", + 
"681ea30408", + "81c4dc000000", + "c744244000000000", + "807e0600", + "8a4901", + "8b742434", + "7408", + "c6400600", + "c1e208", + "66c74424181d00", + "e876fdffff", + "6878a20408", + "89442428", + "e8d6f3ffff", + "c6460401", + "e8a0070000", + "eb07", + "0f44c8", + "54", + "68e1a60408", + "ff74244c", + "742a", + "a370c00408", + "e85affffff", + "c744240800f1fe00", + "ff742438", + "83ec20", + "6a05", + "8b7c2408", + "6879a60408", + "e83f060000", + "3c05", + "a3a0c00408", + "7725", + "6609ca", + "56", + "6837a00408", + "8b5c240c", + "68d0a60408", + "7420", + "8b0d8cc00408", + "83ec0c", + "8b0da0c00408", + "89442420", + "689da00408", + "e83af8ffff", + "a394c00408", + "3d91d00300", + "83fe05", + "8b0dacc00408", + "83f904", + "83e4f0", + "6884a00408", + "e80bffffff", + "83ff64", + "83c428", + "e8bbfeffff", + "6a64", + "8b74240c", + "7c32", + "e893f9ffff", + "3c0e", + "ff2514c00408", + "e8fbfdffff", + "e825060000", + "742d", + "8b2c24", + "e823000000", + "e887f5ffff", + "e866fdffff", + "e843faffff", + "e809030000", + "8b4c2408", + "0f944602", + "66c746060000", + "8b0db4c00408", + "ff3504c00408", + "f3ab", + "0f8497000000", + "bf99a00408", + "68f5a00408", + "0fb66c240f", + "837c242400", + "8a4607", + "c7c0c0920408", + "2b460c", + "83f902", + "e8bafeffff", + "46", + "ff742428", + "e8b2f4ffff", + "e86ff9ffff", + "e83bfeffff", + "ff74245c", + "8b0d80c00408", + "c744244432000000", + "ff2518c00408", + "894c241c", + "8d7600", + "2d6cc00408", + "8b0d98c00408", + "51", + "751b", + "0f44c7", + "c6400300", + "85c0", + "68d2a60408", + "e8b0fdffff", + "8d8380deffff", + "3d6cc00408", + "68a1a40408", + "89442418", + "c6400700", + "88480b", + "7c4e", + "e8c1030000", + "31ed", + "89c6", + "a3bcc00408", + "6a18", + "7402", + "81fb00ccfd00", + "83ec14", + "89e1", + "686cc00408", + "e8fcf4ffff", + "e876f4ffff", + "66c746090101", + "0f49c1", + "8b742410", + "8b4c2414", + "83c420", + "66c740010101", + "ff2550c00408", + "c1f81f", + "e868ffffff", + "7521", + "83c454", + "68f0a10408", + "7424", + 
"75e3", + "8bbc24f4000000", + "c60001", + "0f944603", + "ff2524c00408", + "ff2558c00408", + "e817fdffff", + "6a00", + "6a1d", + "7212", + "c744241000ccfd00", + "e825080000", + "894810", + "e820f9ffff", + "0f44cb", + "0f9406", + "80780500", + "8b4c2420", + "8d74244e", + "ff252cc00408", + "80780600", + "ff36", + "e87bf4ffff", + "8b0d70c00408", + "83c410", + "c6056cc0040801", + "6a10", + "a3b0c00408", + "66c746090100", + "e897faffff", + "e8e3f3ffff", + "8b7c2404", + "8b7c2428", + "7e06", + "ff2530c00408", + "6848a10408", + "884607", + "8b442404", + "6a02", + "6824a60408", + "ba00000000", + "a3b8c00408", + "a388c00408", + "ff7704", + "c70600000000", + "89e5", + "ff2520c00408", + "e83efcffff", + "894808", + "e851f8ffff", + "e8ac010000", + "c744241001000000", + "e87cf9ffff", + "83ec08", + "e818fdffff", + "7509", + "6822a00408", + "8b442408", + "7f1c", + "83c601", + "52", + "b864000000", + "a380c00408", + "6a0a", + "b920000000", + "83ec54", + "eb22", + "8d7c2438", + "e866faffff", + "81ecdc000000", + "e87ff8ffff", + "e85cfeffff", + "29c3", + "89480c", + "6a14", + "be01000000", + "893424", + "c1fa02", + "3d49e80100", + "89442438", + "684ea40408", + "81e7ffffff1f", + "e8d4f3ffff", + "ff250cc00408", + "eb25", + "8db42600000000", + "7434", + "6880a00408", + "ff74242c", + "ff2534c00408", + "e8cff1ffff", + "894610" +] \ No newline at end of file diff --git a/bin/differential_tester_x86/whitelist.json b/bin/differential_tester_x86/whitelist.json new file mode 100644 index 000000000..5c3420d11 --- /dev/null +++ b/bin/differential_tester_x86/whitelist.json @@ -0,0 +1,58 @@ +[ + { + "comment": "the adjust flag isnt ever updated by sleigh", + "isel_name": "SUB_", + "state_target": [ + "aflags", + "af" + ] + }, + { + "comment": "the adjust flag isnt ever updated by sleigh", + "isel_name": "ADD_", + "state_target": [ + "aflags", + "af" + ] + }, + { + "comment": "the adjust flag isnt ever updated by sleigh", + "isel_name": "", + "state_target": [ + "aflags", + "af" + ] + }, + { + 
"comment": "the overflow flag has an undefined value when the shift is not 1 so we have to filter this, since remill and sleigh do different things with undefined values, unfortunately currently we wont check 1 bit shifts", + "isel_name": "SHR_", + "state_target": [ + "aflags", + "of" + ] + }, + { + "comment": "the overflow flag has an undefined value when the shift is not 1 so we have to filter this, since remill and sleigh do different things with undefined values, unfortunately currently we wont check 1 bit shifts", + "isel_name": "SAR_", + "state_target": [ + "aflags", + "of" + ] + }, + { + "comment": "the overflow flag has an undefined value when the shift is not 1 so we have to filter this, since remill and sleigh do different things with undefined values, unfortunately currently we wont check 1 bit shifts", + "isel_name": "SHL_", + "state_target": [ + "aflags", + "of" + ] + }, + { + "comment": "the overflow flag has an undefined value when the shift is not 1 so we have to filter this, since remill and sleigh do different things with undefined values, unfortunately currently we wont check 1 bit shifts", + "isel_name": "SAL_", + "state_target": [ + "aflags", + "of" + ] + } +] \ No newline at end of file diff --git a/bin/lift/Lift.cpp b/bin/lift/Lift.cpp index dbd814373..756f52dfc 100644 --- a/bin/lift/Lift.cpp +++ b/bin/lift/Lift.cpp @@ -265,8 +265,11 @@ int main(int argc, char *argv[]) { Memory memory = UnhexlifyInputBytes(addr_mask); SimpleTraceManager manager(memory); remill::IntrinsicTable intrinsics(module.get()); - remill::InstructionLifter inst_lifter(arch, intrinsics); - remill::TraceLifter trace_lifter(inst_lifter, manager); + + + auto inst_lifter = arch->DefaultLifter(intrinsics); + + remill::TraceLifter trace_lifter(*inst_lifter.get(), manager); // Lift all discoverable traces starting from `--entry_address` into // `module`. 
diff --git a/cmake/options.cmake b/cmake/options.cmake index 2bc7c3bce..39a6d51d2 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -1,28 +1,24 @@ include(CMakeDependentOption) -set(can_enable_testing FALSE) +set(can_enable_testing TRUE) set(can_enable_testing_x86 FALSE) set(can_enable_testing_aarch64 FALSE) # tests -if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "AppleClang") - +if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "AppleClang") if(NOT "${PLATFORM_NAME}" STREQUAL "windows") if("${CMAKE_HOST_SYSTEM_PROCESSOR}" STREQUAL "AMD64" OR "${CMAKE_HOST_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(can_enable_testing TRUE) set(can_enable_testing_x86 TRUE) endif() endif() if("${CMAKE_HOST_SYSTEM_PROCESSOR}" STREQUAL "aarch64" AND "${PLATFORM_NAME}" STREQUAL "linux") message(STATUS "aarch64 tests enabled") - set(can_enable_testing TRUE) - set(can_enable_testing_aarch64 TRUE) + set(can_enable_testing_aarch64 TRUE) endif() endif() - set(REMILL_SOURCE_DIR "${PROJECT_SOURCE_DIR}" CACHE PATH "Root directory of remill source code") set(REMILL_INSTALL_LIB_DIR "${CMAKE_INSTALL_LIBDIR}" CACHE PATH "Directory in which remill libraries will be installed") set(REMILL_INSTALL_BIN_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Directory in which remill binaries will be installed") @@ -32,3 +28,5 @@ option(REMILL_ENABLE_INSTALL_TARGET "Should Remill be installed?" 
TRUE) cmake_dependent_option(REMILL_ENABLE_TESTING "Build your tests" ON "can_enable_testing" OFF) cmake_dependent_option(REMILL_ENABLE_TESTING_X86 "Build your tests" ON "REMILL_ENABLE_TESTING;can_enable_testing_x86" OFF) cmake_dependent_option(REMILL_ENABLE_TESTING_AARCH64 "Build your tests" ON "REMILL_ENABLE_TESTING;can_enable_testing_aarch64" OFF) +cmake_dependent_option(REMILL_ENABLE_TESTING_SLEIGH_THUMB "Build cross platform sleigh tests" ON "REMILL_ENABLE_TESTING" OFF) +cmake_dependent_option(REMILL_ENABLE_DIFFERENTIAL_TESTING "Build cross platform differential testing of sleigh x86" ON "REMILL_ENABLE_TESTING" OFF) \ No newline at end of file diff --git a/include/remill/Arch/AArch32/AArch32Base.h b/include/remill/Arch/AArch32/AArch32Base.h new file mode 100644 index 000000000..ddbc1c3df --- /dev/null +++ b/include/remill/Arch/AArch32/AArch32Base.h @@ -0,0 +1,44 @@ + +#pragma once +#include +#include + + +// clang-format off +#define ADDRESS_SIZE 32 +#include +// clang-format on + +#include +namespace remill { +/// Class to derive from to handle AArch32 addregs +class AArch32ArchBase : public virtual ArchBase { + public: + AArch32ArchBase(llvm::LLVMContext *context_, OSName os_name_, + ArchName arch_name_) + : ArchBase(context_, os_name_, arch_name_) {} + + virtual std::string_view StackPointerRegisterName(void) const; + + std::string_view ProgramCounterRegisterName(void) const; + uint64_t MinInstructionAlign(void) const; + + + uint64_t MinInstructionSize(void) const; + + uint64_t MaxInstructionSize(bool) const; + llvm::CallingConv::ID DefaultCallingConv(void) const; + + llvm::DataLayout DataLayout(void) const; + + llvm::Triple Triple(void) const; + + + void PopulateRegisterTable(void) const; + // Populate a just-initialized lifted function with architecture- + // specific variables. 
+ void FinishLiftedFunctionInitialization(llvm::Module *module, + llvm::Function *bb_func) const; + virtual ~AArch32ArchBase(void) = default; +}; +} // namespace remill \ No newline at end of file diff --git a/include/remill/Arch/Arch.h b/include/remill/Arch/Arch.h index 9216b673b..92c5dcf5b 100644 --- a/include/remill/Arch/Arch.h +++ b/include/remill/Arch/Arch.h @@ -28,12 +28,15 @@ #include #include #include +#include + #pragma clang diagnostic pop // clang-format on #include #include +#include #include #include #include @@ -60,13 +63,50 @@ enum OSName : uint32_t; enum ArchName : uint32_t; class Arch; -class ArchImpl; class Instruction; +// An RAII locker for handling issues related to SLEIGH. +class ArchLocker { + private: + friend class Arch; + + std::mutex *lock; + + ArchLocker(const ArchLocker &) = delete; + ArchLocker &operator=(const ArchLocker &) = delete; + + inline ArchLocker(std::mutex *lock_) : lock(lock_) { + if (lock) { + lock->lock(); + } + } + + public: + inline ArchLocker(void) : lock(nullptr) {} + + inline ~ArchLocker(void) { + if (lock) { + lock->unlock(); + } + } + + inline ArchLocker(ArchLocker &&that) noexcept : lock(that.lock) { + that.lock = nullptr; + } + + inline ArchLocker &operator=(ArchLocker &&that) noexcept { + ArchLocker copy(std::forward(that)); + std::swap(lock, copy.lock); + return *this; + } +}; + struct Register { public: - Register(const std::string &name_, uint64_t offset_, uint64_t size_, - llvm::Type *type_, const Register *parent_, const ArchImpl *arch_); + friend class Arch; + + Register(const std::string &name_, uint64_t offset_, llvm::Type *type_, + const Register *parent_, const Arch *arch_); std::string name; // Name of the register. uint64_t offset; // Byte offset in `State`. 
@@ -114,14 +154,10 @@ struct Register { llvm::Value *AddressOf(llvm::Value *state_ptr, llvm::IRBuilder<> &ir) const; - private: - friend class Arch; - const Register *const parent; - const ArchImpl *const arch; + const Arch *const arch; - // The directly enclosed registers. - std::vector children; + mutable std::vector children; void ComputeGEPAccessors(const llvm::DataLayout &dl, llvm::StructType *state_type); @@ -143,34 +179,39 @@ class Arch { static auto Get(llvm::LLVMContext &context, OSName os, ArchName arch_name) -> ArchPtr; + // Return the type of an address, i.e. `addr_t` in the semantics. This is + // based off of `context` and `address_size`. + llvm::IntegerType *AddressType(void) const; + // Return the type of the state structure. - llvm::StructType *StateStructType(void) const; + virtual llvm::StructType *StateStructType(void) const = 0; // Pointer to a state structure type. - llvm::PointerType *StatePointerType(void) const; - - // Return the type of an address, i.e. `addr_t` in the semantics. - llvm::IntegerType *AddressType(void) const; + virtual llvm::PointerType *StatePointerType(void) const = 0; // The type of memory. - llvm::PointerType *MemoryPointerType(void) const; + virtual llvm::PointerType *MemoryPointerType(void) const = 0; // Return the type of a lifted function. - llvm::FunctionType *LiftedFunctionType(void) const; + virtual llvm::FunctionType *LiftedFunctionType(void) const = 0; // Returns the type of the register window. If the architecture doesn't have a register window, a // null pointer will be returned. - llvm::StructType *RegisterWindowType() const; + virtual llvm::StructType *RegisterWindowType(void) const = 0; + + + virtual unsigned RegMdID(void) const = 0; // Apply `cb` to every register. - void ForEachRegister(std::function cb) const; + virtual void + ForEachRegister(std::function cb) const = 0; // Return information about the register at offset `offset` in the `State` // structure. 
- const Register *RegisterAtStateOffset(uint64_t offset) const; + virtual const Register *RegisterAtStateOffset(uint64_t offset) const = 0; // Return information about a register, given its name. - const Register *RegisterByName(std::string_view name) const; + virtual const Register *RegisterByName(std::string_view name) const = 0; // Returns the name of the stack pointer register. virtual std::string_view StackPointerRegisterName(void) const = 0; @@ -205,7 +246,7 @@ class Arch { // associated with `module`. // // NOTE(pag): This is an internal API. - void InitFromSemanticsModule(llvm::Module *module) const; + virtual void InitFromSemanticsModule(llvm::Module *module) const = 0; inline void PrepareModule(const std::unique_ptr &mod) const { PrepareModule(mod.get()); @@ -215,6 +256,12 @@ class Arch { // information for the target architecture void PrepareModuleDataLayout(llvm::Module *mod) const; + + // TODO(Ian): This is kinda messy but only an arch currently knows if it is + // sleigh or not and sleigh needs different lifting context etc. + virtual InstructionLifter::LifterPtr + DefaultLifter(const remill::IntrinsicTable &intrinsics) const = 0; + inline void PrepareModuleDataLayout(const std::unique_ptr &mod) const { PrepareModuleDataLayout(mod.get()); @@ -252,7 +299,7 @@ class Arch { // `permit_fuse_idioms` is `true` if Remill is allowed to decode multiple // instructions at a time and look for instruction fusing idioms that are // common to this architecture. - virtual uint64_t MaxInstructionSize(bool permit_fuse_idioms=true) const = 0; + virtual uint64_t MaxInstructionSize(bool permit_fuse_idioms = true) const = 0; // Default calling convention for this architecture. virtual llvm::CallingConv::ID DefaultCallingConv(void) const = 0; @@ -308,24 +355,44 @@ class Arch { // include all feature sets. 
static ArchPtr GetHostArch(llvm::LLVMContext &contex); - protected: - Arch(llvm::LLVMContext *context_, OSName os_name_, ArchName arch_name_); - // Populate the table of register information. + // + // NOTE(pag): Internal API; do not invoke unless you are proxying/composing + // architectures. virtual void PopulateRegisterTable(void) const = 0; // Populate a just-initialized lifted function function with architecture- // specific variables. - virtual void FinishLiftedFunctionInitialization( - llvm::Module *module, llvm::Function *bb_func) const = 0; + // + // NOTE(pag): Internal API; do not invoke unless you are proxying/composing + // architectures. + virtual void + FinishLiftedFunctionInitialization(llvm::Module *module, + llvm::Function *bb_func) const = 0; - llvm::Triple BasicTriple(void) const; + // Add a register into this architecture. + // + // NOTE(pag): Internal API; do not invoke unless you are proxying/composing + // architectures. + virtual const Register *AddRegister(const char *reg_name, + llvm::Type *val_type, size_t offset, + const char *parent_reg_name) const = 0; + + // Returns a lock on global state. In general, Remill doesn't use global + // variables for storing state; however, SLEIGH sometimes does, and so when + // using SLEIGH-backed architectures, it can be necessary to acquire this + // lock. + static ArchLocker Lock(ArchName arch_name_); + + protected: + Arch(llvm::LLVMContext *context_, OSName os_name_, ArchName arch_name_); - // Add a register into this - const Register *AddRegister(const char *reg_name, llvm::Type *val_type, - size_t offset, const char *parent_reg_name) const; + llvm::Triple BasicTriple(void) const; private: + static ArchPtr GetArchByName(llvm::LLVMContext *context_, OSName os_name_, + ArchName arch_name_); + // Defined in `lib/Arch/X86/Arch.cpp`. 
static ArchPtr GetX86(llvm::LLVMContext *context, OSName os, ArchName arch_name); @@ -338,6 +405,14 @@ class Arch { static ArchPtr GetAArch64(llvm::LLVMContext *context, OSName os, ArchName arch_name); + // Defined in `lib/Arch/SleighX86/Arch.cpp` + static ArchPtr GetSleighX86(llvm::LLVMContext *context, OSName os, + ArchName arch_name); + + // Defined in `lib/Arch/SleighThumb2/Arch.cpp` + static ArchPtr GetSleighThumb2(llvm::LLVMContext *context, OSName os, + ArchName arch_name); + // Defined in `lib/Arch/SPARC32/Arch.cpp`. static ArchPtr GetSPARC(llvm::LLVMContext *context, OSName os, ArchName arch_name); @@ -347,9 +422,6 @@ class Arch { ArchName arch_name); Arch(void) = delete; - - protected: - mutable std::unique_ptr impl; }; } // namespace remill diff --git a/include/remill/Arch/ArchBase.h b/include/remill/Arch/ArchBase.h new file mode 100644 index 000000000..122192320 --- /dev/null +++ b/include/remill/Arch/ArchBase.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2021 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include + +namespace llvm { +class FunctionType; +class PointerType; +class StructType; +} // namespace llvm +namespace remill { + +struct Register; + +// Internal base architecture for all Remill-internal architectures. 
+class ArchBase : public remill::Arch { + public: + using ArchPtr = std::unique_ptr; + + ArchBase(llvm::LLVMContext *context_, OSName os_name_, ArchName arch_name_); + + virtual ~ArchBase(void) = default; + + // Return the type of the state structure. + llvm::StructType *StateStructType(void) const final; + + // Pointer to a state structure type. + llvm::PointerType *StatePointerType(void) const final; + + // The type of memory. + llvm::PointerType *MemoryPointerType(void) const final; + + // Return the type of a lifted function. + llvm::FunctionType *LiftedFunctionType(void) const final; + + llvm::StructType *RegisterWindowType(void) const final; + + // Apply `cb` to every register. + void ForEachRegister(std::function cb) const final; + + // Return information about the register at offset `offset` in the `State` + // structure. + const Register *RegisterAtStateOffset(uint64_t offset) const final; + + // Return information about a register, given its name. + const Register *RegisterByName(std::string_view name) const final; + + unsigned RegMdID(void) const final; + + // TODO(Ian): This is kinda messy but only an arch currently knows if it is + // sleigh or not and sleigh needs different lifting context etc. + InstructionLifter::LifterPtr + DefaultLifter(const remill::IntrinsicTable &intrinsics) const override; + + // Get the state pointer and various other types from the `llvm::LLVMContext` + // associated with `module`. + // + // NOTE(pag): This is an internal API. + void InitFromSemanticsModule(llvm::Module *module) const final; + + // Add a register into this architecture. + const Register *AddRegister(const char *reg_name, llvm::Type *val_type, + size_t offset, + const char *parent_reg_name) const final; + + // State type. Initially this is `nullptr` because we can construct an arch + // without loading in a semantics module. 
When we load a semantics module, we + // learn about the LLVM type of the state structure, and so we need to be + // able to update this in-place. + mutable llvm::StructType *state_type{nullptr}; + + // Memory pointer type. + mutable llvm::PointerType *memory_type{nullptr}; + + // Lifted function type. + mutable llvm::FunctionType *lifted_function_type{nullptr}; + + // Register window type. + mutable llvm::StructType *register_window_type{nullptr}; + + // Metadata type ID for remill registers. + mutable unsigned reg_md_id{0}; + + mutable std::vector> registers; + mutable std::vector reg_by_offset; + mutable std::unordered_map reg_by_name; +}; + +} // namespace remill diff --git a/include/remill/Arch/Instruction.h b/include/remill/Arch/Instruction.h index 55d438e11..57a0e8671 100644 --- a/include/remill/Arch/Instruction.h +++ b/include/remill/Arch/Instruction.h @@ -209,6 +209,9 @@ class Instruction { // of instructions. ArchName sub_arch_name; + // Name of the architecture of the branch taken target. + ArchName branch_taken_arch_name; + // Pointer to the `remill::Arch` used to complete the decoding of this // instruction. 
const Arch *arch; diff --git a/include/remill/Arch/Name.h b/include/remill/Arch/Name.h index 0a61ec95d..0124db7c7 100644 --- a/include/remill/Arch/Name.h +++ b/include/remill/Arch/Name.h @@ -76,10 +76,12 @@ enum ArchName : uint32_t { kArchX86, kArchX86_AVX, kArchX86_AVX512, + kArchX86_SLEIGH, kArchAMD64, kArchAMD64_AVX, kArchAMD64_AVX512, + kArchAMD64_SLEIGH, kArchAArch32LittleEndian, kArchAArch64LittleEndian, diff --git a/include/remill/Arch/X86/X86Base.h b/include/remill/Arch/X86/X86Base.h new file mode 100644 index 000000000..cae9107b8 --- /dev/null +++ b/include/remill/Arch/X86/X86Base.h @@ -0,0 +1,44 @@ + +#pragma once +#include +#include +// clang-format off +#define HAS_FEATURE_AVX 1 +#define HAS_FEATURE_AVX512 1 +#define ADDRESS_SIZE_BITS 64 +#define INCLUDED_FROM_REMILL +#include +// clang-format on + +#include +namespace remill { +/// Class to derive from to handle x86 addregs +class X86ArchBase : public virtual ArchBase { + public: + X86ArchBase(llvm::LLVMContext *context_, OSName os_name_, ArchName arch_name_) + : ArchBase(context_, os_name_, arch_name_) {} + + virtual std::string_view StackPointerRegisterName(void) const; + + std::string_view ProgramCounterRegisterName(void) const; + uint64_t MinInstructionAlign(void) const; + + + uint64_t MinInstructionSize(void) const; + + uint64_t MaxInstructionSize(bool) const; + llvm::CallingConv::ID DefaultCallingConv(void) const; + + llvm::DataLayout DataLayout(void) const; + + llvm::Triple Triple(void) const; + + + void PopulateRegisterTable(void) const; + // Populate a just-initialized lifted function function with architecture- + // specific variables. 
+ void FinishLiftedFunctionInitialization(llvm::Module *module, + llvm::Function *bb_func) const; + virtual ~X86ArchBase(void) = default; +}; +} // namespace remill \ No newline at end of file diff --git a/include/remill/BC/InstructionLifter.h b/include/remill/BC/InstructionLifter.h index caf0f8e3d..dbec6e8d8 100644 --- a/include/remill/BC/InstructionLifter.h +++ b/include/remill/BC/InstructionLifter.h @@ -56,6 +56,8 @@ enum LiftStatus { // is called with the appropriate arguments. class InstructionLifter { public: + using LifterPtr = std::unique_ptr; + virtual ~InstructionLifter(void); inline InstructionLifter(const std::unique_ptr &arch_, @@ -90,6 +92,9 @@ class InstructionLifter { // Clear out the cache of the current register values/addresses loaded. void ClearCache(void) const; + + llvm::Type *GetMemoryType(); + protected: // Lift an operand to an instruction. virtual llvm::Value *LiftOperand(Instruction &inst, llvm::BasicBlock *block, @@ -135,6 +140,14 @@ class InstructionLifter { LoadWordRegValOrZero(llvm::BasicBlock *block, llvm::Value *state_ptr, std::string_view reg_name, llvm::ConstantInt *zero); + + protected: + llvm::Type *GetWordType(); + + + const IntrinsicTable *GetIntrinsicTable(); + bool ArchHasRegByName(std::string name); + private: friend class TraceLifter; diff --git a/include/remill/BC/Lifter.h b/include/remill/BC/Lifter.h index 7d38d63bd..434a9d461 100644 --- a/include/remill/BC/Lifter.h +++ b/include/remill/BC/Lifter.h @@ -17,4 +17,5 @@ #pragma once #include "InstructionLifter.h" +#include "SleighLifter.h" #include "TraceLifter.h" diff --git a/include/remill/BC/SleighLifter.h b/include/remill/BC/SleighLifter.h new file mode 100644 index 000000000..d447298c9 --- /dev/null +++ b/include/remill/BC/SleighLifter.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2022-present Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +#include "remill/Arch/Instruction.h" +#include "remill/BC/InstructionLifter.h" + +class Sleigh; + +namespace remill { +namespace sleigh { +class SleighArch; +class SingleInstructionSleighContext; +} // namespace sleigh + +class SleighLifter : public InstructionLifter { + private: + class PcodeToLLVMEmitIntoBlock; + + std::unique_ptr sleigh_context; + // Architecture being used for lifting. + const sleigh::SleighArch *const arch; + + + public: + static const std::string_view kInstructionFunctionPrefix; + + SleighLifter(const sleigh::SleighArch *arch_, + const IntrinsicTable &intrinsics_); + + virtual ~SleighLifter(void) = default; + + LiftStatus LiftIntoBlock(Instruction &inst, llvm::BasicBlock *block, + llvm::Value *state_ptr, bool is_delayed) override; + + private: + static void SetISelAttributes(llvm::Function *); + + std::pair + LiftIntoInternalBlock(Instruction &inst, llvm::Module *target_mod, + bool is_delayed); + + ::Sleigh &GetEngine(void) const; +}; + +} // namespace remill diff --git a/lib/Arch/AArch32/Arch.cpp b/lib/Arch/AArch32/Arch.cpp index e58d9ee07..f5dd91af5 100644 --- a/lib/Arch/AArch32/Arch.cpp +++ b/lib/Arch/AArch32/Arch.cpp @@ -43,156 +43,17 @@ // clang-format on -#include "../Arch.h" // For `ArchImpl`. +#include // For `ArchImpl`. 
namespace remill { AArch32Arch::AArch32Arch(llvm::LLVMContext *context_, OSName os_name_, ArchName arch_name_) - : Arch(context_, os_name_, arch_name_) {} + : ArchBase(context_, os_name_, arch_name_), + AArch32ArchBase(context_, os_name_, arch_name_) {} AArch32Arch::~AArch32Arch(void) {} -// TODO(pag): Eventually handle Thumb2 and unaligned addresses. -uint64_t AArch32Arch::MinInstructionAlign(void) const { - return 4; -} - -uint64_t AArch32Arch::MinInstructionSize(void) const { - return 4; -} - -// Maximum number of bytes in an instruction for this particular architecture. -uint64_t AArch32Arch::MaxInstructionSize(bool) const { - return 4; -} - -// Default calling convention for this architecture. -llvm::CallingConv::ID AArch32Arch::DefaultCallingConv(void) const { - return llvm::CallingConv::C; // cdecl. -} - -// Get the LLVM triple for this architecture. -llvm::Triple AArch32Arch::Triple(void) const { - auto triple = BasicTriple(); - switch (arch_name) { - case kArchAArch32LittleEndian: triple.setArch(llvm::Triple::arm); break; - default: - LOG(FATAL) << "Cannot get triple for non-aarch32 architecture " - << GetArchName(arch_name); - } - - return triple; -} - -// Get the LLVM DataLayout for a module. -llvm::DataLayout AArch32Arch::DataLayout(void) const { - std::string dl; - switch (os_name) { - case kOSInvalid: - LOG(FATAL) << "Cannot convert module for an unrecognized OS."; - break; - - case kOSLinux: - case kOSSolaris: - case kOSmacOS: - case kOSWindows: - dl = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"; - break; - } - - return llvm::DataLayout(dl); -} - -// Returns the name of the stack pointer register. -std::string_view AArch32Arch::StackPointerRegisterName(void) const { - return "SP"; -} - -// Returns the name of the program counter register. -std::string_view AArch32Arch::ProgramCounterRegisterName(void) const { - return "PC"; -} - -// Populate the table of register information. 
-void AArch32Arch::PopulateRegisterTable(void) const { - CHECK_NOTNULL(context); - - impl->reg_by_offset.resize(sizeof(AArch32State)); - - auto u8 = llvm::Type::getInt8Ty(*context); - - auto u32 = llvm::Type::getInt32Ty(*context); - -#define OFFSET_OF(type, access) \ - (reinterpret_cast(&reinterpret_cast( \ - static_cast(nullptr)->access))) - -#define REG(name, access, type) \ - AddRegister(#name, type, OFFSET_OF(AArch32State, access), nullptr) - -#define SUB_REG(name, access, type, parent_reg_name) \ - AddRegister(#name, type, OFFSET_OF(AArch32State, access), #parent_reg_name) - - REG(R0, gpr.r0.dword, u32); - REG(R1, gpr.r1.dword, u32); - REG(R2, gpr.r2.dword, u32); - REG(R3, gpr.r3.dword, u32); - REG(R4, gpr.r4.dword, u32); - REG(R5, gpr.r5.dword, u32); - REG(R6, gpr.r6.dword, u32); - REG(R7, gpr.r7.dword, u32); - REG(R8, gpr.r8.dword, u32); - REG(R9, gpr.r9.dword, u32); - REG(R10, gpr.r10.dword, u32); - REG(R11, gpr.r11.dword, u32); - REG(R12, gpr.r12.dword, u32); - REG(R13, gpr.r13.dword, u32); - REG(R14, gpr.r14.dword, u32); - REG(R15, gpr.r15.dword, u32); - - SUB_REG(SP, gpr.r13.dword, u32, R13); - SUB_REG(LR, gpr.r14.dword, u32, R14); - SUB_REG(PC, gpr.r15.dword, u32, R15); - - REG(N, sr.n, u8); - REG(C, sr.c, u8); - REG(Z, sr.z, u8); - REG(V, sr.v, u8); -} - - -// Populate a just-initialized lifted function function with architecture- -// specific variables. 
-void AArch32Arch::FinishLiftedFunctionInitialization( - llvm::Module *module, llvm::Function *bb_func) const { - const auto &dl = module->getDataLayout(); - CHECK_EQ(sizeof(State), dl.getTypeAllocSize(StateStructType())) - << "Mismatch between size of State type for x86/amd64 and what is in " - << "the bitcode module"; - - auto &context = module->getContext(); - auto u8 = llvm::Type::getInt8Ty(context); - - // auto u16 = llvm::Type::getInt16Ty(context); - auto u32 = llvm::Type::getInt32Ty(context); - auto addr = llvm::Type::getIntNTy(context, address_size); - - const auto entry_block = &bb_func->getEntryBlock(); - llvm::IRBuilder<> ir(entry_block); - - const auto pc_arg = NthArgument(bb_func, kPCArgNum); - const auto state_ptr_arg = NthArgument(bb_func, kStatePointerArgNum); - ir.CreateStore(pc_arg, - ir.CreateAlloca(addr, nullptr, kNextPCVariableName.data())); - ir.CreateStore( - pc_arg, ir.CreateAlloca(addr, nullptr, kIgnoreNextPCVariableName.data())); - - auto zero_c = ir.CreateAlloca(u8, nullptr, "ZERO_C"); - ir.CreateStore(llvm::Constant::getNullValue(u8), zero_c); - ir.CreateAlloca(u32, nullptr, "SUPPRESS_WRITEBACK"); - (void) this->RegisterByName("PC")->AddressOf(state_ptr_arg, ir); -} // TODO(pag): We pretend that these are singletons, but they aren't really! Arch::ArchPtr Arch::GetAArch32(llvm::LLVMContext *context_, OSName os_name_, diff --git a/lib/Arch/AArch32/Arch.h b/lib/Arch/AArch32/Arch.h index 5dd6fef6e..7a468b753 100644 --- a/lib/Arch/AArch32/Arch.h +++ b/lib/Arch/AArch32/Arch.h @@ -16,44 +16,18 @@ #pragma once -#include "../Arch.h" // For `Arch` and `ArchImpl`. +#include namespace remill { -class AArch32Arch final : public Arch { +class AArch32Arch final : public AArch32ArchBase { public: AArch32Arch(llvm::LLVMContext *context_, OSName os_name_, ArchName arch_name_); virtual ~AArch32Arch(void); - // Returns the name of the stack pointer register. 
- std::string_view StackPointerRegisterName(void) const final; - - // Returns the name of the program counter register. - std::string_view ProgramCounterRegisterName(void) const final; - - // Decode an instuction. bool DecodeInstruction(uint64_t address, std::string_view inst_bytes, - Instruction &inst) const final; - - // Align/Minimum/Maximum number of bytes in an instruction. - uint64_t MinInstructionAlign(void) const final; - uint64_t MinInstructionSize(void) const final; - uint64_t MaxInstructionSize(bool permit_fuse_idioms) const final; - - llvm::Triple Triple(void) const final; - llvm::DataLayout DataLayout(void) const final; - - // Default calling convention for this architecture. - llvm::CallingConv::ID DefaultCallingConv(void) const final; - - // Populate the table of register information. - void PopulateRegisterTable(void) const final; - - // Populate a just-initialized lifted function function with architecture- - // specific variables. - void FinishLiftedFunctionInitialization( - llvm::Module *module, llvm::Function *bb_func) const final; + Instruction &inst) const override; private: AArch32Arch(void) = delete; diff --git a/lib/Arch/AArch32/Base.cpp b/lib/Arch/AArch32/Base.cpp new file mode 100644 index 000000000..910ac4c3c --- /dev/null +++ b/lib/Arch/AArch32/Base.cpp @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace remill { + + +// TODO(pag): Eventually handle Thumb2 and unaligned addresses. +uint64_t AArch32ArchBase::MinInstructionAlign(void) const { + return 4; +} + +uint64_t AArch32ArchBase::MinInstructionSize(void) const { + return 4; +} + +// Maximum number of bytes in an instruction for this particular architecture. +uint64_t AArch32ArchBase::MaxInstructionSize(bool) const { + return 4; +} + +// Default calling convention for this architecture. 
+llvm::CallingConv::ID AArch32ArchBase::DefaultCallingConv(void) const { + return llvm::CallingConv::C; // cdecl. +} + +// Get the LLVM triple for this architecture. +llvm::Triple AArch32ArchBase::Triple(void) const { + auto triple = BasicTriple(); + switch (arch_name) { + case kArchAArch32LittleEndian: triple.setArch(llvm::Triple::arm); break; + default: + LOG(FATAL) << "Cannot get triple for non-aarch32 architecture " + << GetArchName(arch_name); + } + + return triple; +} + +// Get the LLVM DataLayout for a module. +llvm::DataLayout AArch32ArchBase::DataLayout(void) const { + std::string dl; + switch (os_name) { + case kOSInvalid: + LOG(FATAL) << "Cannot convert module for an unrecognized OS."; + break; + + case kOSLinux: + case kOSSolaris: + case kOSmacOS: + case kOSWindows: + dl = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"; + break; + } + + return llvm::DataLayout(dl); +} + +// Returns the name of the stack pointer register. +std::string_view AArch32ArchBase::StackPointerRegisterName(void) const { + return "SP"; +} + +// Returns the name of the program counter register. +std::string_view AArch32ArchBase::ProgramCounterRegisterName(void) const { + return "PC"; +} + +// Populate the table of register information. 
+void AArch32ArchBase::PopulateRegisterTable(void) const { + CHECK_NOTNULL(context); + + reg_by_offset.resize(sizeof(AArch32State)); + + auto u8 = llvm::Type::getInt8Ty(*context); + + auto u32 = llvm::Type::getInt32Ty(*context); + +#define OFFSET_OF(type, access) \ + (reinterpret_cast(&reinterpret_cast( \ + static_cast(nullptr)->access))) + +#define REG(name, access, type) \ + AddRegister(#name, type, OFFSET_OF(AArch32State, access), nullptr) + +#define SUB_REG(name, access, type, parent_reg_name) \ + AddRegister(#name, type, OFFSET_OF(AArch32State, access), #parent_reg_name) + + REG(R0, gpr.r0.dword, u32); + REG(R1, gpr.r1.dword, u32); + REG(R2, gpr.r2.dword, u32); + REG(R3, gpr.r3.dword, u32); + REG(R4, gpr.r4.dword, u32); + REG(R5, gpr.r5.dword, u32); + REG(R6, gpr.r6.dword, u32); + REG(R7, gpr.r7.dword, u32); + REG(R8, gpr.r8.dword, u32); + REG(R9, gpr.r9.dword, u32); + REG(R10, gpr.r10.dword, u32); + REG(R11, gpr.r11.dword, u32); + REG(R12, gpr.r12.dword, u32); + REG(R13, gpr.r13.dword, u32); + REG(R14, gpr.r14.dword, u32); + REG(R15, gpr.r15.dword, u32); + + SUB_REG(SP, gpr.r13.dword, u32, R13); + SUB_REG(LR, gpr.r14.dword, u32, R14); + SUB_REG(PC, gpr.r15.dword, u32, R15); + + REG(N, sr.n, u8); + REG(C, sr.c, u8); + REG(Z, sr.z, u8); + REG(V, sr.v, u8); +} + + +// Populate a just-initialized lifted function function with architecture- +// specific variables. 
+void AArch32ArchBase::FinishLiftedFunctionInitialization( + llvm::Module *module, llvm::Function *bb_func) const { + const auto &dl = module->getDataLayout(); + CHECK_EQ(sizeof(State), dl.getTypeAllocSize(StateStructType())) + << "Mismatch between size of State type for x86/amd64 and what is in " + << "the bitcode module"; + + auto &context = module->getContext(); + auto u8 = llvm::Type::getInt8Ty(context); + + // auto u16 = llvm::Type::getInt16Ty(context); + auto u32 = llvm::Type::getInt32Ty(context); + auto addr = llvm::Type::getIntNTy(context, address_size); + + const auto entry_block = &bb_func->getEntryBlock(); + llvm::IRBuilder<> ir(entry_block); + + const auto pc_arg = NthArgument(bb_func, kPCArgNum); + const auto state_ptr_arg = NthArgument(bb_func, kStatePointerArgNum); + ir.CreateStore(pc_arg, + ir.CreateAlloca(addr, nullptr, kNextPCVariableName.data())); + ir.CreateStore( + pc_arg, ir.CreateAlloca(addr, nullptr, kIgnoreNextPCVariableName.data())); + + auto zero_c = ir.CreateAlloca(u8, nullptr, "ZERO_C"); + ir.CreateStore(llvm::Constant::getNullValue(u8), zero_c); + ir.CreateAlloca(u32, nullptr, "SUPPRESS_WRITEBACK"); + (void) this->RegisterByName("PC")->AddressOf(state_ptr_arg, ir); +} +} // namespace remill \ No newline at end of file diff --git a/lib/Arch/AArch32/CMakeLists.txt b/lib/Arch/AArch32/CMakeLists.txt index 293aff642..ae4e285af 100644 --- a/lib/Arch/AArch32/CMakeLists.txt +++ b/lib/Arch/AArch32/CMakeLists.txt @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -20,15 +20,17 @@ add_library(remill_arch_aarch32 STATIC "${REMILL_INCLUDE_DIR}/remill/Arch/Runtime/Runtime.h" "${REMILL_INCLUDE_DIR}/remill/Arch/Runtime/State.h" "${REMILL_INCLUDE_DIR}/remill/Arch/Runtime/Types.h" - + "${REMILL_INCLUDE_DIR}/remill/Arch/AArch32/Runtime/Operators.h" "${REMILL_INCLUDE_DIR}/remill/Arch/AArch32/Runtime/State.h" "${REMILL_INCLUDE_DIR}/remill/Arch/AArch32/Runtime/Types.h" - + Arch.cpp Decode.cpp -# Decode.h -# Extract.cpp + Base.cpp + + # Decode.h + # Extract.cpp ) add_subdirectory(Runtime) diff --git a/lib/Arch/AArch32/Decode.cpp b/lib/Arch/AArch32/Decode.cpp index 54b3e4b63..d695c82bc 100644 --- a/lib/Arch/AArch32/Decode.cpp +++ b/lib/Arch/AArch32/Decode.cpp @@ -18,8 +18,11 @@ #include +#include +#include + #include "Arch.h" -#include "remill/BC/ABI.h" +#include "../BitManipulation.h" namespace remill { @@ -724,7 +727,7 @@ static void ExpandTo32AddImmAddCarry(Instruction &inst, uint32_t imm12, if (!rotation_amount) { AddImmOp(inst, unrotated_value); } else { - AddImmOp(inst, __builtin_rotateright32(unrotated_value, rotation_amount)); + AddImmOp(inst, RotateRight32(unrotated_value, rotation_amount)); } if (carry_out) { @@ -1074,9 +1077,9 @@ std::optional EvalShift(const Operand::ShiftRegister &op, switch (op.shift_op) { case Operand::ShiftRegister::kShiftInvalid: return maybe_val; case Operand::ShiftRegister::kShiftLeftAround: - return __builtin_rotateleft32(val, static_cast(op.shift_size)); + return RotateLeft32(val, static_cast(op.shift_size)); case Operand::ShiftRegister::kShiftRightAround: - return __builtin_rotateright32(val, static_cast(op.shift_size)); + return RotateRight32(val, static_cast(op.shift_size)); case Operand::ShiftRegister::kShiftLeftWithOnes: 
return (val << op.shift_size) | ~(~0u << op.shift_size); case Operand::ShiftRegister::kShiftLeftWithZeroes: @@ -1209,10 +1212,12 @@ static bool EvalPCDest(Instruction &inst, const bool s, const unsigned int rd, } else { auto res = evaluator(*src1, *src2); if (!res) { + inst.branch_taken_pc = 0; if (is_cond) { inst.branch_not_taken_pc = inst.next_pc; inst.category = Instruction::kCategoryConditionalIndirectJump; } else { + inst.branch_not_taken_pc = 0; inst.category = Instruction::kCategoryIndirectJump; } } else if (is_cond) { @@ -1221,8 +1226,15 @@ static bool EvalPCDest(Instruction &inst, const bool s, const unsigned int rd, inst.category = Instruction::kCategoryConditionalBranch; } else { inst.branch_taken_pc = static_cast(*res); + inst.branch_not_taken_pc = 0; inst.category = Instruction::kCategoryDirectJump; } + if (inst.branch_taken_pc % 2u) { + inst.branch_taken_arch_name = ArchName::kArchThumb2LittleEndian; + inst.branch_taken_pc -= 1u; + } else { + inst.branch_taken_arch_name = inst.arch_name; + } } } } else { @@ -1354,9 +1366,10 @@ static bool TryDecodeIntegerDataProcessingRRI(Instruction &inst, // Raise the program counter to align to a multiple of 4 bytes if (enc.rn == kPCRegNum && (enc.opc == 0b100u || enc.opc == 0b010u)) { int64_t diff = - static_cast(inst.pc & ~(3u)) - static_cast(inst.pc); + static_cast(inst.pc & ~(3u)) - + static_cast(inst.pc); AddAddrRegOp(inst, kPCVariableName.data(), kAddressSize, - Operand::kActionRead, diff); + Operand::kActionRead, diff + 8); } else { AddIntRegOp(inst, enc.rn, kAddressSize, Operand::kActionRead); } @@ -1824,7 +1837,9 @@ static bool TryDecodeLoadStoreWordUBReg(Instruction &inst, uint32_t bits) { static_cast(inst.pc & ~(3u)) - static_cast(inst.pc); } - AddShiftRegImmOperand(inst, enc.rm, enc.type, enc.imm5, 0u, false); + // TODO(pag): Changed `can_shift_right_by_32` to `true` but don't know why + // it was previously `false`. 
+ AddShiftRegImmOperand(inst, enc.rm, enc.type, enc.imm5, 0u, true); auto disp_expr = inst.operands.back().expr; auto disp_op = llvm::Instruction::Add; @@ -2702,7 +2717,6 @@ static bool TryIntegerTestAndCompareRI(Instruction &inst, uint32_t bits) { static bool TryBranchImm(Instruction &inst, uint32_t bits) { const BranchI enc = {bits}; auto is_cond = DecodeCondition(inst, enc.cond); - auto is_func = false; // PC used by the branch instruction is actually the address of the next instruction @@ -2717,11 +2731,16 @@ static bool TryBranchImm(Instruction &inst, uint32_t bits) { inst.function = "BL"; is_func = true; } + + inst.branch_taken_arch_name = inst.arch_name; + } else { inst.function = "BLX"; target_pc = target_pc & ~0b11u; target_pc = target_pc | (enc.H << 1); is_func = true; + + inst.branch_taken_arch_name = remill::ArchName::kArchThumb2LittleEndian; } if (is_cond) { inst.function += "COND"; @@ -2771,8 +2790,8 @@ static const char *const kBX[] = { static bool TryDecodeBX(Instruction &inst, uint32_t bits) { const Misc enc = {bits}; - if (enc.op1 == 0b10) { // BJX unsupported - LOG(ERROR) << "BJX unsupported"; + if (enc.op1 == 0b10) { // BXJ (branch and link to Jazelle mode) unsupported + LOG(ERROR) << "BXJ unsupported"; inst.category = Instruction::kCategoryError; return false; } else if (enc.op1 == 0b11 && enc.Rm == kPCRegNum) { @@ -2791,6 +2810,7 @@ static bool TryDecodeBX(Instruction &inst, uint32_t bits) { AddAddrRegOp(inst, kIntRegName[enc.Rm], kAddressSize, Operand::kActionRead, 0); + inst.branch_taken_arch_name = inst.arch_name; inst.branch_not_taken_pc = inst.pc + 4; if (enc.op1 == 0b01) { if (is_cond && (enc.Rm == kLRRegNum)) { @@ -3644,6 +3664,7 @@ bool AArch32Arch::DecodeInstruction(uint64_t address, inst.has_branch_not_taken_delay_slot = false; inst.arch_name = arch_name; inst.sub_arch_name = arch_name; // TODO(pag): Thumb. 
+ inst.branch_taken_arch_name = arch_name; inst.arch = this; inst.category = Instruction::kCategoryInvalid; inst.operands.clear(); diff --git a/lib/Arch/AArch32/Runtime/CMakeLists.txt b/lib/Arch/AArch32/Runtime/CMakeLists.txt index 560e8d054..ffcbf23f1 100644 --- a/lib/Arch/AArch32/Runtime/CMakeLists.txt +++ b/lib/Arch/AArch32/Runtime/CMakeLists.txt @@ -64,3 +64,4 @@ function(add_runtime_helper target_name little_endian) endfunction() add_runtime_helper(aarch32 1) +add_runtime_helper(thumb2 1) diff --git a/lib/Arch/AArch64/Arch.cpp b/lib/Arch/AArch64/Arch.cpp index 32c431197..490108509 100644 --- a/lib/Arch/AArch64/Arch.cpp +++ b/lib/Arch/AArch64/Arch.cpp @@ -33,7 +33,7 @@ #define REMILL_AARCH_STRICT_REGNUM -#include "../Arch.h" // For `Arch` and `ArchImpl`. +#include #include "Decode.h" #include "remill/Arch/Instruction.h" @@ -106,7 +106,7 @@ Instruction::Category InstCategory(const aarch64::InstData &inst) { } } -class AArch64Arch final : public Arch { +class AArch64Arch final : public ArchBase { public: AArch64Arch(llvm::LLVMContext *context_, OSName os_name_, ArchName arch_name_); @@ -139,8 +139,8 @@ class AArch64Arch final : public Arch { // Populate a just-initialized lifted function function with architecture- // specific variables. - void FinishLiftedFunctionInitialization( - llvm::Module *module, llvm::Function *bb_func) const final; + void FinishLiftedFunctionInitialization(llvm::Module *module, + llvm::Function *bb_func) const final; private: AArch64Arch(void) = delete; @@ -148,7 +148,7 @@ class AArch64Arch final : public Arch { AArch64Arch::AArch64Arch(llvm::LLVMContext *context_, OSName os_name_, ArchName arch_name_) - : Arch(context_, os_name_, arch_name_) {} + : ArchBase(context_, os_name_, arch_name_) {} AArch64Arch::~AArch64Arch(void) {} @@ -160,7 +160,7 @@ llvm::CallingConv::ID AArch64Arch::DefaultCallingConv(void) const { // Populate the table of register information. 
void AArch64Arch::PopulateRegisterTable(void) const { - impl->reg_by_offset.resize(sizeof(AArch64State)); + reg_by_offset.resize(sizeof(AArch64State)); #define OFFSET_OF(type, access) \ (reinterpret_cast(&reinterpret_cast( \ @@ -807,7 +807,7 @@ static void AddExtendRegOperand(Instruction &inst, RegClass reg_class, op.shift_reg.extend_op = Operand::ShiftRegister::kExtendInvalid; op.shift_reg.extract_size = 0; - // Extracting a value that is wider than the register. + // Extracting a value that is wider than the register. } else if (op.shift_reg.extract_size > op.shift_reg.reg.size) { op.shift_reg.extend_op = Operand::ShiftRegister::kExtendInvalid; op.shift_reg.extract_size = 0; @@ -1222,6 +1222,7 @@ bool AArch64Arch::DecodeInstruction(uint64_t address, inst.arch = this; inst.arch_name = arch_name; inst.sub_arch_name = arch_name; // TODO(pag): Thumb. + inst.branch_taken_arch_name = arch_name; inst.pc = address; inst.next_pc = address + kInstructionSize; inst.category = Instruction::kCategoryInvalid; @@ -1794,6 +1795,7 @@ bool TryDecodeB_ONLY_BRANCH_IMM(const InstData &data, Instruction &inst) { AddPCDisp(inst, data.imm26.simm26 << 2LL); inst.branch_taken_pc = static_cast(static_cast(inst.pc) + (data.imm26.simm26 << 2ULL)); + inst.branch_taken_arch_name = inst.arch_name; return true; } @@ -1824,6 +1826,13 @@ static void DecodeConditionalBranch(Instruction &inst, int64_t disp) { inst.branch_taken_pc = static_cast(static_cast(inst.pc) + disp); + if (inst.branch_taken_pc % 2u) { + inst.branch_taken_arch_name = ArchName::kArchThumb2LittleEndian; + inst.branch_taken_pc -= 1u; + } else { + inst.branch_taken_arch_name = inst.arch_name; + } + DecodeFallThroughPC(inst); } @@ -1882,6 +1891,7 @@ bool TryDecodeTBNZ_ONLY_TESTBRANCH(const InstData &data, Instruction &inst) { // BL