From b5fb883823feaf9088cf7caeb7d53f699db94d4e Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Mon, 28 Oct 2024 02:10:44 +0100 Subject: [PATCH] Refactor analysis and output a liveness JSON --- obfuscator/CMakeLists.txt | 16 ++ obfuscator/cmake.toml | 8 +- obfuscator/include/obfuscator/analyze.hpp | 56 +++++- obfuscator/include/obfuscator/context.hpp | 4 +- obfuscator/include/obfuscator/obfuscate.hpp | 2 +- obfuscator/src/obfuscate.cpp | 157 +++++++++++++++- obfuscator/src/obfuscator/analyze.cpp | 189 +++++++------------- obfuscator/src/obfuscator/context.cpp | 73 +++++--- obfuscator/src/obfuscator/obfuscate.cpp | 2 +- 9 files changed, 346 insertions(+), 161 deletions(-) diff --git a/obfuscator/CMakeLists.txt b/obfuscator/CMakeLists.txt index ad71042..49fdfc6 100644 --- a/obfuscator/CMakeLists.txt +++ b/obfuscator/CMakeLists.txt @@ -91,9 +91,24 @@ FetchContent_Declare(linux-pe ) FetchContent_MakeAvailable(linux-pe) +message(STATUS "Fetching json...") +FetchContent_Declare(json + URL + "https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz" + URL_HASH + SHA256=d6c65aca6b1ed68e7a182f4757257b107ae403032760ed6ef121c9d55e81757d +) +FetchContent_MakeAvailable(json) + # Target: obfuscator set(obfuscator_SOURCES cmake.toml + "include/obfuscator/analyze.hpp" + "include/obfuscator/context.hpp" + "include/obfuscator/disassemble.hpp" + "include/obfuscator/msvc-secure.hpp" + "include/obfuscator/obfuscate.hpp" + "include/obfuscator/utility.hpp" "src/obfuscator/analyze.cpp" "src/obfuscator/context.cpp" "src/obfuscator/disassemble.cpp" @@ -136,6 +151,7 @@ if(CMKR_ROOT_PROJECT) # root target_link_libraries(obfuscate PRIVATE riscvm::obfuscator args::args + nlohmann_json::nlohmann_json ) get_directory_property(CMKR_VS_STARTUP_PROJECT DIRECTORY ${PROJECT_SOURCE_DIR} DEFINITION VS_STARTUP_PROJECT) diff --git a/obfuscator/cmake.toml b/obfuscator/cmake.toml index 1530fc7..248bbeb 100644 --- a/obfuscator/cmake.toml +++ b/obfuscator/cmake.toml @@ -29,11 +29,15 @@ sha256 = "312151a2d13c8327f5c9c586ac6cf7cddc1658e8f53edae0ec56509c8fa516c9" git = "https://github.com/can1357/linux-pe" tag = "be6d1f6fc30fb8058b5220e0bb2652a6dc8ec0b0" +[fetch-content.json] +url = "https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz" +sha256 = "d6c65aca6b1ed68e7a182f4757257b107ae403032760ed6ef121c9d55e81757d" + [target.obfuscator] type = "static" alias = "riscvm::obfuscator" sources = ["src/obfuscator/*.cpp"] -headers = ["src/obfuscator/*.hpp"] +headers = ["include/obfuscator/*.hpp"] include-directories = ["include"] compile-features = ["cxx_std_20"] link-libraries = ["zasm::zasm", "linux-pe", "fmt::fmt"] @@ -42,7 +46,7 @@ link-libraries = ["zasm::zasm", "linux-pe", "fmt::fmt"] condition = "root" type = "executable" sources = ["src/obfuscate.cpp"] -link-libraries = ["riscvm::obfuscator", "args::args"] +link-libraries = ["riscvm::obfuscator", "args::args", "nlohmann_json::nlohmann_json"] [target.tests] type = "executable" diff --git a/obfuscator/include/obfuscator/analyze.hpp b/obfuscator/include/obfuscator/analyze.hpp index 6692b49..ff91e4d 100644 --- a/obfuscator/include/obfuscator/analyze.hpp +++ b/obfuscator/include/obfuscator/analyze.hpp @@ -1,8 +1,60 @@ #pragma once +#include +#include +#include + #include namespace obfuscator { -bool analyze(Context& ctx, bool verbose = false); -} +struct BasicBlock +{ + uint64_t address = 0; + zasm::Label label; + zasm::Node* begin = nullptr; + zasm::Node* end = nullptr; + std::vector successors; +}; + +struct BlockLiveness +{ + uint32_t regsGen = 0; + uint32_t regsKill = 0; + uint32_t regsLiveIn = 0; + uint32_t regsLiveOut = 0; + + zasm::InstrCPUFlags flagsGen = 0; + zasm::InstrCPUFlags flagsKill = 0; + zasm::InstrCPUFlags flagsLiveIn = 0; + zasm::InstrCPUFlags flagsLiveOut = 0; +}; + +struct InstructionLiveness +{ + zasm::Node* node = nullptr; + uint32_t regsLive = 0; + zasm::InstrCPUFlags flagsLive = 0; +}; + +class CFG +{ + explicit CFG(const zasm::Program& program) : program(program) + { + } + + public: + const zasm::Program& program; + std::map blocks; + std::set exits; + + static zasm::Expected + analyze(const zasm::Program& program, zasm::Label entry, bool verbose = false); + + std::map> getPredecessors() const; + std::map getLivenessBlocks(bool verbose = false) const; + std::vector + getInstructionLiveness(const std::map& blockLiveness, bool verbose = false) const; +}; + +} // namespace obfuscator diff --git a/obfuscator/include/obfuscator/context.hpp b/obfuscator/include/obfuscator/context.hpp index 8480bea..38d365c 100644 --- a/obfuscator/include/obfuscator/context.hpp +++ b/obfuscator/include/obfuscator/context.hpp @@ -24,6 +24,8 @@ template <> struct fmt::formatter : fmt::formatter maskToFlags(uint32_t mask); std::string formatFlagsMask(uint32_t mask); std::string formatRegsMask(uint64_t mask); std::vector maskToRegs(uint64_t mask); @@ -38,8 +40,8 @@ struct InstructionData uint32_t regsWritten = 0; uint32_t regsRead = 0; - zasm::InstrCPUFlags flagsLive = 0; uint32_t regsLive = 0; + zasm::InstrCPUFlags flagsLive = 0; }; // Stores additional data for nodes in the zasm::Program diff --git a/obfuscator/include/obfuscator/obfuscate.hpp b/obfuscator/include/obfuscator/obfuscate.hpp index 6e50f7d..7ef7f5b 100644 --- a/obfuscator/include/obfuscator/obfuscate.hpp +++ b/obfuscator/include/obfuscator/obfuscate.hpp @@ -4,5 +4,5 @@ namespace obfuscator { -bool obfuscate(Context& ctx); +bool obfuscate(Context& ctx, bool verbose = false); } diff --git a/obfuscator/src/obfuscate.cpp b/obfuscator/src/obfuscate.cpp index c81a22e..cd870d7 100644 --- a/obfuscator/src/obfuscate.cpp +++ b/obfuscator/src/obfuscate.cpp @@ -11,6 +11,7 @@ #include #include +#include using namespace zasm; using namespace obfuscator; @@ -26,6 +27,7 @@ struct Arguments : ArgumentParser std::string output; std::string cleanOutput; std::string payload; + bool verbose = false; Arguments(int argc, char** argv) : ArgumentParser("Obfuscates the riscvm_run function") { @@ -33,13 +35,141 @@ struct Arguments : ArgumentParser addString("-output", output, "Obfuscated function binary blob"); addString("-clean-output", cleanOutput, "Unobfuscated function binary blob"); addString("-payload", payload, "Payload to execute (Windows only)"); + addBool("-verbose", verbose, "Verbose output"); parseOrExit(argc, argv); } }; +static nlohmann::json livenessJson(uint64_t regsLive, InstrCPUFlags flagsLive) +{ + nlohmann::json json; + + std::vector regs; + for (const auto& reg : maskToRegs(regsLive)) + { + auto name = formatter::toString(reg); + for (auto& ch : name) + { + if (ch >= 'a' && ch <= 'z') + { + ch = toupper(ch); + } + } + regs.push_back(std::move(name)); + } + json["regs"] = regs; + + std::vector flags; + for (const auto& flag : maskToFlags(flagsLive)) + { + auto name = flagToString(flag); + if (name != nullptr) + { + flags.push_back(name); + } + } + json["flags"] = flags; + + return json; +} + +static void dumpLiveness(CFG& cfg, const std::map& livenessBlocks) +{ + nlohmann::json json; + // Print the results + std::string script; + for (const auto& [address, block] : cfg.blocks) + { + auto& liveness = livenessBlocks.at(address); + + nlohmann::json blockJson; + blockJson["Liveness in"] = livenessJson(liveness.regsLiveIn, liveness.flagsLiveIn); + blockJson["Liveness out"] = livenessJson(liveness.regsLiveOut, liveness.flagsLiveOut); + + nlohmann::json instrJson; + fmt::println("Results for block {:#x}\n==========", address); + for (auto node = block.begin; node != block.end; node = node->getNext()) + { + auto data = node->getUserData(); + auto str = formatter::toString(cfg.program, node, formatter::Options::HexImmediates); + + instrJson[fmt::format("{:#x}", data->address)] = livenessJson(data->regsLive, data->flagsLive); + + script += "commentset "; + char address[32]; + sprintf_s(address, "0x%llX", data->address); + script += address; + script += ", \""; + if (data->regsLive || data->flagsLive) + { + script += formatRegsMask(data->regsLive); + if (data->flagsLive) + { + script += "|"; + script += formatFlagsMask(data->flagsLive); + } + } + else + { + script += "no live (HA)"; + } + script += "\"\n"; + + fmt::println( + "{:#x}|{}|{}|{}", data->address, str, formatRegsMask(data->regsLive), formatFlagsMask(data->flagsLive) + ); + + if (data->regsRead & ~data->regsLive) + { + fmt::println("\tdead regs read: %s\n", formatRegsMask(data->regsRead & ~data->regsLive).c_str()); + __debugbreak(); + } + } + fmt::println("=========="); + + auto blockLiveness = livenessBlocks.at(address); + fmt::println("\tregs_live_in: {}", formatRegsMask(blockLiveness.regsLiveIn)); + fmt::println("\tregs_live_out: {}", formatRegsMask(blockLiveness.regsLiveOut)); + fmt::println("\tflags_live_in: {}", formatFlagsMask(blockLiveness.flagsLiveIn)); + fmt::println("\tflags_live_out: {}", formatFlagsMask(blockLiveness.flagsLiveOut)); + + blockJson["Instr Liveness"] = std::move(instrJson); + + json[fmt::format("{:#x}", address)] = std::move(blockJson); + } + + fmt::println("{}", script); + + auto toHex = [](uint64_t value) + { + char buffer[64] = ""; + sprintf_s(buffer, "\"0x%llX\"", value); + return std::string(buffer); + }; + + std::string dot = "digraph G {\n"; + for (const auto& [address, block] : cfg.blocks) + { + dot += toHex(address) + " [label=\"" + cfg.program.getLabelData(block.label).value().name + "\"];\n"; + for (const auto& successor : block.successors) + { + auto data = cfg.program.getLabelData(successor).value().node->getUserData(); + auto successorAddress = data->address; + dot += toHex(address) + " -> " + toHex(successorAddress) + ";\n"; + } + } + dot += "}"; + + fmt::println("{}", dot); + + std::ofstream ofs("liveness.json"); + ofs << json.dump(2); +} + int main(int argc, char** argv) { Arguments args(argc, argv); + auto verbose = args.verbose; std::vector pe; if (!loadFile(args.input, pe)) @@ -60,18 +190,37 @@ int main(int argc, char** argv) Program program(MachineMode::AMD64); Context ctx(program); - if (!disassemble(ctx, riscvmRunAddress, riscvmRunCode)) + if (!disassemble(ctx, riscvmRunAddress, riscvmRunCode, verbose)) { fmt::println("Failed to disassemble riscvm_run function."); return EXIT_FAILURE; } - if (!analyze(ctx, true)) + // Analyze the CFG + auto cfg = CFG::analyze(program, program.getEntryPoint(), verbose); + if (!cfg) { - fmt::println("Failed to analyze the riscvm_run function."); + fmt::println("Failed to analyze the riscvm_run function: {}", cfg.error()); return EXIT_FAILURE; } + // Perform liveness analysis + auto livenessBlocks = cfg->getLivenessBlocks(verbose); + auto instructionLiveness = cfg->getInstructionLiveness(livenessBlocks, verbose); + + // Add liveness information to the instruction data + for (const auto& instruction : instructionLiveness) + { + auto data = instruction.node->getUserData(); + data->regsLive = instruction.regsLive; + data->flagsLive = instruction.flagsLive; + } + + if (verbose) + { + dumpLiveness(*cfg, livenessBlocks); + } + auto serializeToFile = [&program](const std::string& outputFile, uint64_t base = 0) { // Serialize the obfuscated function @@ -96,7 +245,7 @@ int main(int argc, char** argv) return EXIT_FAILURE; } - if (!obfuscate(ctx)) + if (!obfuscate(ctx, verbose)) { fmt::println("Failed to obfuscate riscvm_run function."); return EXIT_FAILURE; diff --git a/obfuscator/src/obfuscator/analyze.cpp b/obfuscator/src/obfuscator/analyze.cpp index b41c780..2b56f24 100644 --- a/obfuscator/src/obfuscator/analyze.cpp +++ b/obfuscator/src/obfuscator/analyze.cpp @@ -13,38 +13,18 @@ namespace obfuscator using namespace zasm; -bool analyze(Context& ctx, bool verbose) +Expected CFG::analyze(const zasm::Program& program, Label entry, bool verbose) { - Program& program = ctx.program; - auto mode = program.getMode(); + CFG cfg(program); + if (verbose) + { fmt::println("=== ANALYZE ==="); + } std::vector