From 4f425fbcb8db360dce0d3f4588fcd414e0b257c6 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Wed, 22 Nov 2023 16:04:19 +0000 Subject: [PATCH 01/17] log symbolic areas --- runtime/CMakeLists.txt | 1 + runtime/ShadowTrace.cpp | 15 +++++++++++++++ runtime/ShadowTrace.h | 8 ++++++++ runtime/qsym_backend/Runtime.cpp | 2 ++ 4 files changed, 26 insertions(+) create mode 100644 runtime/ShadowTrace.cpp create mode 100644 runtime/ShadowTrace.h diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 07277b8f..7feed851 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -34,6 +34,7 @@ set(SHARED_RUNTIME_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/RuntimeCommon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/LibcWrappers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Shadow.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ShadowTrace.cpp ${CMAKE_CURRENT_SOURCE_DIR}/GarbageCollection.cpp) if (${QSYM_BACKEND}) diff --git a/runtime/ShadowTrace.cpp b/runtime/ShadowTrace.cpp new file mode 100644 index 00000000..61a7fdbd --- /dev/null +++ b/runtime/ShadowTrace.cpp @@ -0,0 +1,15 @@ + +#include "ShadowTrace.h" +#include "Shadow.h" + +void dumpSymbolicState() { + for (auto const& [pageAddress, _] : g_shadow_pages) { + for(auto byteAddress = pageAddress; byteAddress < pageAddress + kPageSize; byteAddress++) { + auto byteExpr = _sym_read_memory((u_int8_t *) byteAddress, 1, true); + if (byteExpr != nullptr && !byteExpr->isConcrete()) { + printf("%lx has symbolic value\n", byteAddress); + } + } + } + printf("\n"); +} \ No newline at end of file diff --git a/runtime/ShadowTrace.h b/runtime/ShadowTrace.h new file mode 100644 index 00000000..fda0fa0b --- /dev/null +++ b/runtime/ShadowTrace.h @@ -0,0 +1,8 @@ + +#ifndef SYMRUNTIME_SHADOWTRACE_H +#define SYMRUNTIME_SHADOWTRACE_H + +#include +void dumpSymbolicState(); + +#endif // SYMRUNTIME_SHADOWTRACE_H diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index 650787bf..e84ffe61 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -19,6 +19,7 @@ #include "Runtime.h" #include "GarbageCollection.h" +#include "ShadowTrace.h" // C++ #if __has_include() @@ -415,6 +416,7 @@ void _sym_notify_ret(uintptr_t site_id) { } void _sym_notify_basic_block(uintptr_t site_id) { + dumpSymbolicState(); g_call_stack_manager.visitBasicBlock(site_id); } From 22373f0efc20f3ce900809ebbf8e00260a529869 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Fri, 24 Nov 2023 13:26:18 +0000 Subject: [PATCH 02/17] save tracing info in json file instead of stdout --- runtime/CMakeLists.txt | 2 +- runtime/RuntimeCommon.cpp | 9 ++++++++ runtime/RuntimeCommon.h | 2 ++ runtime/ShadowTrace.cpp | 15 ------------- runtime/ShadowTrace.h | 8 ------- runtime/Tracer.cpp | 34 +++++++++++++++++++++++++++++ runtime/Tracer.h | 19 ++++++++++++++++ runtime/qsym_backend/CMakeLists.txt | 12 +++++++--- runtime/qsym_backend/Runtime.cpp | 3 +-- 9 files changed, 75 insertions(+), 29 deletions(-) delete mode 100644 runtime/ShadowTrace.cpp delete mode 100644 runtime/ShadowTrace.h create mode 100644 runtime/Tracer.cpp create mode 100644 runtime/Tracer.h diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 7feed851..b354c3bc 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -34,7 +34,7 @@ set(SHARED_RUNTIME_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/RuntimeCommon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/LibcWrappers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Shadow.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ShadowTrace.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Tracer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/GarbageCollection.cpp) if (${QSYM_BACKEND}) diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index 127c81de..aa94b507 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -27,6 +27,7 @@ #include "GarbageCollection.h" #include "RuntimeCommon.h" #include "Shadow.h" +#include "Tracer.h" namespace { @@ -486,3 +487,11 @@ SymExpr _sym_build_bit_to_bool(SymExpr expr) { return _sym_build_not_equal(expr, _sym_build_integer(0, _sym_bits_helper(expr))); } + +void _sym_trace_execution(uintptr_t pc) { + Tracer::trace(pc); +} + +void _sym_finalize_tracing() { + Tracer::writeTraceToDisk(); +} \ No newline at end of file diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 4c05ceb0..8218a694 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -211,6 +211,8 @@ void _sym_notify_basic_block(uintptr_t site_id); */ const char *_sym_expr_to_string(SymExpr expr); // statically allocated bool _sym_feasible(SymExpr expr); +void _sym_trace_execution(uintptr_t pc); +void _sym_finalize_tracing(void); /* * Garbage collection diff --git a/runtime/ShadowTrace.cpp b/runtime/ShadowTrace.cpp deleted file mode 100644 index 61a7fdbd..00000000 --- a/runtime/ShadowTrace.cpp +++ /dev/null @@ -1,15 +0,0 @@ - -#include "ShadowTrace.h" -#include "Shadow.h" - -void dumpSymbolicState() { - for (auto const& [pageAddress, _] : g_shadow_pages) { - for(auto byteAddress = pageAddress; byteAddress < pageAddress + kPageSize; byteAddress++) { - auto byteExpr = _sym_read_memory((u_int8_t *) byteAddress, 1, true); - if (byteExpr != nullptr && !byteExpr->isConcrete()) { - printf("%lx has symbolic value\n", byteAddress); - } - } - } - printf("\n"); -} \ No newline at end of file diff --git a/runtime/ShadowTrace.h b/runtime/ShadowTrace.h deleted file mode 100644 index fda0fa0b..00000000 --- a/runtime/ShadowTrace.h +++ /dev/null @@ -1,8 +0,0 @@ - -#ifndef SYMRUNTIME_SHADOWTRACE_H -#define SYMRUNTIME_SHADOWTRACE_H - -#include -void dumpSymbolicState(); - -#endif // SYMRUNTIME_SHADOWTRACE_H diff --git a/runtime/Tracer.cpp b/runtime/Tracer.cpp new file mode 100644 index 00000000..0ab9f041 --- /dev/null +++ b/runtime/Tracer.cpp @@ -0,0 +1,34 @@ + +#include "Tracer.h" +#include "Shadow.h" + +nlohmann::json Tracer::currentTrace; +const std::string Tracer::BACKEND_TRACE_FILE = "/tmp/backend_trace.json"; + +void Tracer::trace(uintptr_t pc) { + + + std::vector symbolicAddresses; + + for (auto const &[pageAddress, _] : g_shadow_pages) { + for (auto byteAddress = pageAddress; byteAddress < pageAddress + kPageSize; + byteAddress++) { + auto byteExpr = _sym_read_memory((u_int8_t *)byteAddress, 1, true); + if (byteExpr != nullptr && !byteExpr->isConcrete()) { + symbolicAddresses.push_back(byteAddress); + } + } + } + + nlohmann::json newEntry; + newEntry["pc"] = pc; + newEntry["symbolicAddresses"] = symbolicAddresses; + + currentTrace.push_back(newEntry); +} + + +void Tracer::writeTraceToDisk() { + std::ofstream o(BACKEND_TRACE_FILE); + o << std::setw(4) << currentTrace << std::endl; +} \ No newline at end of file diff --git a/runtime/Tracer.h b/runtime/Tracer.h new file mode 100644 index 00000000..21e1fe75 --- /dev/null +++ b/runtime/Tracer.h @@ -0,0 +1,19 @@ + +#ifndef SYMRUNTIME_TRACER_H +#define SYMRUNTIME_TRACER_H + +#include +#include +#include + +class Tracer { +public: + static void trace(uintptr_t pc); + static void writeTraceToDisk(); + +private: + static nlohmann::json currentTrace; + static const std::string BACKEND_TRACE_FILE; +}; + +#endif // SYMRUNTIME_TRACER_H diff --git a/runtime/qsym_backend/CMakeLists.txt b/runtime/qsym_backend/CMakeLists.txt index 34c01320..cd5f40db 100644 --- a/runtime/qsym_backend/CMakeLists.txt +++ b/runtime/qsym_backend/CMakeLists.txt @@ -83,7 +83,13 @@ target_include_directories(SymRuntime SYSTEM PRIVATE # We need to get the LLVM support component for llvm::APInt. llvm_map_components_to_libnames(QSYM_LLVM_DEPS support) -target_link_libraries(SymRuntime ${Z3_LIBRARIES} ${QSYM_LLVM_DEPS}) +include(FetchContent) + +FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download/v3.11.2/json.tar.xz) +FetchContent_MakeAvailable(json) + + +target_link_libraries(SymRuntime ${Z3_LIBRARIES} ${QSYM_LLVM_DEPS} nlohmann_json::nlohmann_json) # We use std::filesystem, which has been added in C++17. Before its official # inclusion in the standard library, Clang shipped the feature first in @@ -92,5 +98,5 @@ target_link_libraries(SymRuntime ${Z3_LIBRARIES} ${QSYM_LLVM_DEPS}) # some current LTS distributions ship a GCC that requires libstdc++fs for # std::filesystem - we catch this case in order to enable users of such systems # to build with the default compiler. -find_package(Filesystem COMPONENTS Final Experimental) -target_link_libraries(SymRuntime std::filesystem) +#find_package(Filesystem COMPONENTS Final Experimental) +#target_link_libraries(SymRuntime std::filesystem) diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index e84ffe61..51ea7d66 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -19,7 +19,7 @@ #include "Runtime.h" #include "GarbageCollection.h" -#include "ShadowTrace.h" +#include "Tracer.h" // C++ #if __has_include() @@ -416,7 +416,6 @@ void _sym_notify_ret(uintptr_t site_id) { } void _sym_notify_basic_block(uintptr_t site_id) { - dumpSymbolicState(); g_call_stack_manager.visitBasicBlock(site_id); } From f3bc3bbd5f213a2c88907146b13eccd107d3d363 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Mon, 27 Nov 2023 13:43:18 +0000 Subject: [PATCH 03/17] disable garbage collection for tracing --- runtime/qsym_backend/Runtime.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index 51ea7d66..0c651349 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -450,7 +450,7 @@ bool _sym_feasible(SymExpr expr) { // void _sym_collect_garbage() { - if (allocatedExpressions.size() < g_config.garbageCollectionThreshold) +// if (allocatedExpressions.size() < g_config.garbageCollectionThreshold) return; #ifdef DEBUG_RUNTIME From e1e9b5c2aac0d4a2262fd46d238ebdc06ae98783 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Mon, 27 Nov 2023 15:20:41 +0000 Subject: [PATCH 04/17] provide all symbols in tracing output --- runtime/Tracer.cpp | 28 +++++++++++++++++++++++++--- runtime/Tracer.h | 4 ++++ runtime/qsym_backend/Runtime.cpp | 16 +++++++++++----- runtime/qsym_backend/Runtime.h | 2 ++ 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/runtime/Tracer.cpp b/runtime/Tracer.cpp index 0ab9f041..162ad0ba 100644 --- a/runtime/Tracer.cpp +++ b/runtime/Tracer.cpp @@ -3,11 +3,11 @@ #include "Shadow.h" nlohmann::json Tracer::currentTrace; +nlohmann::json Tracer::symbols; const std::string Tracer::BACKEND_TRACE_FILE = "/tmp/backend_trace.json"; void Tracer::trace(uintptr_t pc) { - std::vector symbolicAddresses; for (auto const &[pageAddress, _] : g_shadow_pages) { @@ -27,8 +27,30 @@ void Tracer::trace(uintptr_t pc) { currentTrace.push_back(newEntry); } - void Tracer::writeTraceToDisk() { + for (auto const &[symbolPtr, _] : getAllocatedExpressions()) { + recursivelyCollectSymbols(symbolPtr); + } + + nlohmann::json dataToSave; + dataToSave["trace"] = currentTrace; + dataToSave["symbols"] = symbols; + std::ofstream o(BACKEND_TRACE_FILE); - o << std::setw(4) << currentTrace << std::endl; + o << std::setw(4) << dataToSave << std::endl; +} + +void Tracer::recursivelyCollectSymbols(SymExpr symbolPtr) { + string symbolID = std::to_string(reinterpret_cast(symbolPtr)); + if (symbols.count(symbolID) > 0) { + return; + } + + symbols[symbolID]["type"] = symbolPtr->kind(); + for (int child_i = 0; child_i < symbolPtr->num_children(); child_i++) { + SymExpr child = symbolPtr->getChild(child_i).get(); + string childID = std::to_string(reinterpret_cast(child)); + symbols[symbolID]["children"].push_back(childID); + recursivelyCollectSymbols(child); + } } \ No newline at end of file diff --git a/runtime/Tracer.h b/runtime/Tracer.h index 21e1fe75..dfa286e2 100644 --- a/runtime/Tracer.h +++ b/runtime/Tracer.h @@ -2,6 +2,7 @@ #ifndef SYMRUNTIME_TRACER_H #define SYMRUNTIME_TRACER_H +#include "Runtime.h" #include #include #include @@ -12,7 +13,10 @@ class Tracer { static void writeTraceToDisk(); private: + static void recursivelyCollectSymbols(SymExpr symbol); + static nlohmann::json currentTrace; + static nlohmann::json symbols; static const std::string BACKEND_TRACE_FILE; }; diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index 0c651349..0c23d833 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -76,11 +76,6 @@ z3::context *g_z3_context; } // namespace qsym -namespace { - -/// Indicate whether the runtime has been initialized. -std::atomic_flag g_initialized = ATOMIC_FLAG_INIT; - /// A mapping of all expressions that we have ever received from QSYM to the /// corresponding shared pointers on the heap. /// @@ -92,6 +87,17 @@ std::atomic_flag g_initialized = ATOMIC_FLAG_INIT; /// workload. std::map allocatedExpressions; +std::map &getAllocatedExpressions() { + return allocatedExpressions; +} + +namespace { + +/// Indicate whether the runtime has been initialized. +std::atomic_flag g_initialized = ATOMIC_FLAG_INIT; + + + SymExpr registerExpression(const qsym::ExprRef &expr) { SymExpr rawExpr = expr.get(); diff --git a/runtime/qsym_backend/Runtime.h b/runtime/qsym_backend/Runtime.h index 8f19d2af..27a15aa2 100644 --- a/runtime/qsym_backend/Runtime.h +++ b/runtime/qsym_backend/Runtime.h @@ -21,4 +21,6 @@ typedef qsym::Expr *SymExpr; #include +std::map &getAllocatedExpressions(); + #endif From eeb921a676e0845e3040761725472b2cea84f4ba Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Mon, 27 Nov 2023 15:44:32 +0000 Subject: [PATCH 05/17] give symbol id with each symbolic address --- runtime/Tracer.cpp | 23 ++++++++++++++--------- runtime/Tracer.h | 1 + 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/runtime/Tracer.cpp b/runtime/Tracer.cpp index 162ad0ba..29a41d60 100644 --- a/runtime/Tracer.cpp +++ b/runtime/Tracer.cpp @@ -8,22 +8,23 @@ const std::string Tracer::BACKEND_TRACE_FILE = "/tmp/backend_trace.json"; void Tracer::trace(uintptr_t pc) { - std::vector symbolicAddresses; + nlohmann::json newEntry; + newEntry["pc"] = pc; for (auto const &[pageAddress, _] : g_shadow_pages) { for (auto byteAddress = pageAddress; byteAddress < pageAddress + kPageSize; byteAddress++) { auto byteExpr = _sym_read_memory((u_int8_t *)byteAddress, 1, true); if (byteExpr != nullptr && !byteExpr->isConcrete()) { - symbolicAddresses.push_back(byteAddress); + nlohmann::json symbolicAddress; + symbolicAddress["address"] = byteAddress; + symbolicAddress["symbol"] = getSymbolID(byteExpr); + + newEntry["symbolicAddresses"].push_back(symbolicAddress); } } } - nlohmann::json newEntry; - newEntry["pc"] = pc; - newEntry["symbolicAddresses"] = symbolicAddresses; - currentTrace.push_back(newEntry); } @@ -41,16 +42,20 @@ void Tracer::writeTraceToDisk() { } void Tracer::recursivelyCollectSymbols(SymExpr symbolPtr) { - string symbolID = std::to_string(reinterpret_cast(symbolPtr)); + string symbolID = getSymbolID(symbolPtr); if (symbols.count(symbolID) > 0) { return; } - symbols[symbolID]["type"] = symbolPtr->kind(); + symbols[symbolID]["kink"] = symbolPtr->kind(); for (int child_i = 0; child_i < symbolPtr->num_children(); child_i++) { SymExpr child = symbolPtr->getChild(child_i).get(); - string childID = std::to_string(reinterpret_cast(child)); + string childID = getSymbolID(child); symbols[symbolID]["children"].push_back(childID); recursivelyCollectSymbols(child); } +} + +string Tracer::getSymbolID(SymExpr symbol) { + return std::to_string(reinterpret_cast(symbol)); } \ No newline at end of file diff --git a/runtime/Tracer.h b/runtime/Tracer.h index dfa286e2..61d1f26a 100644 --- a/runtime/Tracer.h +++ b/runtime/Tracer.h @@ -14,6 +14,7 @@ class Tracer { private: static void recursivelyCollectSymbols(SymExpr symbol); + static string getSymbolID(SymExpr symbol); static nlohmann::json currentTrace; static nlohmann::json symbols; From f4c799abd950341586cd4dcb55eaee2df17cdc87 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Mon, 27 Nov 2023 17:59:54 +0000 Subject: [PATCH 06/17] update tracing output format --- runtime/Tracer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/runtime/Tracer.cpp b/runtime/Tracer.cpp index 29a41d60..cf43f276 100644 --- a/runtime/Tracer.cpp +++ b/runtime/Tracer.cpp @@ -10,6 +10,7 @@ void Tracer::trace(uintptr_t pc) { nlohmann::json newEntry; newEntry["pc"] = pc; + newEntry["memory_to_symbol_mapping"] = nlohmann::json::object(); for (auto const &[pageAddress, _] : g_shadow_pages) { for (auto byteAddress = pageAddress; byteAddress < pageAddress + kPageSize; @@ -20,7 +21,7 @@ void Tracer::trace(uintptr_t pc) { symbolicAddress["address"] = byteAddress; symbolicAddress["symbol"] = getSymbolID(byteExpr); - newEntry["symbolicAddresses"].push_back(symbolicAddress); + newEntry["memory_to_symbol_mapping"][std::to_string(reinterpret_cast(byteAddress))] = getSymbolID(byteExpr); } } } @@ -47,11 +48,12 @@ void Tracer::recursivelyCollectSymbols(SymExpr symbolPtr) { return; } - symbols[symbolID]["kink"] = symbolPtr->kind(); + symbols[symbolID]["kind"] = symbolPtr->kind(); + symbols[symbolID]["args"] = nlohmann::json::array(); for (int child_i = 0; child_i < symbolPtr->num_children(); child_i++) { SymExpr child = symbolPtr->getChild(child_i).get(); string childID = getSymbolID(child); - symbols[symbolID]["children"].push_back(childID); + symbols[symbolID]["args"].push_back(childID); recursivelyCollectSymbols(child); } } From 0c43f2b4e00ac41721eeec8f58d5a083e66128a9 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Tue, 28 Nov 2023 14:27:48 +0000 Subject: [PATCH 07/17] path constraints tracing --- runtime/RuntimeCommon.cpp | 3 +-- runtime/Tracer.cpp | 29 +++++++++++++++++++++++++++-- runtime/Tracer.h | 5 ++++- runtime/qsym_backend/Runtime.cpp | 1 + 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index aa94b507..6d6da672 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -488,8 +488,7 @@ SymExpr _sym_build_bit_to_bool(SymExpr expr) { _sym_build_integer(0, _sym_bits_helper(expr))); } -void _sym_trace_execution(uintptr_t pc) { - Tracer::trace(pc); +void _sym_trace_execution(uintptr_t pc) { Tracer::traceStep(pc); } void _sym_finalize_tracing() { diff --git a/runtime/Tracer.cpp b/runtime/Tracer.cpp index cf43f276..21689b21 100644 --- a/runtime/Tracer.cpp +++ b/runtime/Tracer.cpp @@ -4,9 +4,10 @@ nlohmann::json Tracer::currentTrace; nlohmann::json Tracer::symbols; +nlohmann::json Tracer::pathConstraints; const std::string Tracer::BACKEND_TRACE_FILE = "/tmp/backend_trace.json"; -void Tracer::trace(uintptr_t pc) { +void Tracer::traceStep(uintptr_t pc) { nlohmann::json newEntry; newEntry["pc"] = pc; @@ -21,7 +22,9 @@ void Tracer::trace(uintptr_t pc) { symbolicAddress["address"] = byteAddress; symbolicAddress["symbol"] = getSymbolID(byteExpr); - newEntry["memory_to_symbol_mapping"][std::to_string(reinterpret_cast(byteAddress))] = getSymbolID(byteExpr); + newEntry["memory_to_symbol_mapping"] + [std::to_string(reinterpret_cast(byteAddress))] = + getSymbolID(byteExpr); } } } @@ -29,6 +32,27 @@ void Tracer::trace(uintptr_t pc) { currentTrace.push_back(newEntry); } +void Tracer::tracePathConstraint(SymExpr constraint) { + if (pathConstraints.empty()) { + symcc_set_test_case_handler( + reinterpret_cast(traceNewInput)); + } + + nlohmann::json newEntry; + newEntry["symbol"] = getSymbolID(constraint); + newEntry["after_step"] = currentTrace.size() - 1; + newEntry["new_input_value"] = nlohmann::json::array(); + + pathConstraints.push_back(newEntry); +} + +void Tracer::traceNewInput(const unsigned char *input, size_t size) { + for (size_t i = 0; i < size; i++) { + pathConstraints[pathConstraints.size() - 1]["new_input_value"].push_back( + input[i]); + } +} + void Tracer::writeTraceToDisk() { for (auto const &[symbolPtr, _] : getAllocatedExpressions()) { recursivelyCollectSymbols(symbolPtr); @@ -37,6 +61,7 @@ void Tracer::writeTraceToDisk() { nlohmann::json dataToSave; dataToSave["trace"] = currentTrace; dataToSave["symbols"] = symbols; + dataToSave["path_constraints"] = pathConstraints; std::ofstream o(BACKEND_TRACE_FILE); o << std::setw(4) << dataToSave << std::endl; diff --git a/runtime/Tracer.h b/runtime/Tracer.h index 61d1f26a..02909a1a 100644 --- a/runtime/Tracer.h +++ b/runtime/Tracer.h @@ -9,7 +9,9 @@ class Tracer { public: - static void trace(uintptr_t pc); + static void traceStep(uintptr_t pc); + static void tracePathConstraint(SymExpr constraint); + static void traceNewInput(const unsigned char* input, size_t size); static void writeTraceToDisk(); private: @@ -18,6 +20,7 @@ class Tracer { static nlohmann::json currentTrace; static nlohmann::json symbols; + static nlohmann::json pathConstraints; static const std::string BACKEND_TRACE_FILE; }; diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index 0c23d833..d1c22493 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -328,6 +328,7 @@ void _sym_push_path_constraint(SymExpr constraint, int taken, if (constraint == nullptr) return; + Tracer::tracePathConstraint(constraint); g_solver->addJcc(allocatedExpressions.at(constraint), taken != 0, site_id); } From eed93ba03e84df9da41db72152d965e7e6479256 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Wed, 29 Nov 2023 12:53:15 +0000 Subject: [PATCH 08/17] add registered regions to symbolic tracing --- runtime/GarbageCollection.cpp | 4 ++++ runtime/GarbageCollection.h | 1 + runtime/Tracer.cpp | 21 +++++++++++++++------ runtime/Tracer.h | 1 + 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/runtime/GarbageCollection.cpp b/runtime/GarbageCollection.cpp index 8afdd327..01c9be7f 100644 --- a/runtime/GarbageCollection.cpp +++ b/runtime/GarbageCollection.cpp @@ -23,6 +23,10 @@ /// A list of memory regions that are known to contain symbolic expressions. std::vector expressionRegions; +std::vector& getExpressionRegions() { + return expressionRegions; +} + void registerExpressionRegion(ExpressionRegion r) { expressionRegions.push_back(std::move(r)); } diff --git a/runtime/GarbageCollection.h b/runtime/GarbageCollection.h index 81b0b8c2..0a118ed4 100644 --- a/runtime/GarbageCollection.h +++ b/runtime/GarbageCollection.h @@ -28,6 +28,7 @@ using ExpressionRegion = std::pair; /// Add the specified region to the list of places to search for symbolic /// expressions. void registerExpressionRegion(ExpressionRegion r); +std::vector& getExpressionRegions(); /// Return the set of currently reachable symbolic expressions. std::set collectReachableExpressions(); diff --git a/runtime/Tracer.cpp b/runtime/Tracer.cpp index 21689b21..1cf91cfb 100644 --- a/runtime/Tracer.cpp +++ b/runtime/Tracer.cpp @@ -18,20 +18,29 @@ void Tracer::traceStep(uintptr_t pc) { byteAddress++) { auto byteExpr = _sym_read_memory((u_int8_t *)byteAddress, 1, true); if (byteExpr != nullptr && !byteExpr->isConcrete()) { - nlohmann::json symbolicAddress; - symbolicAddress["address"] = byteAddress; - symbolicAddress["symbol"] = getSymbolID(byteExpr); - newEntry["memory_to_symbol_mapping"] - [std::to_string(reinterpret_cast(byteAddress))] = + newEntry["memory_to_symbol_mapping"][std::to_string(reinterpret_cast(byteAddress))] = getSymbolID(byteExpr); } } } + for (auto &expressionRegion : getExpressionRegions()){ + for (auto byteAddress = expressionRegion.first; byteAddress < expressionRegion.first + expressionRegion.second / sizeof(byteAddress); + byteAddress++) { + auto byteExpr = *byteAddress; + if (byteExpr != nullptr && !byteExpr->isConcrete()) { + + newEntry["memory_to_symbol_mapping"][std::to_string(reinterpret_cast(byteAddress))] = + getSymbolID(byteExpr); + } + } + } + currentTrace.push_back(newEntry); } + void Tracer::tracePathConstraint(SymExpr constraint) { if (pathConstraints.empty()) { symcc_set_test_case_handler( @@ -41,7 +50,7 @@ void Tracer::tracePathConstraint(SymExpr constraint) { nlohmann::json newEntry; newEntry["symbol"] = getSymbolID(constraint); newEntry["after_step"] = currentTrace.size() - 1; - newEntry["new_input_value"] = nlohmann::json::array(); + newEntry["new_input_value"] = nlohmann::json(); pathConstraints.push_back(newEntry); } diff --git a/runtime/Tracer.h b/runtime/Tracer.h index 02909a1a..731ee69c 100644 --- a/runtime/Tracer.h +++ b/runtime/Tracer.h @@ -2,6 +2,7 @@ #ifndef SYMRUNTIME_TRACER_H #define SYMRUNTIME_TRACER_H +#include "GarbageCollection.h" #include "Runtime.h" #include #include From cd035432c7747c7269afad370cb9cfed67adb781 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Mon, 4 Dec 2023 15:01:07 +0000 Subject: [PATCH 09/17] add constants value, size and input dependency to symbolic trace --- runtime/Tracer.cpp | 24 ++++++++++++++++++------ runtime/Tracer.h | 2 +- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/runtime/Tracer.cpp b/runtime/Tracer.cpp index 1cf91cfb..13626662 100644 --- a/runtime/Tracer.cpp +++ b/runtime/Tracer.cpp @@ -1,6 +1,7 @@ #include "Tracer.h" #include "Shadow.h" +#include "llvm/Support/raw_ostream.h" nlohmann::json Tracer::currentTrace; nlohmann::json Tracer::symbols; @@ -63,7 +64,7 @@ void Tracer::traceNewInput(const unsigned char *input, size_t size) { } void Tracer::writeTraceToDisk() { - for (auto const &[symbolPtr, _] : getAllocatedExpressions()) { + for (auto const &[_, symbolPtr] : getAllocatedExpressions()) { recursivelyCollectSymbols(symbolPtr); } @@ -76,17 +77,28 @@ void Tracer::writeTraceToDisk() { o << std::setw(4) << dataToSave << std::endl; } -void Tracer::recursivelyCollectSymbols(SymExpr symbolPtr) { - string symbolID = getSymbolID(symbolPtr); +void Tracer::recursivelyCollectSymbols(const shared_ptr& symbolPtr) { + string symbolID = getSymbolID(symbolPtr.get()); if (symbols.count(symbolID) > 0) { return; } - symbols[symbolID]["kind"] = symbolPtr->kind(); + symbols[symbolID]["operation"]["kind"] = symbolPtr->kind(); + symbols[symbolID]["operation"]["properties"] = nlohmann::json::object(); + if (symbolPtr->kind() == qsym::Constant){ + auto value_llvm_int = static_pointer_cast(symbolPtr)->value(); + std::string value_str; + llvm::raw_string_ostream rso(value_str); + value_llvm_int.print(rso, false); + + symbols[symbolID]["operation"]["properties"]["value"] = value_str; + } + symbols[symbolID]["size_bits"] = symbolPtr->bits(); + symbols[symbolID]["input_byte_dependency"] = *symbolPtr->getDependencies(); symbols[symbolID]["args"] = nlohmann::json::array(); for (int child_i = 0; child_i < symbolPtr->num_children(); child_i++) { - SymExpr child = symbolPtr->getChild(child_i).get(); - string childID = getSymbolID(child); + shared_ptr child = symbolPtr->getChild(child_i); + string childID = getSymbolID(child.get()); symbols[symbolID]["args"].push_back(childID); recursivelyCollectSymbols(child); } diff --git a/runtime/Tracer.h b/runtime/Tracer.h index 731ee69c..f5b6ec1a 100644 --- a/runtime/Tracer.h +++ b/runtime/Tracer.h @@ -16,7 +16,7 @@ class Tracer { static void writeTraceToDisk(); private: - static void recursivelyCollectSymbols(SymExpr symbol); + static void recursivelyCollectSymbols(const shared_ptr& symbolPtr); static string getSymbolID(SymExpr symbol); static nlohmann::json currentTrace; From a2ee76c3b237cf8dfaa7bff2b47cd4708fa47ae0 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Mon, 4 Dec 2023 18:07:44 +0000 Subject: [PATCH 10/17] add taken info to tracing --- runtime/Tracer.cpp | 3 ++- runtime/Tracer.h | 2 +- runtime/qsym_backend/Runtime.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/runtime/Tracer.cpp b/runtime/Tracer.cpp index 13626662..679f8d2c 100644 --- a/runtime/Tracer.cpp +++ b/runtime/Tracer.cpp @@ -42,7 +42,7 @@ void Tracer::traceStep(uintptr_t pc) { } -void Tracer::tracePathConstraint(SymExpr constraint) { +void Tracer::tracePathConstraint(SymExpr constraint, bool taken) { if (pathConstraints.empty()) { symcc_set_test_case_handler( reinterpret_cast(traceNewInput)); @@ -52,6 +52,7 @@ void Tracer::tracePathConstraint(SymExpr constraint) { newEntry["symbol"] = getSymbolID(constraint); newEntry["after_step"] = currentTrace.size() - 1; newEntry["new_input_value"] = nlohmann::json(); + newEntry["taken"] = taken; pathConstraints.push_back(newEntry); } diff --git a/runtime/Tracer.h b/runtime/Tracer.h index f5b6ec1a..092a6057 100644 --- a/runtime/Tracer.h +++ b/runtime/Tracer.h @@ -11,7 +11,7 @@ class Tracer { public: static void traceStep(uintptr_t pc); - static void tracePathConstraint(SymExpr constraint); + static void tracePathConstraint(SymExpr constraint, bool taken); static void traceNewInput(const unsigned char* input, size_t size); static void writeTraceToDisk(); diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index d1c22493..ea4fe3f6 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -328,7 +328,7 @@ void _sym_push_path_constraint(SymExpr constraint, int taken, if (constraint == nullptr) return; - Tracer::tracePathConstraint(constraint); + Tracer::tracePathConstraint(constraint, taken != 0); g_solver->addJcc(allocatedExpressions.at(constraint), taken != 0, site_id); } From fd687bc936a21cd59f39219acfe663fc4b91fe33 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Wed, 6 Dec 2023 00:02:49 +0000 Subject: [PATCH 11/17] move trace parsing script into standalone util package --- util/symbolic_trace/.gitignore | 4 + util/symbolic_trace/pyproject.toml | 3 + util/symbolic_trace/symcctrace/__init__.py | 2 + util/symbolic_trace/symcctrace/data.py | 115 ++++++++++++++++++++ util/symbolic_trace/symcctrace/parsing.py | 120 +++++++++++++++++++++ 5 files changed, 244 insertions(+) create mode 100644 util/symbolic_trace/.gitignore create mode 100644 util/symbolic_trace/pyproject.toml create mode 100644 util/symbolic_trace/symcctrace/__init__.py create mode 100644 util/symbolic_trace/symcctrace/data.py create mode 100644 util/symbolic_trace/symcctrace/parsing.py diff --git a/util/symbolic_trace/.gitignore b/util/symbolic_trace/.gitignore new file mode 100644 index 00000000..e5a60ce4 --- /dev/null +++ b/util/symbolic_trace/.gitignore @@ -0,0 +1,4 @@ +build +symcctrace.egg-info +.idea +__pycache__ \ No newline at end of file diff --git a/util/symbolic_trace/pyproject.toml b/util/symbolic_trace/pyproject.toml new file mode 100644 index 00000000..1551312a --- /dev/null +++ b/util/symbolic_trace/pyproject.toml @@ -0,0 +1,3 @@ +[project] +name = "symcctrace" +version = "1.0" diff --git a/util/symbolic_trace/symcctrace/__init__.py b/util/symbolic_trace/symcctrace/__init__.py new file mode 100644 index 00000000..746518b7 --- /dev/null +++ b/util/symbolic_trace/symcctrace/__init__.py @@ -0,0 +1,2 @@ +from .parsing import parse_trace +from .data import TraceData \ No newline at end of file diff --git a/util/symbolic_trace/symcctrace/data.py b/util/symbolic_trace/symcctrace/data.py new file mode 100644 index 00000000..3d52f487 --- /dev/null +++ b/util/symbolic_trace/symcctrace/data.py @@ -0,0 +1,115 @@ +import enum +import dataclasses + + +class SymbolKind(enum.Enum): + BOOL = 0 + CONSTANT = 1 + READ = 2 + CONCAT = 3 + EXTRACT = 4 + ZEXT = 5 + SEXT = 6 + ADD = 7 + SUB = 8 + MUL = 9 + UDIV = 10 + SDIV = 11 + UREM = 12 + SREM = 13 + NEG = 14 + NOT = 15 + AND = 16 + OR = 17 + XOR = 18 + SHL = 19 + LSHR = 20 + ASHR = 21 + EQUAL = 22 + DISTINCT = 23 + ULT = 24 + ULE = 25 + UGT = 26 + UGE = 27 + SLT = 28 + SLE = 29 + SGT = 30 + SGE = 31 + LOR = 32 + LAND = 33 + LNOT = 34 + ITE = 35 + ROL = 36 + ROR = 37 + INVALID = 38 + + +@dataclasses.dataclass +class Operation: + kind: SymbolKind + properties: dict + + +@dataclasses.dataclass +class RawSymbol: + operation: Operation + size_bits: int + input_byte_dependency: list[int] + args: list[str] + + +@dataclasses.dataclass +class RawTraceStep: + pc: int + memory_to_symbol_mapping: dict[str, str] + + +@dataclasses.dataclass +class RawPathConstraint: + symbol: str + after_step: int + new_input_value: list[int] | None + taken: bool + + +@dataclasses.dataclass +class RawTraceData: + trace: list[RawTraceStep] + symbols: dict[str, RawSymbol] + path_constraints: list[RawPathConstraint] + + +@dataclasses.dataclass +class Symbol: + operation: Operation + size_bits: int + input_byte_dependency: list[int] + args: list['Symbol'] + + +@dataclasses.dataclass +class TraceStep: + pc: int + memory_to_symbol_mapping: dict[str, Symbol] + + +@dataclasses.dataclass +class PathConstraint: + symbol: Symbol + after_step: TraceStep + new_input_value: bytes | None + taken: bool + + +@dataclasses.dataclass +class MemoryArea: + address: int + name: str + + +@dataclasses.dataclass +class TraceData: + trace: list[TraceStep] + symbols: dict[str, Symbol] + path_constraints: list[PathConstraint] + memory_areas: list[MemoryArea] diff --git a/util/symbolic_trace/symcctrace/parsing.py b/util/symbolic_trace/symcctrace/parsing.py new file mode 100644 index 00000000..2acd85ca --- /dev/null +++ b/util/symbolic_trace/symcctrace/parsing.py @@ -0,0 +1,120 @@ +import functools +import itertools +import json +import math +import pathlib + +import cattrs + +from . import data + +MEMORY_AREA_MAX_DISTANCE = 0x100000 + + +def parse_trace( + trace_file: pathlib.Path, + memory_region_names_file: pathlib.Path, +) -> data.TraceData: + with open(trace_file) as file: + raw_trace_data = cattrs.Converter(forbid_extra_keys=True).structure(json.load(file), data.RawTraceData) + + with open(memory_region_names_file) as file: + def convert_to_memory_area(raw_memory_area: dict[str, str]) -> data.MemoryArea: + return cattrs.Converter(forbid_extra_keys=True).structure(raw_memory_area, data.MemoryArea) + + memory_areas = list(map(convert_to_memory_area, json.load(file))) + + symbols = _convert_symbols(raw_trace_data.symbols) + trace_steps = list( + map(functools.partial(_convert_trace_step, symbols=symbols, memory_areas=memory_areas), raw_trace_data.trace)) + path_constraints = list(map(functools.partial(_convert_path_constraint, symbols=symbols, trace_steps=trace_steps), + raw_trace_data.path_constraints)) + + trace_data = data.TraceData( + trace=trace_steps, + symbols=symbols, + path_constraints=path_constraints, + memory_areas=memory_areas + ) + + return trace_data + + +def _convert_operation(raw_operation: data.Operation, size_bits: int) -> data.Operation: + operation = data.Operation( + kind=data.SymbolKind(raw_operation.kind), + properties=raw_operation.properties + ) + + if 'value' in operation.properties: + operation.properties['value'] = int(operation.properties['value']).to_bytes(math.ceil(size_bits / 8), 'little') + + return operation + + +def _convert_symbols(raw_symbols: dict[str, data.RawSymbol]) -> dict[str, data.Symbol]: + symbols = {} + + def recursively_create_symbol(symbol_id: str): + if symbol_id in symbols: + return symbols[symbol_id] + + raw_symbol = raw_symbols[symbol_id] + args = [recursively_create_symbol(arg) for arg in raw_symbol.args] + + symbol = data.Symbol( + operation=_convert_operation(raw_symbol.operation, raw_symbol.size_bits), + args=args, + input_byte_dependency=raw_symbol.input_byte_dependency, + size_bits=raw_symbol.size_bits + ) + symbols[symbol_id] = symbol + return symbol + + for symbol_id in raw_symbols: + recursively_create_symbol(symbol_id) + + return symbols + + +def _raw_memory_address_to_named_location(raw_memory_address: int, memory_areas: list[data.MemoryArea]) -> str: + def distance(memory_area: data.MemoryArea) -> int: + return raw_memory_address - memory_area.address + + def is_candidate(memory_area: data.MemoryArea) -> bool: + return \ + abs(distance(memory_area)) < MEMORY_AREA_MAX_DISTANCE \ + if memory_area.name == 'stack' \ + else 0 <= distance(memory_area) < MEMORY_AREA_MAX_DISTANCE + + def absolute_distance(memory_area: data.MemoryArea) -> int: + return abs(distance(memory_area)) + + closest_memory_area = min(filter(is_candidate, memory_areas), key=absolute_distance) + + return f'{closest_memory_area.name}+{hex(distance(closest_memory_area))}' + + +def _convert_trace_step(raw_trace_step: data.RawTraceStep, symbols: dict[str, data.Symbol], + memory_areas: list[data.MemoryArea]) -> data.TraceStep: + def convert_mapping(mapping: dict[str, str]) -> dict[str, data.Symbol]: + def convert_mapping_element(raw_address: str, symbol_id: str) -> tuple[str, data.Symbol]: + return _raw_memory_address_to_named_location(int(raw_address), memory_areas), symbols[symbol_id] + + return dict(itertools.starmap(convert_mapping_element, mapping.items())) + + return data.TraceStep( + pc=raw_trace_step.pc, + memory_to_symbol_mapping=convert_mapping(raw_trace_step.memory_to_symbol_mapping) + ) + + +def _convert_path_constraint(raw_path_constraint: data.RawPathConstraint, symbols: dict[str, data.Symbol], + trace_steps: list[data.TraceStep]) -> data.PathConstraint: + return data.PathConstraint( + symbol=symbols[raw_path_constraint.symbol], + after_step=trace_steps[raw_path_constraint.after_step], + new_input_value=bytes( + raw_path_constraint.new_input_value) if raw_path_constraint.new_input_value is not None else None, + taken=raw_path_constraint.taken + ) From b6962fd46ca57e34d5414f3573b2e009bcc0a369 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Sat, 23 Dec 2023 18:33:39 +0000 Subject: [PATCH 12/17] Refactor and documentation --- runtime/Tracer.cpp | 63 +++++++++++---------- runtime/Tracer.h | 36 +++++++++++- runtime/qsym_backend/Runtime.cpp | 18 +++++- util/symbolic_trace/README.md | 7 +++ util/symbolic_trace/symcctrace/__init__.py | 2 +- util/symbolic_trace/symcctrace/data.py | 22 ++++---- util/symbolic_trace/symcctrace/parsing.py | 66 ++++++++++++---------- 7 files changed, 137 insertions(+), 77 deletions(-) create mode 100644 util/symbolic_trace/README.md diff --git a/runtime/Tracer.cpp b/runtime/Tracer.cpp index 679f8d2c..0ba52c5a 100644 --- a/runtime/Tracer.cpp +++ b/runtime/Tracer.cpp @@ -4,7 +4,7 @@ #include "llvm/Support/raw_ostream.h" nlohmann::json Tracer::currentTrace; -nlohmann::json Tracer::symbols; +nlohmann::json Tracer::expressions; nlohmann::json Tracer::pathConstraints; const std::string Tracer::BACKEND_TRACE_FILE = "/tmp/backend_trace.json"; @@ -12,28 +12,31 @@ void Tracer::traceStep(uintptr_t pc) { nlohmann::json newEntry; newEntry["pc"] = pc; - newEntry["memory_to_symbol_mapping"] = nlohmann::json::object(); + newEntry["memory_to_expression_mapping"] = nlohmann::json::object(); + /* dump shadow pages */ for (auto const &[pageAddress, _] : g_shadow_pages) { for (auto byteAddress = pageAddress; byteAddress < pageAddress + kPageSize; byteAddress++) { auto byteExpr = _sym_read_memory((u_int8_t *)byteAddress, 1, true); if (byteExpr != nullptr && !byteExpr->isConcrete()) { - newEntry["memory_to_symbol_mapping"][std::to_string(reinterpret_cast(byteAddress))] = - getSymbolID(byteExpr); + newEntry["memory_to_expression_mapping"][std::to_string(reinterpret_cast(byteAddress))] = + getExpressionID(byteExpr); } } } + /* dump regions that the client has registered with _sym_register_expression_region + * for SymQEMU, this is env_exprs which contains the expressions for the global TCG variables (i.e., guest registers) */ for (auto &expressionRegion : getExpressionRegions()){ for (auto byteAddress = expressionRegion.first; byteAddress < expressionRegion.first + expressionRegion.second / sizeof(byteAddress); byteAddress++) { auto byteExpr = *byteAddress; if (byteExpr != nullptr && !byteExpr->isConcrete()) { - newEntry["memory_to_symbol_mapping"][std::to_string(reinterpret_cast(byteAddress))] = - getSymbolID(byteExpr); + newEntry["memory_to_expression_mapping"][std::to_string(reinterpret_cast(byteAddress))] = + getExpressionID(byteExpr); } } } @@ -44,12 +47,12 @@ void Tracer::traceStep(uintptr_t pc) { void Tracer::tracePathConstraint(SymExpr constraint, bool taken) { if (pathConstraints.empty()) { - symcc_set_test_case_handler( + symcc_set_test_case_handler( reinterpret_cast(traceNewInput)); } nlohmann::json newEntry; - newEntry["symbol"] = getSymbolID(constraint); + newEntry["expression"] = getExpressionID(constraint); newEntry["after_step"] = currentTrace.size() - 1; newEntry["new_input_value"] = nlohmann::json(); newEntry["taken"] = taken; @@ -65,46 +68,46 @@ void Tracer::traceNewInput(const unsigned char *input, size_t size) { } void Tracer::writeTraceToDisk() { - for (auto const &[_, symbolPtr] : getAllocatedExpressions()) { - recursivelyCollectSymbols(symbolPtr); + for (auto const &[_, expressionPtr] : getAllocatedExpressions()) { + recursivelyCollectExpressions(expressionPtr); } nlohmann::json dataToSave; dataToSave["trace"] = currentTrace; - dataToSave["symbols"] = symbols; + dataToSave["expressions"] = expressions; dataToSave["path_constraints"] = pathConstraints; std::ofstream o(BACKEND_TRACE_FILE); o << std::setw(4) << dataToSave << std::endl; } -void Tracer::recursivelyCollectSymbols(const shared_ptr& symbolPtr) { - string symbolID = getSymbolID(symbolPtr.get()); - if (symbols.count(symbolID) > 0) { +void Tracer::recursivelyCollectExpressions(const shared_ptr&expressionPtr) { + string expressionID = getExpressionID(expressionPtr.get()); + if (expressions.count(expressionID) > 0) { return; } - symbols[symbolID]["operation"]["kind"] = symbolPtr->kind(); - symbols[symbolID]["operation"]["properties"] = nlohmann::json::object(); - if (symbolPtr->kind() == qsym::Constant){ - auto value_llvm_int = static_pointer_cast(symbolPtr)->value(); + expressions[expressionID]["operation"]["kind"] = expressionPtr->kind(); + expressions[expressionID]["operation"]["properties"] = nlohmann::json::object(); + if (expressionPtr->kind() == qsym::Constant){ + auto value_llvm_int = static_pointer_cast(expressionPtr)->value(); std::string value_str; llvm::raw_string_ostream rso(value_str); value_llvm_int.print(rso, false); - symbols[symbolID]["operation"]["properties"]["value"] = value_str; + expressions[expressionID]["operation"]["properties"]["value"] = value_str; } - symbols[symbolID]["size_bits"] = symbolPtr->bits(); - symbols[symbolID]["input_byte_dependency"] = *symbolPtr->getDependencies(); - symbols[symbolID]["args"] = nlohmann::json::array(); - for (int child_i = 0; child_i < symbolPtr->num_children(); child_i++) { - shared_ptr child = symbolPtr->getChild(child_i); - string childID = getSymbolID(child.get()); - symbols[symbolID]["args"].push_back(childID); - recursivelyCollectSymbols(child); + expressions[expressionID]["size_bits"] = expressionPtr->bits(); + expressions[expressionID]["input_byte_dependency"] = *expressionPtr->getDependencies(); + expressions[expressionID]["args"] = nlohmann::json::array(); + for (int child_i = 0; child_i < expressionPtr->num_children(); child_i++) { + shared_ptr child = expressionPtr->getChild(child_i); + string childID = getExpressionID(child.get()); + expressions[expressionID]["args"].push_back(childID); + recursivelyCollectExpressions(child); } } -string Tracer::getSymbolID(SymExpr symbol) { - return std::to_string(reinterpret_cast(symbol)); -} \ No newline at end of file +string Tracer::getExpressionID(SymExpr expression) { + return std::to_string(reinterpret_cast(expression)); +} diff --git a/runtime/Tracer.h b/runtime/Tracer.h index 092a6057..fd1cb448 100644 --- a/runtime/Tracer.h +++ b/runtime/Tracer.h @@ -8,19 +8,49 @@ #include #include +/** + * Generates a trace of the symbolic execution. + * + * A trace contains a list of symbolic memory snapshots, a list of path constraints and a list of expressions. + * The trace is written to a json file at the end of the execution. + */ class Tracer { public: + /** + * Adds a dump of the current symbolic state to the trace. + */ static void traceStep(uintptr_t pc); + + /** + * Adds a path constraint to the trace. + */ static void tracePathConstraint(SymExpr constraint, bool taken); + + /** + * Adds an input generated by the symbolic backend. + * The input is associated to the last path constraint. + */ static void traceNewInput(const unsigned char* input, size_t size); + + /** + * Generates a json file containing the trace. + * Must be called before the program exits. + */ static void writeTraceToDisk(); private: - static void recursivelyCollectSymbols(const shared_ptr& symbolPtr); - static string getSymbolID(SymExpr symbol); + /** + * Collects all expressions reachable from the given expression. + */ + static void recursivelyCollectExpressions(const shared_ptr&expressionPtr); + + /** + * Generates a string ID from the address of the given expression. + */ + static string getExpressionID(SymExpr expression); static nlohmann::json currentTrace; - static nlohmann::json symbols; + static nlohmann::json expressions; static nlohmann::json pathConstraints; static const std::string BACKEND_TRACE_FILE; }; diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index ea4fe3f6..c62dc68c 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -142,8 +142,9 @@ class EnhancedQsymSolver : public qsym::Solver { } void saveValues(const std::string &suffix) override { + auto values = getConcreteValues(); if (auto handler = g_test_case_handler) { - auto values = getConcreteValues(); + // The test-case handler may be instrumented, so let's call it with // argument expressions to meet instrumented code's expectations. // Otherwise, we might end up erroneously using whatever expression was @@ -151,9 +152,20 @@ class EnhancedQsymSolver : public qsym::Solver { _sym_set_parameter_expression(0, nullptr); _sym_set_parameter_expression(1, nullptr); handler(values.data(), values.size()); - } else { - Solver::saveValues(suffix); } +// } else { + Solver::saveValues(suffix); + puts("New testcase hex : "); + for (unsigned char value : values) { + printf("%02x", value); + } + puts(""); + puts("New testcase ascii : "); + for (unsigned char value : values) { + printf("%c", value); + } + puts(""); +// } } }; diff --git a/util/symbolic_trace/README.md b/util/symbolic_trace/README.md new file mode 100644 index 00000000..6f03e33f --- /dev/null +++ b/util/symbolic_trace/README.md @@ -0,0 +1,7 @@ +This is a python package for parsing a trace generated by the tracing feature of SymCC. + +Install it with + +``` +pip install . +``` \ No newline at end of file diff --git a/util/symbolic_trace/symcctrace/__init__.py b/util/symbolic_trace/symcctrace/__init__.py index 746518b7..6b9d7eef 100644 --- a/util/symbolic_trace/symcctrace/__init__.py +++ b/util/symbolic_trace/symcctrace/__init__.py @@ -1,2 +1,2 @@ from .parsing import parse_trace -from .data import TraceData \ No newline at end of file +from .data import TraceData diff --git a/util/symbolic_trace/symcctrace/data.py b/util/symbolic_trace/symcctrace/data.py index 3d52f487..a965c951 100644 --- a/util/symbolic_trace/symcctrace/data.py +++ b/util/symbolic_trace/symcctrace/data.py @@ -2,7 +2,7 @@ import dataclasses -class SymbolKind(enum.Enum): +class ExpressionKind(enum.Enum): BOOL = 0 CONSTANT = 1 READ = 2 @@ -46,12 +46,12 @@ class SymbolKind(enum.Enum): @dataclasses.dataclass class Operation: - kind: SymbolKind + kind: ExpressionKind properties: dict @dataclasses.dataclass -class RawSymbol: +class RawExpression: operation: Operation size_bits: int input_byte_dependency: list[int] @@ -61,12 +61,12 @@ class RawSymbol: @dataclasses.dataclass class RawTraceStep: pc: int - memory_to_symbol_mapping: dict[str, str] + memory_to_expression_mapping: dict[str, str] @dataclasses.dataclass class RawPathConstraint: - symbol: str + expression: str after_step: int new_input_value: list[int] | None taken: bool @@ -75,27 +75,27 @@ class RawPathConstraint: @dataclasses.dataclass class RawTraceData: trace: list[RawTraceStep] - symbols: dict[str, RawSymbol] + expressions: dict[str, RawExpression] path_constraints: list[RawPathConstraint] @dataclasses.dataclass -class Symbol: +class Expression: operation: Operation size_bits: int input_byte_dependency: list[int] - args: list['Symbol'] + args: list['Expression'] @dataclasses.dataclass class TraceStep: pc: int - memory_to_symbol_mapping: dict[str, Symbol] + memory_to_expression_mapping: dict[str, Expression] @dataclasses.dataclass class PathConstraint: - symbol: Symbol + expression: Expression after_step: TraceStep new_input_value: bytes | None taken: bool @@ -110,6 +110,6 @@ class MemoryArea: @dataclasses.dataclass class TraceData: trace: list[TraceStep] - symbols: dict[str, Symbol] + expressions: dict[str, Expression] path_constraints: list[PathConstraint] memory_areas: list[MemoryArea] diff --git a/util/symbolic_trace/symcctrace/parsing.py b/util/symbolic_trace/symcctrace/parsing.py index 2acd85ca..0a571538 100644 --- a/util/symbolic_trace/symcctrace/parsing.py +++ b/util/symbolic_trace/symcctrace/parsing.py @@ -15,6 +15,12 @@ def parse_trace( trace_file: pathlib.Path, memory_region_names_file: pathlib.Path, ) -> data.TraceData: + """Parses a trace generated by the tracing feature of SymCC. + + Args + trace_file: The path to the trace file generated by SymCC. + memory_region_names_file: The path to a file containing a mapping from memory region names to their addresses. + """ with open(trace_file) as file: raw_trace_data = cattrs.Converter(forbid_extra_keys=True).structure(json.load(file), data.RawTraceData) @@ -24,15 +30,17 @@ def convert_to_memory_area(raw_memory_area: dict[str, str]) -> data.MemoryArea: memory_areas = list(map(convert_to_memory_area, json.load(file))) - symbols = _convert_symbols(raw_trace_data.symbols) + expressions = _convert_expressions(raw_trace_data.expressions) trace_steps = list( - map(functools.partial(_convert_trace_step, symbols=symbols, memory_areas=memory_areas), raw_trace_data.trace)) - path_constraints = list(map(functools.partial(_convert_path_constraint, symbols=symbols, trace_steps=trace_steps), - raw_trace_data.path_constraints)) + map(functools.partial(_convert_trace_step, expressions=expressions, memory_areas=memory_areas), + raw_trace_data.trace)) + path_constraints = list( + map(functools.partial(_convert_path_constraint, expressions=expressions, trace_steps=trace_steps), + raw_trace_data.path_constraints)) trace_data = data.TraceData( trace=trace_steps, - symbols=symbols, + expressions=expressions, path_constraints=path_constraints, memory_areas=memory_areas ) @@ -42,7 +50,7 @@ def convert_to_memory_area(raw_memory_area: dict[str, str]) -> data.MemoryArea: def _convert_operation(raw_operation: data.Operation, size_bits: int) -> data.Operation: operation = data.Operation( - kind=data.SymbolKind(raw_operation.kind), + kind=data.ExpressionKind(raw_operation.kind), properties=raw_operation.properties ) @@ -52,29 +60,29 @@ def _convert_operation(raw_operation: data.Operation, size_bits: int) -> data.Op return operation -def _convert_symbols(raw_symbols: dict[str, data.RawSymbol]) -> dict[str, data.Symbol]: - symbols = {} +def _convert_expressions(raw_expressions: dict[str, data.RawExpression]) -> dict[str, data.Expression]: + expressions = {} - def recursively_create_symbol(symbol_id: str): - if symbol_id in symbols: - return symbols[symbol_id] + def recursively_create_expression(expression_id: str): + if expression_id in expressions: + return expressions[expression_id] - raw_symbol = raw_symbols[symbol_id] - args = [recursively_create_symbol(arg) for arg in raw_symbol.args] + raw_expression = raw_expressions[expression_id] + args = [recursively_create_expression(arg) for arg in raw_expression.args] - symbol = data.Symbol( - operation=_convert_operation(raw_symbol.operation, raw_symbol.size_bits), + expression = data.Expression( + operation=_convert_operation(raw_expression.operation, raw_expression.size_bits), args=args, - input_byte_dependency=raw_symbol.input_byte_dependency, - size_bits=raw_symbol.size_bits + input_byte_dependency=raw_expression.input_byte_dependency, + size_bits=raw_expression.size_bits ) - symbols[symbol_id] = symbol - return symbol + expressions[expression_id] = expression + return expression - for symbol_id in raw_symbols: - recursively_create_symbol(symbol_id) + for expression_id in raw_expressions: + recursively_create_expression(expression_id) - return symbols + return expressions def _raw_memory_address_to_named_location(raw_memory_address: int, memory_areas: list[data.MemoryArea]) -> str: @@ -95,24 +103,24 @@ def absolute_distance(memory_area: data.MemoryArea) -> int: return f'{closest_memory_area.name}+{hex(distance(closest_memory_area))}' -def _convert_trace_step(raw_trace_step: data.RawTraceStep, symbols: dict[str, data.Symbol], +def _convert_trace_step(raw_trace_step: data.RawTraceStep, expressions: dict[str, data.Expression], memory_areas: list[data.MemoryArea]) -> data.TraceStep: - def convert_mapping(mapping: dict[str, str]) -> dict[str, data.Symbol]: - def convert_mapping_element(raw_address: str, symbol_id: str) -> tuple[str, data.Symbol]: - return _raw_memory_address_to_named_location(int(raw_address), memory_areas), symbols[symbol_id] + def convert_mapping(mapping: dict[str, str]) -> dict[str, data.Expression]: + def convert_mapping_element(raw_address: str, expression_id: str) -> tuple[str, data.Expression]: + return _raw_memory_address_to_named_location(int(raw_address), memory_areas), expressions[expression_id] return dict(itertools.starmap(convert_mapping_element, mapping.items())) return data.TraceStep( pc=raw_trace_step.pc, - memory_to_symbol_mapping=convert_mapping(raw_trace_step.memory_to_symbol_mapping) + memory_to_expression_mapping=convert_mapping(raw_trace_step.memory_to_expression_mapping) ) -def _convert_path_constraint(raw_path_constraint: data.RawPathConstraint, symbols: dict[str, data.Symbol], +def _convert_path_constraint(raw_path_constraint: data.RawPathConstraint, expressions: dict[str, data.Expression], trace_steps: list[data.TraceStep]) -> data.PathConstraint: return data.PathConstraint( - symbol=symbols[raw_path_constraint.symbol], + expression=expressions[raw_path_constraint.expression], after_step=trace_steps[raw_path_constraint.after_step], new_input_value=bytes( raw_path_constraint.new_input_value) if raw_path_constraint.new_input_value is not None else None, From 85fff40ea5bd1a8c534b0ba0bfe1cd2b838c38dc Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Sat, 13 Jan 2024 12:31:25 +0000 Subject: [PATCH 13/17] increase distance threashold for memory area mappings --- util/symbolic_trace/symcctrace/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/symbolic_trace/symcctrace/parsing.py b/util/symbolic_trace/symcctrace/parsing.py index 0a571538..c86396f1 100644 --- a/util/symbolic_trace/symcctrace/parsing.py +++ b/util/symbolic_trace/symcctrace/parsing.py @@ -8,7 +8,7 @@ from . import data -MEMORY_AREA_MAX_DISTANCE = 0x100000 +MEMORY_AREA_MAX_DISTANCE = 0x1000000 def parse_trace( From e84cf1af04397dee6176b3e5fe03570b8d70ee2a Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Sun, 25 Feb 2024 17:23:09 +0000 Subject: [PATCH 14/17] Put allocatedExpressions back in the anonymous namespace In a previous commit I got allocatedExpressions out of the anonymous namespace, but this is not necessary anymore as it can be accessed with getAllocatedExpressions --- runtime/qsym_backend/Runtime.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index c62dc68c..66c35d80 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -76,6 +76,11 @@ z3::context *g_z3_context; } // namespace qsym +namespace { + +/// Indicate whether the runtime has been initialized. +std::atomic_flag g_initialized = ATOMIC_FLAG_INIT; + /// A mapping of all expressions that we have ever received from QSYM to the /// corresponding shared pointers on the heap. /// @@ -87,17 +92,6 @@ z3::context *g_z3_context; /// workload. std::map allocatedExpressions; -std::map &getAllocatedExpressions() { - return allocatedExpressions; -} - -namespace { - -/// Indicate whether the runtime has been initialized. -std::atomic_flag g_initialized = ATOMIC_FLAG_INIT; - - - SymExpr registerExpression(const qsym::ExprRef &expr) { SymExpr rawExpr = expr.get(); @@ -173,6 +167,10 @@ EnhancedQsymSolver *g_enhanced_solver; } // namespace +std::map &getAllocatedExpressions() { + return allocatedExpressions; +} + using namespace qsym; #if HAVE_FILESYSTEM From d3b4ca599cdf69648b821391239da94f7d9781dd Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Sun, 25 Feb 2024 17:31:10 +0000 Subject: [PATCH 15/17] Remove test cases prints Those are lines of code that I did not intend to include in the PR --- runtime/qsym_backend/Runtime.cpp | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index 66c35d80..3f50b28c 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -136,9 +136,8 @@ class EnhancedQsymSolver : public qsym::Solver { } void saveValues(const std::string &suffix) override { - auto values = getConcreteValues(); if (auto handler = g_test_case_handler) { - + auto values = getConcreteValues(); // The test-case handler may be instrumented, so let's call it with // argument expressions to meet instrumented code's expectations. // Otherwise, we might end up erroneously using whatever expression was @@ -146,20 +145,9 @@ class EnhancedQsymSolver : public qsym::Solver { _sym_set_parameter_expression(0, nullptr); _sym_set_parameter_expression(1, nullptr); handler(values.data(), values.size()); - } -// } else { + } else { Solver::saveValues(suffix); - puts("New testcase hex : "); - for (unsigned char value : values) { - printf("%02x", value); - } - puts(""); - puts("New testcase ascii : "); - for (unsigned char value : values) { - printf("%c", value); - } - puts(""); -// } + } } }; From ae6161bbd88204eefe1a1c8e23cde6840938977b Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Sun, 25 Feb 2024 17:36:50 +0000 Subject: [PATCH 16/17] Revert unnecesary comment --- runtime/qsym_backend/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/qsym_backend/CMakeLists.txt b/runtime/qsym_backend/CMakeLists.txt index cd5f40db..d5d76744 100644 --- a/runtime/qsym_backend/CMakeLists.txt +++ b/runtime/qsym_backend/CMakeLists.txt @@ -98,5 +98,5 @@ target_link_libraries(SymRuntime ${Z3_LIBRARIES} ${QSYM_LLVM_DEPS} nlohmann_json # some current LTS distributions ship a GCC that requires libstdc++fs for # std::filesystem - we catch this case in order to enable users of such systems # to build with the default compiler. -#find_package(Filesystem COMPONENTS Final Experimental) -#target_link_libraries(SymRuntime std::filesystem) +find_package(Filesystem COMPONENTS Final Experimental) +target_link_libraries(SymRuntime std::filesystem) From e3f37935145c237e23a277243010eaaa915248b4 Mon Sep 17 00:00:00 2001 From: Damien Maier <91182145+damienmaier@users.noreply.github.com> Date: Sun, 25 Feb 2024 17:41:43 +0000 Subject: [PATCH 17/17] fix indentation --- runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index b354c3bc..610e1f8b 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -34,7 +34,7 @@ set(SHARED_RUNTIME_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/RuntimeCommon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/LibcWrappers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Shadow.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Tracer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Tracer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/GarbageCollection.cpp) if (${QSYM_BACKEND})