From 560a5f1ff96e4a2efa47fdd9f95c6a60c21f23b5 Mon Sep 17 00:00:00 2001 From: hbrodin <90325907+hbrodin@users.noreply.github.com> Date: Mon, 12 Sep 2022 11:12:28 +0200 Subject: [PATCH 1/9] Bump C++ version to 20 globally --- CMakeLists.txt | 2 ++ polytracker/src/CMakeLists.txt | 2 -- polytracker/src/taintdag/CMakeLists.txt | 1 - polytracker/src/taintdag/test/CMakeLists.txt | 2 -- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c535a9ab..5a8ca4ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,8 @@ if(NOT EXISTS "${PROJECT_SOURCE_DIR}/third_party/indicators/CMakeLists.txt") message(FATAL_ERROR "git submodule update --init --recursive must be run first to checkout submodules") endif() +set(CMAKE_CXX_STANDARD 20) + add_subdirectory(third_party/Catch2) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/third_party/Catch2/contrib") add_subdirectory(third_party/indicators) diff --git a/polytracker/src/CMakeLists.txt b/polytracker/src/CMakeLists.txt index 316a052d..6aa4451d 100644 --- a/polytracker/src/CMakeLists.txt +++ b/polytracker/src/CMakeLists.txt @@ -1,5 +1,3 @@ -set(CMAKE_CXX_STANDARD 17) - find_package(LLVM 13 CONFIG) if(LLVM_FOUND) diff --git a/polytracker/src/taintdag/CMakeLists.txt b/polytracker/src/taintdag/CMakeLists.txt index 2bf242e5..a7c2c6ac 100644 --- a/polytracker/src/taintdag/CMakeLists.txt +++ b/polytracker/src/taintdag/CMakeLists.txt @@ -1,5 +1,4 @@ -set(CMAKE_CXX_STANDARD 17) add_subdirectory(test) add_library(taintdag STATIC encoding.cpp fdmapping.cpp output.cpp print.cpp) \ No newline at end of file diff --git a/polytracker/src/taintdag/test/CMakeLists.txt b/polytracker/src/taintdag/test/CMakeLists.txt index 9d1ab88a..6c748f93 100644 --- a/polytracker/src/taintdag/test/CMakeLists.txt +++ b/polytracker/src/taintdag/test/CMakeLists.txt @@ -1,5 +1,3 @@ -set(CMAKE_CXX_STANDARD 17) - include(CTest) include(Catch) From aebd193f83ab629843cfcbc17d68ddca028a5b1a Mon Sep 17 00:00:00 2001 From: hbrodin <90325907+hbrodin@users.noreply.github.com> Date: Tue, 13 Sep 2022 13:38:17 +0200 Subject: [PATCH 2/9] Install blight before polytracker to reuse cached version of install --- Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 13315a4a..266d272a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,10 @@ RUN update-ca-certificates RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 10 RUN python3 -m pip install pip && python3 -m pip install pytest +WORKDIR /blight +RUN git clone https://github.com/trailofbits/blight.git . +RUN pip3 install . + COPY . /polytracker RUN mkdir /polytracker/build @@ -27,9 +31,6 @@ WORKDIR /polytracker/build RUN cmake -GNinja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_VERBOSE_MAKEFILE=TRUE -DCXX_LIB_PATH=/cxx_libs .. RUN ninja install -WORKDIR /blight -RUN git clone https://github.com/trailofbits/blight.git . -RUN pip3 install . WORKDIR /polytracker RUN pip3 install . From 38e2e6bd5e090b15e73941713e1143c6df0748c2 Mon Sep 17 00:00:00 2001 From: hbrodin <90325907+hbrodin@users.noreply.github.com> Date: Tue, 13 Sep 2022 13:57:48 +0200 Subject: [PATCH 3/9] Initial version of argv-tainting --- polytracker/custom_abi/dfsan_abilist.txt | 2 + .../custom_abi/polytracker_abilist.txt | 2 + polytracker/include/polytracker/polytracker.h | 3 ++ .../include/polytracker/taint_sources.h | 4 ++ polytracker/include/taintdag/polytracker.h | 8 ++++ polytracker/src/CMakeLists.txt | 3 +- polytracker/src/passes/polytracker_pass.cpp | 43 +++++++++++++++++++ polytracker/src/polytracker/main.cpp | 8 ++++ polytracker/src/polytracker/polytracker.cpp | 5 +++ polytracker/src/taint_sources/argv.cpp | 33 ++++++++++++++ polytracker/src/taintdag/polytracker.cpp | 22 ++++++++++ tests/test_argv.cpp | 10 +++++ tests/test_polytracker.py | 21 +++++++++ 13 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 polytracker/src/taint_sources/argv.cpp create mode 100644 tests/test_argv.cpp diff --git a/polytracker/custom_abi/dfsan_abilist.txt b/polytracker/custom_abi/dfsan_abilist.txt index 8da32a00..85a9b887 100644 --- a/polytracker/custom_abi/dfsan_abilist.txt +++ b/polytracker/custom_abi/dfsan_abilist.txt @@ -62,6 +62,8 @@ fun:__polytracker_store_blob=uninstrumented fun:__polytracker_store_blob=discard fun:__polytracker_preserve_map=uninstrumented fun:__polytracker_preserve_map=discard +fun:__polytracker_taint_argv=uninstrumented +fun:__polytracker_taint_argv=discard fun:__dfsan_update_label_count=uninstrumented fun:__dfsan_update_label_count=discard diff --git a/polytracker/custom_abi/polytracker_abilist.txt b/polytracker/custom_abi/polytracker_abilist.txt index e9690f6a..46693305 100644 --- a/polytracker/custom_abi/polytracker_abilist.txt +++ b/polytracker/custom_abi/polytracker_abilist.txt @@ -62,6 +62,8 @@ fun:__polytracker_store_blob=uninstrumented fun:__polytracker_store_blob=discard fun:__polytracker_preserve_map=uninstrumented fun:__polytracker_preserve_map=discard +fun:__polytracker_taint_argv=uninstrumented +fun:__polytracker_taint_argv=discard fun:__remill_jump=uninstrumented fun:__remill_jump=discard diff --git a/polytracker/include/polytracker/polytracker.h b/polytracker/include/polytracker/polytracker.h index d2871a9a..409e3bfc 100644 --- a/polytracker/include/polytracker/polytracker.h +++ b/polytracker/include/polytracker/polytracker.h @@ -46,3 +46,6 @@ extern uint64_t func_mapping_count; extern const block_mapping *block_mappings; extern uint64_t block_mapping_count; + +// Controls argv being a taint source +extern bool polytracker_taint_argv; \ No newline at end of file diff --git a/polytracker/include/polytracker/taint_sources.h b/polytracker/include/polytracker/taint_sources.h index a5917d5b..dd09f3c8 100644 --- a/polytracker/include/polytracker/taint_sources.h +++ b/polytracker/include/polytracker/taint_sources.h @@ -8,4 +8,8 @@ #define EXT_C_FUNC extern "C" __attribute__((visibility("default"))) #define EXT_CXX_FUNC extern __attribute__((visibility("default"))) +namespace polytracker { +void taint_argv(int argc, char *argv[]); +} + #endif \ No newline at end of file diff --git a/polytracker/include/taintdag/polytracker.h b/polytracker/include/taintdag/polytracker.h index dd95c5fc..e6480684 100644 --- a/polytracker/include/taintdag/polytracker.h +++ b/polytracker/include/taintdag/polytracker.h @@ -2,6 +2,7 @@ #define POLYTRACKER_TAINTDAG_POLYTRACKER_H #include +#include #include "taintdag/fdmapping.hpp" #include "taintdag/output.hpp" @@ -32,6 +33,13 @@ class PolyTracker { std::optional source_taint(int fd, source_offset_t offset, size_t length); + // Create a new taint source (not a file) and assigns taint labels + // A new taint source named 'name' is created + // Memory in 'dst' is assigned source taint labels referring to source 'name' + // and in increasing offset. + std::optional create_taint_source(std::string_view name, + std::span dst); + // Update the label, it affects control flow void affects_control_flow(label_t taint_label); diff --git a/polytracker/src/CMakeLists.txt b/polytracker/src/CMakeLists.txt index 6aa4451d..ef77a9c7 100644 --- a/polytracker/src/CMakeLists.txt +++ b/polytracker/src/CMakeLists.txt @@ -48,7 +48,8 @@ set(CMAKE_EXE_LINKER_FLAGS set(POLY_SOURCES ${POLY_DIR}/main.cpp ${POLY_DIR}/polytracker.cpp) set(TAINT_SOURCES ${TAINT_DIR}/taint_sources.cpp ${TAINT_DIR}/string_taints.cpp - ${TAINT_DIR}/memory_taints.cpp ${TAINT_DIR}/write_taints.cpp) + ${TAINT_DIR}/memory_taints.cpp ${TAINT_DIR}/write_taints.cpp + ${TAINT_DIR}/argv.cpp) set(TAINTDAG_SOURCES ${TAINTDAG_DIR}/encoding.cpp ${TAINTDAG_DIR}/fdmapping.cpp ${TAINTDAG_DIR}/output.cpp ${TAINTDAG_DIR}/polytracker.cpp) diff --git a/polytracker/src/passes/polytracker_pass.cpp b/polytracker/src/passes/polytracker_pass.cpp index 74bbfa86..519e4f2a 100644 --- a/polytracker/src/passes/polytracker_pass.cpp +++ b/polytracker/src/passes/polytracker_pass.cpp @@ -5,6 +5,7 @@ // #include "polytracker/thread_pool.h" #include "spdlog/cfg/env.h" #include "spdlog/spdlog.h" +#include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -289,12 +290,49 @@ bool PolytrackerPass::analyzeBlock(llvm::Function *func, return true; } +// Inserts a function call to polytracker::taint_argv(argc, argv) +// Assumes main is actually the main function of the program and +// interprets first arg as argc and second as argv. +static void emitTaintArgvCall(llvm::Function &main) { + // Get the parameters of the main function, argc, argv + auto argc = main.getArg(0); + if (!argc) { + spdlog::error("Failed to instrument argv. No argc available."); + return; + } + auto argc_ty = argc->getType(); + + auto argv = main.getArg(1); + if (!argv) { + spdlog::error("Failed to instrument argv. No argv available."); + return; + } + auto argv_ty = argv->getType(); + + // Define the target function type and make it available in the module + auto taint_argv_ty = llvm::FunctionType::get( + llvm::Type::getVoidTy(main.getContext()), {argc_ty, argv_ty}, false); + llvm::FunctionCallee taint_argv = main.getParent()->getOrInsertFunction( + "__polytracker_taint_argv", taint_argv_ty); + assert(taint_argv); + + // Emit a call to the taint_argv function using parameters from main. + auto &bb = main.getEntryBlock(); + llvm::Instruction &insert_point = *(bb.getFirstInsertionPt()); + llvm::IRBuilder<> irb(&insert_point); + auto ci = irb.CreateCall(taint_argv, {argc, argv}); + if (!ci) { + spdlog::error("Failed to insert call to taint_argv."); + } +} + /* We should instrument everything we have bitcode for, right? If instructions have __polytracker, or they have __dfsan, ignore! */ bool PolytrackerPass::analyzeFunction(llvm::Function *f, const func_index_t &func_index) { + // Add Function entry polytracker::BBSplittingPass bbSplitter; // llvm::removeUnreachableBlocks(*f); @@ -340,6 +378,11 @@ bool PolytrackerPass::analyzeFunction(llvm::Function *f, visit(inst); } + // If this is the main function, insert a taint-argv call + if (f && f->getName() == "main") { + emitTaintArgvCall(*f); + } + return true; } diff --git a/polytracker/src/polytracker/main.cpp b/polytracker/src/polytracker/main.cpp index 1b715dd9..6cdb7d2b 100644 --- a/polytracker/src/polytracker/main.cpp +++ b/polytracker/src/polytracker/main.cpp @@ -28,10 +28,14 @@ DECLARE_EARLY_CONSTRUCT(std::string, polytracker_db_name); DECLARE_EARLY_CONSTRUCT(std::string, polytracker_stderr_sink); DECLARE_EARLY_CONSTRUCT(std::string, polytracker_stdout_sink); +// Controls argv being a taint source +bool polytracker_taint_argv = false; + uint64_t byte_start = 0; uint64_t byte_end = 0; bool polytracker_trace = false; bool polytracker_trace_func = false; + /** * Whether or not to save the input files to the output database */ @@ -65,6 +69,10 @@ void polytracker_parse_env() { if (auto err = getenv("POLYTRACKER_STDERR_SINK")) { get_polytracker_stderr_sink() = err; } + + if (auto argv = getenv("POLYTRACKER_TAINT_ARGV")) { + polytracker_taint_argv = argv[0] == '1'; + } } /* diff --git a/polytracker/src/polytracker/polytracker.cpp b/polytracker/src/polytracker/polytracker.cpp index 8c973dc2..0c262feb 100644 --- a/polytracker/src/polytracker/polytracker.cpp +++ b/polytracker/src/polytracker/polytracker.cpp @@ -1,5 +1,6 @@ #include "polytracker/polytracker.h" #include "polytracker/early_construct.h" +#include "polytracker/taint_sources.h" #include "taintdag/polytracker.h" #include #include @@ -87,6 +88,10 @@ extern "C" void __polytracker_start(func_mapping const *globals, no_control_flow_tracing); } +extern "C" void __polytracker_taint_argv(int argc, char *argv[]) { + polytracker::taint_argv(argc, argv); +} + extern "C" void __polytracker_store_function_mapping(const func_mapping *func_map, uint64_t *count) { diff --git a/polytracker/src/taint_sources/argv.cpp b/polytracker/src/taint_sources/argv.cpp new file mode 100644 index 00000000..f7020663 --- /dev/null +++ b/polytracker/src/taint_sources/argv.cpp @@ -0,0 +1,33 @@ +#include + +#include "polytracker/early_construct.h" +#include "polytracker/polytracker.h" +#include "taintdag/polytracker.h" + +using namespace std::literals; + +EARLY_CONSTRUCT_EXTERN_GETTER(taintdag::PolyTracker, polytracker_tdag); + +namespace polytracker { + +void taint_argv(int argc, char *argv[]) { + // The check could be done in the calling code, for performance reasons. + // However this function should only ever be invoked once (from main). + if (!polytracker_taint_argv) + return; + + if (argc <= 0) { + // Weird. Not much to do though. + return; + } + + auto &polyt = get_polytracker_tdag(); + + for (int i = 0; i < argc; i++) { + auto name = "argv["s + std::to_string(i) + "]"; + // NOTE(hbrodin): Currently not tainting terminating null char. + polyt.create_taint_source( + name, {reinterpret_cast(argv[i]), strlen(argv[i])}); + } +} +} // namespace polytracker \ No newline at end of file diff --git a/polytracker/src/taintdag/polytracker.cpp b/polytracker/src/taintdag/polytracker.cpp index d8fe9686..5bcca294 100644 --- a/polytracker/src/taintdag/polytracker.cpp +++ b/polytracker/src/taintdag/polytracker.cpp @@ -120,6 +120,28 @@ PolyTracker::source_taint(int fd, source_offset_t offset, size_t length) { return create_source_taint(fd, offset, length); } +std::optional +PolyTracker::create_taint_source(std::string_view name, + std::span dst) { + // Reserve a contiguous range of labels for this source + auto rng = tdag_.reserve_source_labels(dst.size()); + + // Register the source by name (and its preallocated range). + auto idx = fdm_.add_mapping(-1, name, rng); + if (!idx) + return {}; + + // Construct the allocated labels as source labels belonging to source 'idx' + tdag_.assign_source_labels(rng, *idx, 0); + + // Mark memory with corresponding labels + auto lbl = rng.first; + for (auto &c : dst) { + dfsan_set_label(lbl++, &c, sizeof(char)); + } + return rng; +} + void PolyTracker::taint_sink(int fd, sink_offset_t offset, void const *mem, size_t length) { auto idx = fdm_.mapping_idx(fd); diff --git a/tests/test_argv.cpp b/tests/test_argv.cpp new file mode 100644 index 00000000..a0987492 --- /dev/null +++ b/tests/test_argv.cpp @@ -0,0 +1,10 @@ +#include +#include + +int main(int argc, char *argv[]) { + auto f = fopen("outputfile.txt", "w"); + for (int i=0;i Date: Tue, 13 Sep 2022 14:05:23 +0200 Subject: [PATCH 4/9] Formatting --- tests/test_polytracker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_polytracker.py b/tests/test_polytracker.py index b7eb2962..cc0c9f74 100644 --- a/tests/test_polytracker.py +++ b/tests/test_polytracker.py @@ -241,7 +241,7 @@ def test_taint_forest(program_trace: ProgramTrace): desc="validating", unit=" taint nodes", ): - # TODO (hbrodin): proper evaluation + # TODO (hbrodin): proper evaluation if taint_node.is_canonical(): assert taint_node.parent_one is None assert taint_node.parent_two is None @@ -250,4 +250,4 @@ def test_taint_forest(program_trace: ProgramTrace): assert taint_node.parent_two is not None had_taint_union = True # There was at least one taint union - assert had_taint_union \ No newline at end of file + assert had_taint_union From f63c1ebc722de540c5a3c3600ac564d7fe43f22f Mon Sep 17 00:00:00 2001 From: hbrodin <90325907+hbrodin@users.noreply.github.com> Date: Tue, 13 Sep 2022 15:02:23 +0200 Subject: [PATCH 5/9] Added test case for taint-argv --- tests/test_argv.py | 54 +++++++++++++++++++++++++++++++++++++++ tests/test_polytracker.py | 21 --------------- tests/tracing.py | 1 + 3 files changed, 55 insertions(+), 21 deletions(-) create mode 100644 tests/test_argv.py diff --git a/tests/test_argv.py b/tests/test_argv.py new file mode 100644 index 00000000..df32d783 --- /dev/null +++ b/tests/test_argv.py @@ -0,0 +1,54 @@ +import pytest + +from pathlib import Path +from polytracker import taint_dag, ProgramTrace + + +@pytest.fixture +def set_env_vars(monkeypatch): + monkeypatch.setenv("POLYTRACKER_TAINT_ARGV", "1") + + +@pytest.mark.program_trace("test_argv.cpp", input="any") +def test_argv(set_env_vars, program_trace: ProgramTrace): + assert isinstance(program_trace, taint_dag.TDProgramTrace) + argv0 = Path("argv[0]") + argv1 = Path("argv[1]") + headers = list(program_trace.tdfile.fd_headers) + paths = list(map(lambda h: h[0], headers)) + assert len(paths) == 3 + assert argv0 in paths + assert argv1 in paths + + sinks = list(program_trace.tdfile.sinks) + + with open("outputfile.txt", "r") as f: + output = f.read() + + assert len(output) == len(sinks) + + last_fdidx = 0 + last_offset = 0 + for s in sinks: + sink_fd_idx = s.fdidx + label = s.label + + n = program_trace.tdfile.decode_node(label) + # No transformation/union of argv is made + assert isinstance(n, taint_dag.TDSourceNode) + + # If we just stepped to the next taint source, reset the offset + if last_fdidx != n.idx: + last_offset = 0 + + # First write argv[0], then argv[1], ... + assert last_fdidx <= n.idx + + # Write argv[x], all offsets + assert last_offset <= n.offset + + # Source file indices for argv[x] are opened before output file + assert n.idx < sink_fd_idx + + last_offset = n.offset + last_fdidx = n.idx diff --git a/tests/test_polytracker.py b/tests/test_polytracker.py index cc0c9f74..f9212f02 100644 --- a/tests/test_polytracker.py +++ b/tests/test_polytracker.py @@ -230,24 +230,3 @@ def test_retcode(program_trace: Union[ProgramTrace, Exception]): def test_stack(program_trace: Union[ProgramTrace, Exception]): # test_retcode.c should cause a CalledProcessError to be returned since it has a non-zero exit code assert not isinstance(program_trace, CalledProcessError) - - -@pytest.mark.program_trace("test_argv.cpp", input="ignored") -def test_taint_forest(program_trace: ProgramTrace): - had_taint_union = False - for taint_node in tqdm( - program_trace.taint_forest.nodes(), - leave=False, - desc="validating", - unit=" taint nodes", - ): - # TODO (hbrodin): proper evaluation - if taint_node.is_canonical(): - assert taint_node.parent_one is None - assert taint_node.parent_two is None - else: - assert taint_node.parent_one is not None - assert taint_node.parent_two is not None - had_taint_union = True - # There was at least one taint union - assert had_taint_union diff --git a/tests/tracing.py b/tests/tracing.py index e196bca3..16ab3b8c 100644 --- a/tests/tracing.py +++ b/tests/tracing.py @@ -105,6 +105,7 @@ def validate_execute_target( "POLYFUNC": "1", "POLYTRACKER_STDOUT_SINK": getenv("POLYTRACKER_STDOUT_SINK", "0"), "POLYTRACKER_STDERR_SINK": getenv("POLYTRACKER_STDERR_SINK", "0"), + "POLYTRACKER_TAINT_ARGV": getenv("POLYTRACKER_TAINT_ARGV", "0"), } if taint_all: del env["POLYPATH"] From a8517f26f857f58d6c145adebba2c64b86db96ce Mon Sep 17 00:00:00 2001 From: hbrodin <90325907+hbrodin@users.noreply.github.com> Date: Wed, 14 Sep 2022 10:00:35 +0200 Subject: [PATCH 6/9] Slight refactor of polytracker pass. Improved error handling. --- polytracker/src/passes/polytracker_pass.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/polytracker/src/passes/polytracker_pass.cpp b/polytracker/src/passes/polytracker_pass.cpp index 519e4f2a..92d75d1a 100644 --- a/polytracker/src/passes/polytracker_pass.cpp +++ b/polytracker/src/passes/polytracker_pass.cpp @@ -309,17 +309,22 @@ static void emitTaintArgvCall(llvm::Function &main) { } auto argv_ty = argv->getType(); + // IRBuilder for emitting a call to __polytracker_taint_argv. Need to + // specify insertion point first, to ensure that no instruction can + // use argv before it is tainted. + llvm::IRBuilder<> irb(&*(main.getEntryBlock().getFirstInsertionPt())); + // Define the target function type and make it available in the module - auto taint_argv_ty = llvm::FunctionType::get( - llvm::Type::getVoidTy(main.getContext()), {argc_ty, argv_ty}, false); + auto taint_argv_ty = + llvm::FunctionType::get(irb.getVoidTy(), {argc_ty, argv_ty}, false); llvm::FunctionCallee taint_argv = main.getParent()->getOrInsertFunction( "__polytracker_taint_argv", taint_argv_ty); - assert(taint_argv); + if (!taint_argv) { + spdlog::error("Failed to declare __polytracker_taint_argv."); + return; + } - // Emit a call to the taint_argv function using parameters from main. - auto &bb = main.getEntryBlock(); - llvm::Instruction &insert_point = *(bb.getFirstInsertionPt()); - llvm::IRBuilder<> irb(&insert_point); + // Emit the call using parameters from main. auto ci = irb.CreateCall(taint_argv, {argc, argv}); if (!ci) { spdlog::error("Failed to insert call to taint_argv."); From 68639bab07eeec58b7d4010a009530e804406037 Mon Sep 17 00:00:00 2001 From: hbrodin <90325907+hbrodin@users.noreply.github.com> Date: Wed, 14 Sep 2022 10:01:12 +0200 Subject: [PATCH 7/9] Reduced namespace pollution. --- polytracker/src/taint_sources/argv.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/polytracker/src/taint_sources/argv.cpp b/polytracker/src/taint_sources/argv.cpp index f7020663..20459709 100644 --- a/polytracker/src/taint_sources/argv.cpp +++ b/polytracker/src/taint_sources/argv.cpp @@ -4,13 +4,12 @@ #include "polytracker/polytracker.h" #include "taintdag/polytracker.h" -using namespace std::literals; - EARLY_CONSTRUCT_EXTERN_GETTER(taintdag::PolyTracker, polytracker_tdag); namespace polytracker { void taint_argv(int argc, char *argv[]) { + // The check could be done in the calling code, for performance reasons. // However this function should only ever be invoked once (from main). if (!polytracker_taint_argv) @@ -24,7 +23,7 @@ void taint_argv(int argc, char *argv[]) { auto &polyt = get_polytracker_tdag(); for (int i = 0; i < argc; i++) { - auto name = "argv["s + std::to_string(i) + "]"; + auto name = std::string{"argv["} + std::to_string(i) + "]"; // NOTE(hbrodin): Currently not tainting terminating null char. polyt.create_taint_source( name, {reinterpret_cast(argv[i]), strlen(argv[i])}); From 9e1d79d75b20bee8a867544e0e403925603c8ebc Mon Sep 17 00:00:00 2001 From: hbrodin <90325907+hbrodin@users.noreply.github.com> Date: Wed, 14 Sep 2022 10:04:53 +0200 Subject: [PATCH 8/9] Documenting the environment variable that controls argv tainting --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 83ea36e7..71b53d0c 100644 --- a/README.md +++ b/README.md @@ -206,6 +206,8 @@ variables PolyTracker supports is: POLYDB: A path to which to save the output database (default is polytracker.tdag) WLLVM_ARTIFACT_STORE: Provides a path to an existing directory to store artifact/manifest for all build targets + +POLYTRACKER_TAINT_ARGV: Set to '1' to use argv as a taint source. ``` Polytracker will set its configuration parameters in the following order: From 250c9f711af9a1f097e36809f96d96923162b091 Mon Sep 17 00:00:00 2001 From: Henrik Brodin <90325907+hbrodin@users.noreply.github.com> Date: Wed, 14 Sep 2022 13:11:40 +0200 Subject: [PATCH 9/9] Update polytracker/src/taint_sources/argv.cpp pre-increment i instead of post. --- polytracker/src/taint_sources/argv.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytracker/src/taint_sources/argv.cpp b/polytracker/src/taint_sources/argv.cpp index 20459709..a2339c38 100644 --- a/polytracker/src/taint_sources/argv.cpp +++ b/polytracker/src/taint_sources/argv.cpp @@ -22,7 +22,7 @@ void taint_argv(int argc, char *argv[]) { auto &polyt = get_polytracker_tdag(); - for (int i = 0; i < argc; i++) { + for (int i = 0; i < argc; ++i) { auto name = std::string{"argv["} + std::to_string(i) + "]"; // NOTE(hbrodin): Currently not tainting terminating null char. polyt.create_taint_source(