Skip to content

Commit

Permalink
Merge pull request #6486 from trailofbits/Henrik/taint-argv
Browse files Browse the repository at this point in the history
  • Loading branch information
surovic authored Sep 14, 2022
2 parents 0a45a60 + 250c9f7 commit 6ecf701
Show file tree
Hide file tree
Showing 19 changed files with 209 additions and 9 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ if(NOT EXISTS "${PROJECT_SOURCE_DIR}/third_party/indicators/CMakeLists.txt")
message(FATAL_ERROR "git submodule update --init --recursive must be run first to checkout submodules")
endif()

# C++20 is the project-wide language level (the sub-directory overrides to 17
# were removed; taintdag/polytracker.h includes <span>).
# CMAKE_CXX_STANDARD alone is only a preference: without
# CMAKE_CXX_STANDARD_REQUIRED, CMake silently decays to an older standard when
# the compiler cannot provide C++20, and the failure only shows up later as
# confusing compile errors.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_subdirectory(third_party/Catch2)
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/third_party/Catch2/contrib")
add_subdirectory(third_party/indicators)
Expand Down
7 changes: 4 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,17 @@ RUN update-ca-certificates
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 10
RUN python3 -m pip install pip && python3 -m pip install pytest

WORKDIR /blight
RUN git clone https://github.com/trailofbits/blight.git .
RUN pip3 install .

COPY . /polytracker

RUN mkdir /polytracker/build
WORKDIR /polytracker/build
RUN cmake -GNinja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_VERBOSE_MAKEFILE=TRUE -DCXX_LIB_PATH=/cxx_libs ..
RUN ninja install

WORKDIR /blight
RUN git clone https://github.com/trailofbits/blight.git .
RUN pip3 install .

WORKDIR /polytracker
RUN pip3 install .
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ variables PolyTracker supports is:
POLYDB: A path to which to save the output database (default is polytracker.tdag)
WLLVM_ARTIFACT_STORE: Provides a path to an existing directory to store artifact/manifest for all build targets
POLYTRACKER_TAINT_ARGV: Set to '1' to use argv as a taint source.
```

Polytracker will set its configuration parameters in the following order:
Expand Down
2 changes: 2 additions & 0 deletions polytracker/custom_abi/dfsan_abilist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ fun:__polytracker_store_blob=uninstrumented
fun:__polytracker_store_blob=discard
fun:__polytracker_preserve_map=uninstrumented
fun:__polytracker_preserve_map=discard
fun:__polytracker_taint_argv=uninstrumented
fun:__polytracker_taint_argv=discard

fun:__dfsan_update_label_count=uninstrumented
fun:__dfsan_update_label_count=discard
Expand Down
2 changes: 2 additions & 0 deletions polytracker/custom_abi/polytracker_abilist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ fun:__polytracker_store_blob=uninstrumented
fun:__polytracker_store_blob=discard
fun:__polytracker_preserve_map=uninstrumented
fun:__polytracker_preserve_map=discard
fun:__polytracker_taint_argv=uninstrumented
fun:__polytracker_taint_argv=discard

fun:__remill_jump=uninstrumented
fun:__remill_jump=discard
Expand Down
3 changes: 3 additions & 0 deletions polytracker/include/polytracker/polytracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,6 @@ extern uint64_t func_mapping_count;

extern const block_mapping *block_mappings;
extern uint64_t block_mapping_count;

// Controls argv being a taint source
extern bool polytracker_taint_argv;
4 changes: 4 additions & 0 deletions polytracker/include/polytracker/taint_sources.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,8 @@
#define EXT_C_FUNC extern "C" __attribute__((visibility("default")))
#define EXT_CXX_FUNC extern __attribute__((visibility("default")))

namespace polytracker {
// Registers the program arguments as taint sources.
//   argc - number of entries in argv
//   argv - the argument strings passed to main
// The definition returns without doing anything unless the global
// polytracker_taint_argv flag is set. Otherwise each argument becomes its own
// taint source named "argv[<index>]"; the terminating null character of each
// argument is not tainted.
void taint_argv(int argc, char *argv[]);
}

#endif
8 changes: 8 additions & 0 deletions polytracker/include/taintdag/polytracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define POLYTRACKER_TAINTDAG_POLYTRACKER_H

#include <filesystem>
#include <span>

#include "taintdag/fdmapping.hpp"
#include "taintdag/output.hpp"
Expand Down Expand Up @@ -32,6 +33,13 @@ class PolyTracker {
std::optional<taint_range_t> source_taint(int fd, source_offset_t offset,
size_t length);

// Creates a new taint source that is not backed by a file and assigns taint
// labels to the memory it covers.
//   name - name under which the new taint source is registered
//   dst  - memory that is assigned source taint labels referring to source
//          'name', one label per byte, in increasing offset order
// Returns the range of labels that was assigned to 'dst', or an empty
// optional if the source could not be registered.
std::optional<taint_range_t> create_taint_source(std::string_view name,
                                                 std::span<uint8_t> dst);

// Update the label, it affects control flow
void affects_control_flow(label_t taint_label);

Expand Down
5 changes: 2 additions & 3 deletions polytracker/src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
set(CMAKE_CXX_STANDARD 17)

find_package(LLVM 13 CONFIG)

if(LLVM_FOUND)
Expand Down Expand Up @@ -50,7 +48,8 @@ set(CMAKE_EXE_LINKER_FLAGS
set(POLY_SOURCES ${POLY_DIR}/main.cpp ${POLY_DIR}/polytracker.cpp)

set(TAINT_SOURCES ${TAINT_DIR}/taint_sources.cpp ${TAINT_DIR}/string_taints.cpp
${TAINT_DIR}/memory_taints.cpp ${TAINT_DIR}/write_taints.cpp)
${TAINT_DIR}/memory_taints.cpp ${TAINT_DIR}/write_taints.cpp
${TAINT_DIR}/argv.cpp)

set(TAINTDAG_SOURCES ${TAINTDAG_DIR}/encoding.cpp ${TAINTDAG_DIR}/fdmapping.cpp
${TAINTDAG_DIR}/output.cpp ${TAINTDAG_DIR}/polytracker.cpp)
Expand Down
48 changes: 48 additions & 0 deletions polytracker/src/passes/polytracker_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
// #include "polytracker/thread_pool.h"
#include "spdlog/cfg/env.h"
#include "spdlog/spdlog.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
Expand Down Expand Up @@ -289,12 +290,54 @@ bool PolytrackerPass::analyzeBlock(llvm::Function *func,
return true;
}

// Inserts a call to __polytracker_taint_argv(argc, argv) at the first
// insertion point of the entry block of `main`.
//
// Assumes `main` really is the program's main function and interprets its
// first parameter as argc and its second as argv. A main that takes fewer
// than two parameters (e.g. `int main(void)`) has no argv to taint; in that
// case an error is logged and the function is left untouched.
static void emitTaintArgvCall(llvm::Function &main) {
  // A declaration has no entry block to insert into.
  if (main.empty()) {
    spdlog::error("Failed to instrument argv. main has no body.");
    return;
  }

  // llvm::Function::getArg() never returns nullptr for an out-of-range index:
  // it asserts in debug builds and hands back an out-of-bounds pointer
  // otherwise. The parameter count therefore has to be validated up front.
  if (main.arg_size() < 1) {
    spdlog::error("Failed to instrument argv. No argc available.");
    return;
  }
  if (main.arg_size() < 2) {
    spdlog::error("Failed to instrument argv. No argv available.");
    return;
  }

  // Parameters of the main function: argc, argv.
  auto argc = main.getArg(0);
  auto argc_ty = argc->getType();
  auto argv = main.getArg(1);
  auto argv_ty = argv->getType();

  // IRBuilder for emitting a call to __polytracker_taint_argv. Need to
  // specify insertion point first, to ensure that no instruction can
  // use argv before it is tainted.
  llvm::IRBuilder<> irb(&*(main.getEntryBlock().getFirstInsertionPt()));

  // Define the target function type and make it available in the module.
  // The parameter types are taken from main itself so the call always
  // type-checks against the actual argc/argv values.
  auto taint_argv_ty =
      llvm::FunctionType::get(irb.getVoidTy(), {argc_ty, argv_ty}, false);
  llvm::FunctionCallee taint_argv = main.getParent()->getOrInsertFunction(
      "__polytracker_taint_argv", taint_argv_ty);
  if (!taint_argv) {
    spdlog::error("Failed to declare __polytracker_taint_argv.");
    return;
  }

  // Emit the call using parameters from main.
  auto ci = irb.CreateCall(taint_argv, {argc, argv});
  if (!ci) {
    spdlog::error("Failed to insert call to taint_argv.");
  }
}

/*
We should instrument everything we have bitcode for, right?
If instructions have __polytracker, or they have __dfsan, ignore!
*/
bool PolytrackerPass::analyzeFunction(llvm::Function *f,
const func_index_t &func_index) {

// Add Function entry
polytracker::BBSplittingPass bbSplitter;
// llvm::removeUnreachableBlocks(*f);
Expand Down Expand Up @@ -340,6 +383,11 @@ bool PolytrackerPass::analyzeFunction(llvm::Function *f,
visit(inst);
}

// If this is the main function, insert a taint-argv call
if (f && f->getName() == "main") {
emitTaintArgvCall(*f);
}

return true;
}

Expand Down
8 changes: 8 additions & 0 deletions polytracker/src/polytracker/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@ DECLARE_EARLY_CONSTRUCT(std::string, polytracker_db_name);
DECLARE_EARLY_CONSTRUCT(std::string, polytracker_stderr_sink);
DECLARE_EARLY_CONSTRUCT(std::string, polytracker_stdout_sink);

// Controls argv being a taint source
bool polytracker_taint_argv = false;

uint64_t byte_start = 0;
uint64_t byte_end = 0;
bool polytracker_trace = false;
bool polytracker_trace_func = false;

/**
* Whether or not to save the input files to the output database
*/
Expand Down Expand Up @@ -65,6 +69,10 @@ void polytracker_parse_env() {
if (auto err = getenv("POLYTRACKER_STDERR_SINK")) {
get_polytracker_stderr_sink() = err;
}

if (auto argv = getenv("POLYTRACKER_TAINT_ARGV")) {
polytracker_taint_argv = argv[0] == '1';
}
}

/*
Expand Down
5 changes: 5 additions & 0 deletions polytracker/src/polytracker/polytracker.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "polytracker/polytracker.h"
#include "polytracker/early_construct.h"
#include "polytracker/taint_sources.h"
#include "taintdag/polytracker.h"
#include <atomic>
#include <inttypes.h>
Expand Down Expand Up @@ -87,6 +88,10 @@ extern "C" void __polytracker_start(func_mapping const *globals,
no_control_flow_tracing);
}

// C-linkage entry point for argv tainting. The instrumentation pass declares
// and calls this symbol by name ("__polytracker_taint_argv") from the start of
// main; it simply forwards argc/argv to polytracker::taint_argv.
extern "C" void __polytracker_taint_argv(int argc, char *argv[]) {
  polytracker::taint_argv(argc, argv);
}

extern "C" void
__polytracker_store_function_mapping(const func_mapping *func_map,
uint64_t *count) {
Expand Down
32 changes: 32 additions & 0 deletions polytracker/src/taint_sources/argv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#include <cstring>
#include <string>

#include "polytracker/early_construct.h"
#include "polytracker/polytracker.h"
#include "taintdag/polytracker.h"

EARLY_CONSTRUCT_EXTERN_GETTER(taintdag::PolyTracker, polytracker_tdag);

namespace polytracker {

// Registers every program argument as its own taint source.
//   argc - number of entries in argv
//   argv - argument strings as passed to main
// Does nothing unless the global polytracker_taint_argv flag is set. Each
// non-null argv[i] becomes a taint source named "argv[<i>]" that covers the
// bytes of the string, excluding the terminating null character.
void taint_argv(int argc, char *argv[]) {

  // The check could be done in the calling code, for performance reasons.
  // However this function should only ever be invoked once (from main).
  if (!polytracker_taint_argv)
    return;

  if (argc <= 0 || argv == nullptr) {
    // Weird. Not much to do though.
    return;
  }

  auto &polyt = get_polytracker_tdag();

  for (int i = 0; i < argc; ++i) {
    char *arg = argv[i];
    if (arg == nullptr) {
      // strlen on a null pointer is undefined behaviour; a missing entry has
      // no bytes to taint, so skip it and keep the indices of the remaining
      // arguments unchanged.
      continue;
    }
    auto name = std::string{"argv["} + std::to_string(i) + "]";
    // NOTE(hbrodin): Currently not tainting terminating null char.
    polyt.create_taint_source(
        name, {reinterpret_cast<uint8_t *>(arg), std::strlen(arg)});
  }
}
} // namespace polytracker
1 change: 0 additions & 1 deletion polytracker/src/taintdag/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@

set(CMAKE_CXX_STANDARD 17)
add_subdirectory(test)

add_library(taintdag STATIC encoding.cpp fdmapping.cpp output.cpp print.cpp)
22 changes: 22 additions & 0 deletions polytracker/src/taintdag/polytracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,28 @@ PolyTracker::source_taint(int fd, source_offset_t offset, size_t length) {
return create_source_taint(fd, offset, length);
}

// Registers a named taint source that is not a file and labels the bytes of
// 'dst' with consecutive source labels belonging to it.
//   name - name of the new source
//   dst  - memory to label; one label is reserved per byte
// Returns the reserved label range, or an empty optional when the mapping
// could not be added.
std::optional<taint_range_t>
PolyTracker::create_taint_source(std::string_view name,
                                 std::span<uint8_t> dst) {
  // A contiguous block of labels, one for each byte of the source.
  auto label_range = tdag_.reserve_source_labels(dst.size());

  // Record the source under its name together with the preallocated range.
  // NOTE(review): -1 is passed in the fd position, presumably meaning "no
  // file descriptor" - confirm against the fd mapping implementation.
  auto source_index = fdm_.add_mapping(-1, name, label_range);
  if (!source_index) {
    return {};
  }

  // Turn the reserved labels into source labels of source '*source_index',
  // starting at offset 0.
  tdag_.assign_source_labels(label_range, *source_index, 0);

  // Attach the labels to the memory, byte by byte, in increasing order.
  auto next_label = label_range.first;
  for (auto it = dst.begin(); it != dst.end(); ++it, ++next_label) {
    dfsan_set_label(next_label, &*it, sizeof(char));
  }
  return label_range;
}

void PolyTracker::taint_sink(int fd, sink_offset_t offset, void const *mem,
size_t length) {
auto idx = fdm_.mapping_idx(fd);
Expand Down
2 changes: 0 additions & 2 deletions polytracker/src/taintdag/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
set(CMAKE_CXX_STANDARD 17)

include(CTest)
include(Catch)

Expand Down
10 changes: 10 additions & 0 deletions tests/test_argv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#include <cstring>
#include <cstdio>

// Test target: writes every command line argument, back to back and without
// separators or terminators, to "outputfile.txt" in the current directory.
// Returns 0 on success and 1 if the output file cannot be opened.
int main(int argc, char *argv[]) {
  auto f = fopen("outputfile.txt", "w");
  if (!f) {
    // Passing a null FILE* to fwrite/fclose is undefined behaviour; fail
    // visibly instead.
    perror("outputfile.txt");
    return 1;
  }
  for (int i = 0; i < argc; i++) {
    fwrite(argv[i], strlen(argv[i]), 1, f);
  }
  fclose(f);
  return 0;
}
54 changes: 54 additions & 0 deletions tests/test_argv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import pytest

from pathlib import Path
from polytracker import taint_dag, ProgramTrace


@pytest.fixture
def set_env_vars(monkeypatch):
    """Set POLYTRACKER_TAINT_ARGV to "1" in the environment for the test."""
    monkeypatch.setenv(name="POLYTRACKER_TAINT_ARGV", value="1")


@pytest.mark.program_trace("test_argv.cpp", input="any")
def test_argv(set_env_vars, program_trace: ProgramTrace):
    """Check that argv entries show up as taint sources and reach the sink.

    The trace must list exactly three fd headers, among them "argv[0]" and
    "argv[1]". Every byte found in outputfile.txt must have a sink entry whose
    label decodes to an untransformed source node, ordered by source index and
    then by offset within the source.
    """
    assert isinstance(program_trace, taint_dag.TDProgramTrace)
    tdfile = program_trace.tdfile

    source_paths = [header[0] for header in tdfile.fd_headers]
    assert len(source_paths) == 3
    for expected_path in (Path("argv[0]"), Path("argv[1]")):
        assert expected_path in source_paths

    sinks = list(tdfile.sinks)

    with open("outputfile.txt", "r") as f:
        output = f.read()

    # One sink entry per byte written by the test program.
    assert len(output) == len(sinks)

    prev_idx, prev_offset = 0, 0
    for sink in sinks:
        node = tdfile.decode_node(sink.label)
        # No transformation/union of argv is made
        assert isinstance(node, taint_dag.TDSourceNode)

        # Moving on to the next taint source restarts the offset tracking.
        if node.idx != prev_idx:
            prev_offset = 0

        # argv[0] is written first, then argv[1], ...
        assert prev_idx <= node.idx

        # Within one argv[x] the offsets never decrease.
        assert prev_offset <= node.offset

        # Source file indices for argv[x] are opened before output file
        assert node.idx < sink.fdidx

        prev_idx, prev_offset = node.idx, node.offset
1 change: 1 addition & 0 deletions tests/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def validate_execute_target(
"POLYFUNC": "1",
"POLYTRACKER_STDOUT_SINK": getenv("POLYTRACKER_STDOUT_SINK", "0"),
"POLYTRACKER_STDERR_SINK": getenv("POLYTRACKER_STDERR_SINK", "0"),
"POLYTRACKER_TAINT_ARGV": getenv("POLYTRACKER_TAINT_ARGV", "0"),
}
if taint_all:
del env["POLYPATH"]
Expand Down

0 comments on commit 6ecf701

Please sign in to comment.