Skip to content

Commit

Permalink
pack: Add support for using a common pack dir for multiple traces
Browse files Browse the repository at this point in the history
When packaging traces from CI, it's fairly common to have hundreds of traces
that all basically share the exact same files. This can lead to some fairly
large traces after packing. Of course, some file-systems support block-level
deduplication and a compression library would certainly be able to dedup it
back down as well, but it'd be faster to not create trace directories that
big on disk in the first place.

This adds a `--pack-dir` option to `rr pack <traces...>`, which is used
as the common pack dir for all traces. Rather than packing files into
their own trace dirs, they will be packed into the `pack-dir`, with relative
symlinks from the original trace directories to the pack dir. An unmodified
rr will be able to replay these as long as the pack dir is moved along
with the trace dirs.
  • Loading branch information
Keno committed Apr 24, 2024
1 parent 5b7c1f4 commit 00b1914
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS_COMMON} -Wstrict-prototypes -std=gnu11")
# Define __STDC_LIMIT_MACROS so |#include <stdint.h>| works as expected.
# Define __STDC_FORMAT_MACROS so |#include <inttypes.h>| works as expected.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_COMMON} -D__STDC_LIMIT_MACROS -D__STDC_FORMAT_MACROS -std=c++14")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_COMMON} -D__STDC_LIMIT_MACROS -D__STDC_FORMAT_MACROS -std=c++17")

# We support three build types:
# DEBUG: suitable for debugging rr
Expand Down
34 changes: 31 additions & 3 deletions src/PackCommand.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <unistd.h>

#include <algorithm>
#include <filesystem>
#include <limits>
#include <map>
#include <set>
Expand Down Expand Up @@ -67,6 +68,7 @@ struct PackFlags {
/* If true, insert symlinks into the trace dir which point to the original
* files, rather than copying the files themselves */
bool symlink;
std::string pack_dir;

PackFlags()
: symlink(false) {}
Expand Down Expand Up @@ -99,6 +101,12 @@ bool operator<(const FsExtentsHash& h1, const FsExtentsHash& h2) {
return memcmp(h1.bytes, h2.bytes, sizeof(h1)) < 0;
}

struct PackDir {
string dir;
map<FileHash, string> mapped_files;
PackDir(string dir) : dir(dir) {}
};

static bool name_comparator(const TraceReader::MappedData& d1,
const TraceReader::MappedData d2) {
return d1.file_name < d2.file_name;
Expand Down Expand Up @@ -528,7 +536,8 @@ static map<string, string> compute_canonical_symlink_map(
* for all files with that hash.
*/
static map<string, string> compute_canonical_mmapped_files(
const string& trace_dir) {
const string& trace_dir,
PackDir &pack_dir) {
map<string, FileHash> file_info = gather_file_info(trace_dir);

map<FileHash, string> hash_to_name;
Expand All @@ -545,10 +554,24 @@ static map<string, string> compute_canonical_mmapped_files(

int name_index = 0;
for (auto& p : hash_to_name) {
// Check if this file is already in our common pack directory
auto it = pack_dir.mapped_files.find(p.first);
if (it != pack_dir.mapped_files.end()) {
LOG(debug) << "Found in common pack dir";
p.second = symlink_into_trace(fs::relative(it->second, trace_dir), trace_dir, &name_index);
continue;
}

// Copy hardlinked files into the trace to avoid the possibility of someone
// overwriting the original file.
if (is_hardlink(p.second) || !is_in_trace_dir(p.second, trace_dir)) {
p.second = copy_into_trace(p.second, trace_dir, &name_index);
if (pack_dir.dir != "") {
// If a pack dir is specified, first copy into pack dir, then symlink into trace.
auto path = pack_dir.mapped_files[p.first] = copy_into_trace(p.second, pack_dir.dir, &name_index);
p.second = symlink_into_trace(fs::relative(path, trace_dir), trace_dir, &name_index);
} else {
p.second = copy_into_trace(p.second, trace_dir, &name_index);
}
}
}

Expand Down Expand Up @@ -656,6 +679,7 @@ static int pack(const string& trace_dir, const PackFlags& flags) {
dir = reader.dir();
}

PackDir pack_dir(flags.pack_dir);
char buf[PATH_MAX];
char* ret = realpath(dir.c_str(), buf);
if (!ret) {
Expand All @@ -670,7 +694,7 @@ static int pack(const string& trace_dir, const PackFlags& flags) {
delete_unnecessary_files(canonical_symlink_map, abspath);
} else {
map<string, string> canonical_mmapped_files =
compute_canonical_mmapped_files(abspath);
compute_canonical_mmapped_files(abspath, pack_dir);
rewrite_mmaps(canonical_mmapped_files, abspath);
delete_unnecessary_files(canonical_mmapped_files, abspath);
}
Expand All @@ -685,6 +709,7 @@ static int pack(const string& trace_dir, const PackFlags& flags) {
static bool parse_pack_arg(vector<string>& args, PackFlags& flags) {
static const OptionSpec options[] = {
{ 0, "symlink", NO_PARAMETER },
{ 1, "pack-dir", HAS_PARAMETER },
};
ParsedOption opt;
auto args_copy = args;
Expand All @@ -696,6 +721,9 @@ static bool parse_pack_arg(vector<string>& args, PackFlags& flags) {
case 0:
flags.symlink = true;
break;
case 1:
flags.pack_dir = opt.value;
break;
default:
DEBUG_ASSERT(0 && "Unknown pack option");
}
Expand Down

0 comments on commit 00b1914

Please sign in to comment.