From 31c82c8521e6cca74f27e0cc8fe22df3f26cddd0 Mon Sep 17 00:00:00 2001 From: KJ Tsanaktsidis Date: Sun, 25 Aug 2024 17:19:36 +1000 Subject: [PATCH] Record that the sigframe write recorded is conservative When handling a signal in a tracee, the kernel will spill some data to the stack (the "sigframe"), and this data needs to be recorded in the trace for replaying. However, we don't actually know how much data the kernel will write. Instead, we take a conservative estimate which is guaranteed to be at least that size, and record that amount of memory. However, that range might cross a memory mapping, and during replay, part of that range might not be mapped. Currently that raises an assertion error because the write is smaller than the read. In this patch, we record whether a memory "write" is actually this kind of conservative capture, and mark it as such in the trace. During replay, we then do not fail that assertion. --- src/RecordSession.cc | 4 +++- src/RecordTask.cc | 11 ++++++----- src/RecordTask.h | 6 ++++-- src/ReplayTask.cc | 10 ++++++++-- src/Task.cc | 8 ++------ src/Task.h | 2 +- src/TraceStream.cc | 11 ++++++++--- src/TraceStream.h | 21 ++++++++++++++++++--- src/rr_trace.capnp | 4 ++++ 9 files changed, 54 insertions(+), 23 deletions(-) diff --git a/src/RecordSession.cc b/src/RecordSession.cc index 146d2091433..71703864250 100644 --- a/src/RecordSession.cc +++ b/src/RecordSession.cc @@ -19,6 +19,7 @@ #include "PerfCounters.h" #include "RecordTask.h" #include "TraceeAttentionSet.h" +#include "TraceStream.h" #include "VirtualPerfCounterMonitor.h" #include "WaitManager.h" #include "core.h" @@ -1758,7 +1759,8 @@ bool RecordSession::signal_state_changed(RecordTask* t, StepState* step_state) { // We record this data even if sigframe_size is zero to simplify replay. // Stop recording data if we run off the end of a writable mapping. // Our sigframe size is conservative so we need to do this. - t->record_remote_writable(t->regs().sp(), sigframe_size); + t->record_remote_writable(t->regs().sp(), sigframe_size, + MemWriteSizeValidation::CONSERVATIVE); // This event is used by the replayer to set up the signal handler frame. // But if we don't have a handler, we don't want to record the event diff --git a/src/RecordTask.cc b/src/RecordTask.cc index 06f2a0c4b3a..52fba523ae1 100644 --- a/src/RecordTask.cc +++ b/src/RecordTask.cc @@ -1724,7 +1724,8 @@ bool RecordTask::record_remote_by_local_map(remote_ptr addr, return false; } -void RecordTask::record_remote(remote_ptr addr, ssize_t num_bytes) { +void RecordTask::record_remote(remote_ptr addr, ssize_t num_bytes, + MemWriteSizeValidation size_validation) { ASSERT(this, num_bytes >= 0); if (!addr) { @@ -1750,11 +1751,11 @@ void RecordTask::record_remote(remote_ptr addr, ssize_t num_bytes) { ASSERT(this, false) << "Should have recorded " << num_bytes << " bytes from " << addr << ", but failed"; } - trace_writer().write_raw(rec_tid, buf.data(), num_bytes, addr); + trace_writer().write_raw(rec_tid, buf.data(), num_bytes, addr, size_validation); } -void RecordTask::record_remote_writable(remote_ptr addr, - ssize_t num_bytes) { +void RecordTask::record_remote_writable(remote_ptr addr, ssize_t num_bytes, + MemWriteSizeValidation size_validation) { ASSERT(this, num_bytes >= 0); remote_ptr p = addr; @@ -1777,7 +1778,7 @@ void RecordTask::record_remote_writable(remote_ptr addr, } num_bytes = min(num_bytes, p - addr); - record_remote(addr, num_bytes); + record_remote(addr, num_bytes, size_validation); } ssize_t RecordTask::record_remote_fallible(remote_ptr addr, diff --git a/src/RecordTask.h b/src/RecordTask.h index 7f42e678259..18db093643d 100644 --- a/src/RecordTask.h +++ b/src/RecordTask.h @@ -400,7 +400,8 @@ class RecordTask final : public Task { void record_local(remote_ptr addr, const T* buf, size_t count = 1) { record_local(addr, sizeof(T) * count, buf); } - void record_remote(remote_ptr addr, ssize_t num_bytes); + void record_remote(remote_ptr addr, ssize_t num_bytes, + MemWriteSizeValidation size_validation = MemWriteSizeValidation::EXACT); template void record_remote(remote_ptr addr) { record_remote(addr, sizeof(T)); } @@ -417,7 +418,8 @@ class RecordTask final : public Task { // Record as much as we can of the bytes in this range. Will record only // contiguous mapped-writable data starting at `addr`. rr mappings (e.g. syscallbuf) // are treated as non-contiguous with any other mapping. - void record_remote_writable(remote_ptr addr, ssize_t num_bytes); + void record_remote_writable(remote_ptr addr, ssize_t num_bytes, + MemWriteSizeValidation size_validation = MemWriteSizeValidation::EXACT); // Simple helper that attempts to use the local mapping to record if one // exists diff --git a/src/ReplayTask.cc b/src/ReplayTask.cc index b6fb569f105..5005828438c 100644 --- a/src/ReplayTask.cc +++ b/src/ReplayTask.cc @@ -157,8 +157,14 @@ static size_t write_data_with_holes(ReplayTask* t, if (holes_iter != buf.holes.end()) { data_end = data_offset + holes_iter->offset - addr_offset; } - t->write_bytes_helper(buf.addr + addr_offset, data_end - data_offset, buf.data.data() + data_offset, - nullptr); + bool ok = true; + ssize_t nwritten = t->write_bytes_helper(buf.addr + addr_offset, data_end - data_offset, + buf.data.data() + data_offset,&ok); + + ASSERT(t, ok || buf.size_validation == MemWriteSizeValidation::CONSERVATIVE) + << "Should have written " << buf.data.size() << " bytes to " << (buf.addr + addr_offset) + << ", but only wrote " << nwritten; + addr_offset += data_end - data_offset; data_offset = data_end; } diff --git a/src/Task.cc b/src/Task.cc index 407e422e55e..ab457faaea1 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -3213,17 +3213,13 @@ static ssize_t safe_pwrite64(Task* t, const void* buf, ssize_t buf_size, return nwritten; } -void Task::write_bytes_helper(remote_ptr addr, ssize_t buf_size, +ssize_t Task::write_bytes_helper(remote_ptr addr, ssize_t buf_size, const void* buf, bool* ok, uint32_t flags) { - ASSERT(this, buf_size >= 0) << "Invalid buf_size " << buf_size; - if (0 == buf_size) { - return; - } - ssize_t nwritten = write_bytes_helper_no_notifications(addr, buf_size, buf, ok, flags); if (nwritten > 0) { vm()->notify_written(addr, nwritten, flags); } + return nwritten; } ssize_t Task::write_bytes_helper_no_notifications(remote_ptr addr, ssize_t buf_size, diff --git a/src/Task.h b/src/Task.h index 58316aae2c1..0adf7cce2ab 100644 --- a/src/Task.h +++ b/src/Task.h @@ -816,7 +816,7 @@ class Task { /** * |flags| is bits from WriteFlags. */ - void write_bytes_helper(remote_ptr addr, ssize_t buf_size, + ssize_t write_bytes_helper(remote_ptr addr, ssize_t buf_size, const void* buf, bool* ok = nullptr, uint32_t flags = 0); /** diff --git a/src/TraceStream.cc b/src/TraceStream.cc index ed312b7bc34..c9983c99ca9 100644 --- a/src/TraceStream.cc +++ b/src/TraceStream.cc @@ -422,6 +422,7 @@ void TraceWriter::write_frame(RecordTask* t, const Event& ev, w.setTid(r.rec_tid); w.setAddr(r.addr.as_int()); w.setSize(r.size); + w.setSizeIsConservative(r.size_validation == MemWriteSizeValidation::CONSERVATIVE); auto holes = w.initHoles(r.holes.size()); for (size_t j = 0; j < r.holes.size(); ++j) { holes[j].setOffset(r.holes[j].offset); @@ -573,7 +574,8 @@ TraceFrame TraceReader::read_frame(FrameTime skip_before) { const auto& hole = holes[j]; h[j] = { hole.getOffset(), hole.getSize() }; } - raw_recs[i] = { w.getAddr(), (size_t)w.getSize(), i32_to_tid(w.getTid()), h }; + raw_recs[i] = { w.getAddr(), (size_t)w.getSize(), i32_to_tid(w.getTid()), h, + w.getSizeIsConservative() ? MemWriteSizeValidation::CONSERVATIVE : MemWriteSizeValidation::EXACT }; } if (ret.global_time < skip_before) { @@ -1237,8 +1239,9 @@ KernelMapping TraceReader::read_mapped_region(MappedData* data, bool* found, void TraceWriter::write_raw_header(pid_t rec_tid, size_t total_len, remote_ptr addr, - const std::vector& holes = std::vector()) { - raw_recs.push_back({ addr, total_len, rec_tid, holes }); + const std::vector& holes, + MemWriteSizeValidation size_validation) { + raw_recs.push_back({ addr, total_len, rec_tid, holes, size_validation }); } void TraceWriter::write_raw_data(const void* d, size_t len) { @@ -1253,6 +1256,7 @@ bool TraceReader::read_raw_data_for_frame(RawData& d) { auto& rec = raw_recs[raw_recs.size() - 1]; d.rec_tid = rec.rec_tid; d.addr = rec.addr; + d.size_validation = rec.size_validation; d.data.resize(rec.size); auto hole_iter = rec.holes.begin(); @@ -1285,6 +1289,7 @@ bool TraceReader::read_raw_data_for_frame_with_holes(RawDataWithHoles& d) { d.addr = rec.addr; d.holes = std::move(rec.holes); size_t data_size = rec.size; + d.size_validation = rec.size_validation; for (auto& h : d.holes) { data_size -= h.size; } diff --git a/src/TraceStream.h b/src/TraceStream.h index f7a0306651a..7dfde3f12ed 100644 --- a/src/TraceStream.h +++ b/src/TraceStream.h @@ -38,6 +38,16 @@ struct WriteHole { uint64_t size; }; +enum class MemWriteSizeValidation { + /* A data record where we _know_ that this data was all written, and it + * _must_ be replicated in the replayed process in its entirety */ + EXACT, + /* We recorded a range of memory that's a conservative over-estimate of + * what was actually written, and it might not replay cleanly in its + * entirety in a replayed process */ + CONSERVATIVE, +}; + /** * TraceStream stores all the data common to both recording and * replay. TraceWriter deals with recording-specific logic, and @@ -56,6 +66,7 @@ class TraceStream { size_t size; pid_t rec_tid; std::vector holes; + MemWriteSizeValidation size_validation; }; /** @@ -224,13 +235,15 @@ class TraceWriter : public TraceStream { * 'addr' is the address in the tracee where the data came from/will be * restored to. */ - void write_raw(pid_t tid, const void* data, size_t len, remote_ptr addr) { + void write_raw(pid_t tid, const void* data, size_t len, remote_ptr addr, + MemWriteSizeValidation size_validation = MemWriteSizeValidation::EXACT) { write_raw_data(data, len); - write_raw_header(tid, len, addr, std::vector()); + write_raw_header(tid, len, addr, std::vector(), size_validation); } void write_raw_data(const void* data, size_t len); void write_raw_header(pid_t tid, size_t total_len, remote_ptr addr, - const std::vector& holes); + const std::vector& holes, + MemWriteSizeValidation size_validation = MemWriteSizeValidation::EXACT); /** * Write a task event (clone or exec record) to the trace. @@ -344,6 +357,7 @@ class TraceReader : public TraceStream { std::vector data; remote_ptr addr; pid_t rec_tid; + MemWriteSizeValidation size_validation; }; /** @@ -354,6 +368,7 @@ class TraceReader : public TraceStream { remote_ptr addr; pid_t rec_tid; std::vector holes; + MemWriteSizeValidation size_validation; }; /** diff --git a/src/rr_trace.capnp b/src/rr_trace.capnp index a575f2f4375..f87014e2410 100644 --- a/src/rr_trace.capnp +++ b/src/rr_trace.capnp @@ -244,6 +244,10 @@ struct MemWrite { # A list of regions where zeroes are written. These are not # present in the compressed data. holes @3 :List(WriteHole); + # This is set for writes where we don't actually know that the write + # will apply in full in the replayee (e.g. for conservative sigframe + # captures when handling EV_SIGNAL). + sizeIsConservative @4 :Bool; } enum Arch {