Skip to content

Commit

Permalink
Be more robust to seeing a poptrap without a matching pushtrap
Browse files Browse the repository at this point in the history
Also add more verbose logging for decode errors.

Signed-off-by: Tudor Brindus <[email protected]>
  • Loading branch information
Xyene committed Feb 19, 2024
1 parent 492afe4 commit 35a0fbd
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 13 deletions.
43 changes: 33 additions & 10 deletions src/trace_writer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -826,18 +826,31 @@ end = struct
thread_info.callstack <- Callstack.create ~create_time:time;
Callstack.push thread_info.callstack top
| Poptrap ->
(* We should only have the synthetic frame we created at this point. If we
have more than that, we've gotten confused in our state tracking
somewhere. *)
(* Assuming we didn't drop anything, we should only have the synthetic
frame we created at this point. If we have more than that, we either got
confused in our state tracking somewhere, or more likely, IPT dropped
some data. *)
if Callstack.depth thread_info.callstack <> 1
then
eprint_s
[%message
"BUG: expected callstack to be depth 1 at the time we encountered a \
poptrap, but it wasn't"
~callstack:(thread_info.callstack : Callstack.t)];
ignore (Callstack.pop thread_info.callstack : _);
clear_trap_stack t thread_info ~time));
eprintf
"Warning: expected callstack depth to be the same going into a [try] \
block as when leaving it, but it wasn't (off by %d). Did Intel \
Processor Trace drop some data? Will attempt to recover.\n\
%!"
(Callstack.depth thread_info.callstack - 1)
else (
(* Only pop the exception callstack if we're at the same callstack
depth as we were when we saw [Pushtrap]. This should let us recover
from situations like:
- Pushtrap 1
- Pushtrap 2
- Poptrap 2
- Poptrap 1
where "Pushtrap 2" gets dropped. *)
ignore (Callstack.pop thread_info.callstack : _);
clear_trap_stack t thread_info ~time)));
last_known_instruction_pointer := Some dst.instruction_pointer
;;
end
Expand Down Expand Up @@ -939,6 +952,15 @@ let write_event_and_callstack
event_and_callstack
;;

let warn_decode_error ~instruction_pointer ~message =
eprintf
"Warning: perf reported an error decoding the trace: %s\n!"
(match instruction_pointer with
| None -> [%string "'%{message}'"]
| Some instruction_pointer ->
[%string "'%{message}' @ IP %{instruction_pointer#Int64.Hex}."])
;;

(* Write perf_events into a file as a Fuschia trace (stack events). Events should be
collected with --itrace=be or cre, and -F pid,tid,time,flags,addr,sym,symoff as per
the constants defined above. *)
Expand All @@ -955,6 +977,7 @@ let write_event (T t) ?events_writer event =
maybe_stop_filtered_region t ~should_write;
match event with
| Error { thread = _; instruction_pointer; message; time = _ } ->
warn_decode_error ~instruction_pointer ~message;
let name = sprintf !"[decode error: %s]" message in
write_duration_instant t ~thread ~name ~time ~args:[];
let is_kernel_address =
Expand Down
9 changes: 7 additions & 2 deletions test/decode_errors.ml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ let%expect_test "decode error during memmove" =
INPUT TRACE STREAM ENDED, any lines printed below this were deferred
-> 21ns BEGIN itch_bbo::book::Book::add_order [inferred start time]
-> 141ns END __memmove_avx_unaligned_erms
-> 141ns END itch_bbo::book::Book::add_order |}]
-> 141ns END itch_bbo::book::Book::add_order
Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
! |}]
;;

let%expect_test "decode error during rust B-tree rebalance" =
Expand Down Expand Up @@ -95,5 +97,8 @@ let%expect_test "decode error during rust B-tree rebalance" =
-> 419ns END _int_free
-> 419ns END merge_tracking_child_edge
-> 420ns END remove_leaf_kv
INPUT TRACE STREAM ENDED, any lines printed below this were deferred |}]
INPUT TRACE STREAM ENDED, any lines printed below this were deferred
Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
!Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
! |}]
;;
4 changes: 3 additions & 1 deletion test/page_fault.ml
Original file line number Diff line number Diff line change
Expand Up @@ -643,5 +643,7 @@ let%expect_test "A page fault during demo.c" =
-> 224ns BEGIN _dl_catch_exception [inferred start time]
-> 224ns BEGIN _fini [inferred start time]
-> 1.373us END _fini
-> 1.373us END _dl_catch_exception |}]
-> 1.373us END _dl_catch_exception
Warning: perf reported an error decoding the trace: 'Overflow packet (foo.so)' @ IP 0x7f5948676e18.
! |}]
;;

0 comments on commit 35a0fbd

Please sign in to comment.