From 35a0fbd4de19bdb84e5698a85abac689f5f2ee20 Mon Sep 17 00:00:00 2001 From: Tudor Brindus Date: Sat, 17 Feb 2024 18:50:44 -0500 Subject: [PATCH] Be more robust to seeing a poptrap without a matching pushtrap Also add more verbose logging for decode errors. Signed-off-by: Tudor Brindus --- src/trace_writer.ml | 43 +++++++++++++++++++++++++++++++++---------- test/decode_errors.ml | 9 +++++++-- test/page_fault.ml | 4 +++- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/src/trace_writer.ml b/src/trace_writer.ml index 0164ac7d0..251ed8b79 100644 --- a/src/trace_writer.ml +++ b/src/trace_writer.ml @@ -826,18 +826,31 @@ end = struct thread_info.callstack <- Callstack.create ~create_time:time; Callstack.push thread_info.callstack top | Poptrap -> - (* We should only have the synthetic frame we created at this point. If we - have more than that, we've gotten confused in our state tracking - somewhere. *) + (* Assuming we didn't drop anything, we should only have the synthetic + frame we created at this point. If we have more than that, we either got + confused in our state tracking somewhere, or more likely, IPT dropped + some data. *) if Callstack.depth thread_info.callstack <> 1 then - eprint_s - [%message - "BUG: expected callstack to be depth 1 at the time we encountered a \ - poptrap, but it wasn't" - ~callstack:(thread_info.callstack : Callstack.t)]; - ignore (Callstack.pop thread_info.callstack : _); - clear_trap_stack t thread_info ~time)); + eprintf + "Warning: expected callstack depth to be the same going into a [try] \ + block as when leaving it, but it wasn't (off by %d). Did Intel \ + Processor Trace drop some data? Will attempt to recover.\n\ + %!" + (Callstack.depth thread_info.callstack - 1) + else ( + (* Only pop the exception callstack if we're at the same callstack + depth as we were when we saw [Pushtrap]. This should let us recover + from situations like: + + - Pushtrap 1 + - Pushtrap 2 + - Poptrap 2 + - Poptrap 1 + + where "Pushtrap 2" gets dropped. *) + ignore (Callstack.pop thread_info.callstack : _); + clear_trap_stack t thread_info ~time))); last_known_instruction_pointer := Some dst.instruction_pointer ;; end @@ -939,6 +952,15 @@ let write_event_and_callstack event_and_callstack ;; +let warn_decode_error ~instruction_pointer ~message = + eprintf + "Warning: perf reported an error decoding the trace: %s\n!" + (match instruction_pointer with + | None -> [%string "'%{message}'"] + | Some instruction_pointer -> + [%string "'%{message}' @ IP %{instruction_pointer#Int64.Hex}."]) +;; + (* Write perf_events into a file as a Fuschia trace (stack events). Events should be collected with --itrace=be or cre, and -F pid,tid,time,flags,addr,sym,symoff as per the constants defined above. *) @@ -955,6 +977,7 @@ let write_event (T t) ?events_writer event = maybe_stop_filtered_region t ~should_write; match event with | Error { thread = _; instruction_pointer; message; time = _ } -> + warn_decode_error ~instruction_pointer ~message; let name = sprintf !"[decode error: %s]" message in write_duration_instant t ~thread ~name ~time ~args:[]; let is_kernel_address = diff --git a/test/decode_errors.ml b/test/decode_errors.ml index c3ae45f31..762c98309 100644 --- a/test/decode_errors.ml +++ b/test/decode_errors.ml @@ -23,7 +23,9 @@ let%expect_test "decode error during memmove" = INPUT TRACE STREAM ENDED, any lines printed below this were deferred -> 21ns BEGIN itch_bbo::book::Book::add_order [inferred start time] -> 141ns END __memmove_avx_unaligned_erms - -> 141ns END itch_bbo::book::Book::add_order |}] + -> 141ns END itch_bbo::book::Book::add_order + Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730. + ! |}] ;; let%expect_test "decode error during rust B-tree rebalance" = @@ -95,5 +97,8 @@ let%expect_test "decode error during rust B-tree rebalance" = -> 419ns END _int_free -> 419ns END merge_tracking_child_edge -> 420ns END remove_leaf_kv - INPUT TRACE STREAM ENDED, any lines printed below this were deferred |}] + INPUT TRACE STREAM ENDED, any lines printed below this were deferred + Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730. + !Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730. + ! |}] ;; diff --git a/test/page_fault.ml b/test/page_fault.ml index 811abd1b8..9e3126deb 100644 --- a/test/page_fault.ml +++ b/test/page_fault.ml @@ -643,5 +643,7 @@ let%expect_test "A page fault during demo.c" = -> 224ns BEGIN _dl_catch_exception [inferred start time] -> 224ns BEGIN _fini [inferred start time] -> 1.373us END _fini - -> 1.373us END _dl_catch_exception |}] + -> 1.373us END _dl_catch_exception + Warning: perf reported an error decoding the trace: 'Overflow packet (foo.so)' @ IP 0x7f5948676e18. + ! |}] ;;