From e3bd1156eb9d61058d978b0b15308fdf6eaba848 Mon Sep 17 00:00:00 2001 From: Alfredo Mazzinghi Date: Sat, 18 Dec 2021 17:45:29 +0000 Subject: [PATCH] Add instruction logging backend that emits a protobuf stream. This is inspired to the perfetto log backend and has two main use cases: - Provide a portable way to parse full instruction traces, without requiring all the perfetto backend dependencies. - Detect performance and disk-space issues with the perfetto backend serialization mechanism. Note that this backend only logs the instruction stream and does not support all the extra data framing, selection and aggregation that the perfetto backend performs. --- accel/tcg/log_instr.c | 8 +- accel/tcg/log_instr_protobuf.c | 307 ++++++++++++++++++++++++++++++ accel/tcg/meson.build | 5 + configure | 8 + disas.c | 37 ++++ include/disas/disas.h | 2 + include/exec/log_instr_internal.h | 7 + include/qemu/log_instr.h | 15 +- meson.build | 1 + softmmu/vl.c | 4 + trace_extra/meson.build | 2 + 11 files changed, 389 insertions(+), 7 deletions(-) create mode 100644 accel/tcg/log_instr_protobuf.c diff --git a/accel/tcg/log_instr.c b/accel/tcg/log_instr.c index 9a9ce40544b..7f14e928671 100644 --- a/accel/tcg/log_instr.c +++ b/accel/tcg/log_instr.c @@ -120,7 +120,13 @@ static trace_backend_hooks_t trace_backends[] = { { .init = init_perfetto_backend, .sync = sync_perfetto_backend, .emit_header = NULL, - .emit_instr = emit_perfetto_entry } + .emit_instr = emit_perfetto_entry }, +#endif +#ifdef CONFIG_TRACE_PROTOBUF + { .init = init_protobuf_backend, + .sync = sync_protobuf_backend, + .emit_header = NULL, + .emit_instr = emit_protobuf_entry }, #endif }; diff --git a/accel/tcg/log_instr_protobuf.c b/accel/tcg/log_instr_protobuf.c new file mode 100644 index 00000000000..486ef8fc1f1 --- /dev/null +++ b/accel/tcg/log_instr_protobuf.c @@ -0,0 +1,307 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2021 Alfredo Mazzinghi + * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory (Department of Computer Science and + * Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the + * DARPA SSITH research programme. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Similar functionality as the text backend but with a more stable format. + */ + +#include + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "cpu.h" +#include "exec/log_instr.h" +#include "exec/log_instr_internal.h" +#include "exec/memop.h" +#include "disas/disas.h" + +#include "protobuf_backend_entry.pb-c.h" + +static FILE *protobuf_logfile = NULL; + +#ifdef TARGET_CHERI +static QEMULogInstrCapability *emit_protobuf_capability(cap_register_t *cap) +{ + QEMULogInstrCapability *pb_cap = g_malloc(sizeof(QEMULogInstrCapability)); + + qemulog_instr_capability__init(pb_cap); + pb_cap->valid = cap->cr_tag; + pb_cap->sealed = cap_is_sealed_with_type(cap); + pb_cap->perms = COMBINED_PERMS_VALUE(cap); + /* pb_cap->flags = cap_get_flags(cap); */ + pb_cap->otype = cap_get_otype_unsigned(cap); + pb_cap->cap_base = cap_get_base(cap); + pb_cap->cap_cursor = cap_get_cursor(cap); + pb_cap->cap_length = cap_get_length_sat(cap); + return pb_cap; +} +#endif + +static void emit_protobuf_meminfo(log_meminfo_t *minfo, QEMULogInstrMem **pb_mem) +{ + QEMULogInstrMem *mem = g_malloc(sizeof(QEMULogInstrMem)); + + qemulog_instr_mem__init(mem); + mem->is_load = (minfo->flags & LMI_LD) != 0; + mem->size = memop_size(minfo->op); + mem->addr = minfo->addr; +#ifndef TARGET_CHERI + assert((minfo->flags & LMI_CAP) == 0 && + "Capability memory access without CHERI support"); +#else + if (minfo->flags & LMI_CAP) { + mem->value_case = QEMULOG_INSTR_MEM__VALUE_CAP_VALUE; + mem->cap_value = emit_protobuf_capability(&minfo->cap); + } else +#endif + { + mem->value_case = QEMULOG_INSTR_MEM__VALUE_INT_VALUE; + mem->int_value = minfo->value; + } + *pb_mem = mem; +} + +static void release_protobuf_meminfo(QEMULogInstrMem *pb_mem) +{ + if (pb_mem->value_case == QEMULOG_INSTR_MEM__VALUE_CAP_VALUE) { + g_free(pb_mem->cap_value); + } + g_free(pb_mem); +} + +static void emit_protobuf_reginfo(log_reginfo_t *rinfo, QEMULogInstrReg **pb_reg) +{ + QEMULogInstrReg *reg = g_malloc(sizeof(QEMULogInstrReg)); + + qemulog_instr_reg__init(reg); + /* Safe to de-const cast as the pointer is only used during packing */ + reg->name = (char *)rinfo->name; + +#ifndef TARGET_CHERI + assert(!reginfo_is_cap(rinfo) && "Register marked as capability " + "register whitout CHERI support"); +#else + if (reginfo_is_cap(rinfo)) { + if (reginfo_has_cap(rinfo)) { + reg->value_case = QEMULOG_INSTR_REG__VALUE_CAP_VALUE; + reg->cap_value = emit_protobuf_capability(&rinfo->cap); + } else { + reg->value_case = QEMULOG_INSTR_REG__VALUE_INT_VALUE; + reg->int_value = rinfo->gpr; + } + } else +#endif + { + reg->value_case = QEMULOG_INSTR_REG__VALUE_INT_VALUE; + reg->int_value = rinfo->gpr; + } + *pb_reg = reg; +} + +static void release_protobuf_reginfo(QEMULogInstrReg *pb_reg) +{ + if (pb_reg->value_case == QEMULOG_INSTR_REG__VALUE_CAP_VALUE) { + g_free(pb_reg->cap_value); + } + g_free(pb_reg); +} + +static void emit_protobuf_event(log_event_t *evtinfo, QEMULogInstrEvt **pb_evt) +{ + QEMULogInstrEvt *evt = g_malloc(sizeof(QEMULogInstrEvt)); + + qemulog_instr_evt__init(evt); + switch (evtinfo->id) { + case LOG_EVENT_STATE: + evt->event_case = QEMULOG_INSTR_EVT__EVENT_STATE; + evt->state = g_malloc(sizeof(QEMULogInstrEvtTraceState)); + qemulog_instr_evt_trace_state__init(evt->state); + evt->state->next_state = (LogEventTraceState)evtinfo->state.next_state; + evt->state->pc = evtinfo->state.pc; + break; + case LOG_EVENT_CTX_UPDATE: + evt->event_case = QEMULOG_INSTR_EVT__EVENT_CTX_UPDATE; + evt->ctx_update = g_malloc(sizeof(QEMULogInstrEvtCtxUpdate)); + qemulog_instr_evt_ctx_update__init(evt->ctx_update); + evt->ctx_update->pid = evtinfo->ctx_update.pid; + evt->ctx_update->tid = evtinfo->ctx_update.tid; + evt->ctx_update->cid = evtinfo->ctx_update.cid; + /* Safe to de-const cast as the pointer is only used during packing */ + evt->ctx_update->mode = (char *)cpu_get_mode_name(evtinfo->ctx_update.mode); + break; + case LOG_EVENT_MARKER: + evt->event_case = QEMULOG_INSTR_EVT__EVENT_MARKER; + evt->marker = evtinfo->marker; + break; + default: + assert(0 && "unknown event ID"); + } + *pb_evt = evt; +} + +static void release_protobuf_event(QEMULogInstrEvt *pb_evt) +{ + if (pb_evt->event_case == QEMULOG_INSTR_EVT__EVENT_STATE) { + g_free(pb_evt->state); + } + if (pb_evt->event_case == QEMULOG_INSTR_EVT__EVENT_CTX_UPDATE) { + g_free(pb_evt->ctx_update); + } + g_free(pb_evt); +} + +void init_protobuf_backend(CPUArchState *env) +{ + /* + * TODO should handle a separate log file to avoid pollution of the main log file with + * binary data from protobufs. + * Since this seems to be a common pattern, maybe generalize it. + * + * XXX we may want to store a cache of protobuf structures in the backend data so + * that we avoid all the g_malloc() and g_free(). + */ + + if (protobuf_logfile == NULL) + protobuf_logfile = fopen("qemu-protobuf.pb", "w+b"); +} + +void sync_protobuf_backend(CPUArchState *env) +{ + if (protobuf_logfile != NULL) + fflush(protobuf_logfile); +} + +void emit_protobuf_entry(CPUArchState *env, cpu_log_entry_t *entry) +{ + QEMULogInstrEntry pb_entry = QEMULOG_INSTR_ENTRY__INIT; + QEMULogInstrExc pb_exc = QEMULOG_INSTR_EXC__INIT; + QEMULogInstrMem **pb_mem; + QEMULogInstrReg **pb_reg; + QEMULogInstrEvt **pb_evt; + uint8_t *buf; + size_t len; + uint32_t preamble = 0; + int i; + + pb_entry.pc = entry->pc; + + if (entry->flags & LI_FLAG_HAS_INSTR_DATA) { + pb_entry.insn_case = QEMULOG_INSTR_ENTRY__INSN_DISAS; + pb_entry.disas = disas_one_strbuf(env_cpu(env), entry->insn_bytes, + sizeof(entry->insn_bytes), entry->pc); + pb_entry.cpu = env_cpu(env)->cpu_index; + pb_entry.asid = entry->asid; + if (entry->flags & LI_FLAG_MODE_SWITCH) { + /* Safe to de-const cast as the pointer is only used during packing */ + pb_entry.mode = (char *)cpu_get_mode_name(entry->next_cpu_mode); + } + if (entry->flags & LI_FLAG_INTR_MASK) { + pb_entry.exception = &pb_exc; + pb_exc.vector = entry->intr_vector; + pb_exc.code = entry->intr_code; + if (entry->flags & LI_FLAG_INTR_TRAP) { + pb_exc.type = QEMULOG_INSTR_EXC_TYPE__TRAP; + pb_exc.faultaddr = entry->intr_faultaddr; + } else if (entry->flags & LI_FLAG_INTR_ASYNC) { + pb_exc.type = QEMULOG_INSTR_EXC_TYPE__INTR; + } else { + assert(0 && "invalid exception flags"); + } + } + if (entry->mem->len > 0) { + pb_mem = g_malloc(entry->mem->len * sizeof(QEMULogInstrMem *)); + pb_entry.n_mem = entry->mem->len; + pb_entry.mem = pb_mem; + for (i = 0; i < entry->mem->len; i++, pb_mem++) { + log_meminfo_t *minfo = &g_array_index(entry->mem, log_meminfo_t, i); + emit_protobuf_meminfo(minfo, pb_mem); + } + } + if (entry->regs->len > 0) { + pb_reg = g_malloc(entry->regs->len * sizeof(QEMULogInstrReg *)); + pb_entry.n_reg = entry->regs->len; + pb_entry.reg = pb_reg; + for (i = 0; i < entry->regs->len; i++, pb_reg++) { + log_reginfo_t *rinfo = &g_array_index(entry->regs, log_reginfo_t, i); + emit_protobuf_reginfo(rinfo, pb_reg); + } + } + } + + if (entry->events->len > 0) { + pb_evt = NULL; + pb_evt = g_malloc(entry->events->len * sizeof(QEMULogInstrEvt *)); + pb_entry.n_evt = entry->events->len; + pb_entry.evt = pb_evt; + for (i = 0; i < entry->events->len; i++, pb_evt++) { + log_event_t *event = &g_array_index(entry->events, log_event_t, i); + emit_protobuf_event(event, pb_evt); + } + } + + len = qemulog_instr_entry__get_packed_size(&pb_entry); + preamble = len; + assert(preamble == len && "Message preamble overflow"); + + buf = g_malloc(len + sizeof(preamble)); + *((uint32_t *)buf) = preamble; + qemulog_instr_entry__pack(&pb_entry, buf + sizeof(preamble)); + + /* technically we should not use the qemu logfile as it is not open in binary mode */ + qemu_flockfile(protobuf_logfile); + fwrite(buf, len + sizeof(preamble), 1, protobuf_logfile); + qemu_funlockfile(protobuf_logfile); + + if (entry->flags & LI_FLAG_HAS_INSTR_DATA) { + g_free(pb_entry.disas); + + if (entry->mem->len > 0) { + for (i = 0; i < entry->mem->len; i++) { + release_protobuf_meminfo(pb_entry.mem[i]); + } + g_free(pb_entry.mem); + } + if (entry->regs->len > 0) { + for (i = 0; i < entry->regs->len; i++) { + release_protobuf_reginfo(pb_entry.reg[i]); + } + g_free(pb_entry.reg); + } + } + if (entry->events->len > 0) { + for (i = 0; i < entry->events->len; i++) { + release_protobuf_event(pb_entry.evt[i]); + } + g_free(pb_entry.evt); + } + g_free(buf); +} diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build index 8a89e995472..23a4b0a30e0 100644 --- a/accel/tcg/meson.build +++ b/accel/tcg/meson.build @@ -17,3 +17,8 @@ specific_ss.add(when: ['CONFIG_TCG_LOG_INSTR', 'CONFIG_TCG'], if_true: files('lo specific_ss.add(when: ['CONFIG_TCG_LOG_INSTR', 'CONFIG_TCG'], if_true: files('log_instr_text.c')) specific_ss.add(when: ['CONFIG_TCG_LOG_INSTR', 'CONFIG_TCG'], if_true: files('log_instr_cvtrace.c')) specific_ss.add(when: ['CONFIG_TRACE_PERFETTO', 'CONFIG_TCG_LOG_INSTR', 'CONFIG_TCG'], if_true: files('log_instr_perfetto.c')) +specific_ss.add(when: ['CONFIG_TRACE_PROTOBUF', 'CONFIG_TCG_LOG_INSTR', 'CONFIG_TCG'], if_true: files('log_instr_protobuf.c')) + +if config_all.has_key('CONFIG_TCG_LOG_INSTR') and config_all.has_key('CONFIG_TRACE_PROTOBUF') + specific_ss.add(dependency('libprotobuf-c')) +endif diff --git a/configure b/configure index d897d6eaa3f..61d5879b5b2 100755 --- a/configure +++ b/configure @@ -341,6 +341,7 @@ gprof="no" debug_tcg="no" tcg_log_instr="yes" tcg_log_instr_perfetto="no" +tcg_log_instr_protobuf="no" rvfi_dii="no" debug="no" sanitizers="no" @@ -1041,6 +1042,10 @@ for opt do ;; --disable-perfetto-log-instr) tcg_log_instr_perfetto="no" ;; + --enable-protobuf-log-instr) tcg_log_instr_protobuf="yes" + ;; + --disable-protobuf-log-instr) tcg_log_instr_protobuf="no" + ;; --enable-rvfi-dii) rvfi_dii="yes" ;; --disable-rvfi-dii) rvfi_dii="no" @@ -5976,6 +5981,9 @@ fi if test "$tcg_log_instr_perfetto" = "yes" ; then echo "CONFIG_TRACE_PERFETTO=y" >> $config_host_mak fi +if test "$tcg_log_instr_protobuf" = "yes" ; then + echo "CONFIG_TRACE_PROTOBUF=y" >> $config_host_mak +fi if test "$rvfi_dii" = "yes" ; then echo "CONFIG_RVFI_DII=y" >> $config_host_mak fi diff --git a/disas.c b/disas.c index a0de01cc39a..a645c4ee68b 100644 --- a/disas.c +++ b/disas.c @@ -355,6 +355,43 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size) return g_string_free(ds, false); } +/* + * Disassemble one instruction to a string buffer. + */ +char *disas_one_strbuf(CPUState *cpu, void *code, unsigned long size, target_ulong vma) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + CPUDebug s; + GString *ds = g_string_new(NULL); + + initialize_debug_target(&s, cpu); + s.info.fprintf_func = plugin_printf; + s.info.stream = (FILE *)ds; /* abuse this slot */ + s.info.buffer_vma = vma; + s.info.buffer_length = size; + +#ifdef TARGET_WORDS_BIGENDIAN + s.info.endian = BFD_ENDIAN_BIG; +#else + s.info.endian = BFD_ENDIAN_LITTLE; +#endif + + if (cc->disas_set_info) { + cc->disas_set_info(cpu, &s.info); + } + + if (s.info.cap_arch >= 0 && cap_disas_target(&s.info, vma, size)) { + ; /* done */ + } else if (s.info.print_insn) { + s.info.print_insn(vma, &s.info); + } else { + ; /* cannot disassemble -- return empty string */ + } + + /* Return the buffer, freeing the GString container. */ + return g_string_free(ds, false); +} + /* Disassemble this for me please... (debugging). */ void disas(FILE *out, const void *code, unsigned long size) { diff --git a/include/disas/disas.h b/include/disas/disas.h index ca3a801913f..362a9fae087 100644 --- a/include/disas/disas.h +++ b/include/disas/disas.h @@ -18,6 +18,8 @@ void monitor_disas(Monitor *mon, CPUState *cpu, char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size); +char *disas_one_strbuf(CPUState *cpu, void *code, unsigned long size, target_ulong vma); + /* Look up symbol for debugging purpose. Returns "" if unknown. */ const char *lookup_symbol(target_ulong orig_addr); #endif diff --git a/include/exec/log_instr_internal.h b/include/exec/log_instr_internal.h index 7201d0c61d6..da66596a583 100644 --- a/include/exec/log_instr_internal.h +++ b/include/exec/log_instr_internal.h @@ -167,10 +167,17 @@ void emit_text_instr(CPUArchState *env, cpu_log_entry_t *entry); /* CVTrace backend */ void emit_cvtrace_header(CPUArchState *env); void emit_cvtrace_entry(CPUArchState *env, cpu_log_entry_t *entry); +#ifdef CONFIG_TRACE_PERFETTO /* Perfetto backend */ void init_perfetto_backend(CPUArchState *env); void sync_perfetto_backend(CPUArchState *env); void emit_perfetto_entry(CPUArchState *env, cpu_log_entry_t *entry); +#endif +#ifdef CONFIG_TRACE_PROTOBUF +void init_protobuf_backend(CPUArchState *env); +void sync_protobuf_backend(CPUArchState *env); +void emit_protobuf_entry(CPUArchState *env, cpu_log_entry_t *entry); +#endif #ifdef CONFIG_DEBUG_TCG #define log_assert(x) assert((x)) diff --git a/include/qemu/log_instr.h b/include/qemu/log_instr.h index 120aed7974f..1fa1e1d1282 100644 --- a/include/qemu/log_instr.h +++ b/include/qemu/log_instr.h @@ -55,7 +55,10 @@ typedef enum { QEMU_LOG_INSTR_BACKEND_CVTRACE = 1, QEMU_LOG_INSTR_BACKEND_NOP = 2, #ifdef CONFIG_TRACE_PERFETTO - QEMU_LOG_INSTR_BACKEND_PERFETTO = 3 + QEMU_LOG_INSTR_BACKEND_PERFETTO = 3, +#endif +#ifdef CONFIG_TRACE_PROTOBUF + QEMU_LOG_INSTR_BACKEND_PROTOBUF = 4, #endif } qemu_log_instr_backend_t; @@ -111,9 +114,9 @@ typedef enum { * Tracing status changed (e.g. trace start/stop) */ typedef enum { - LOG_EVENT_STATE_START, - LOG_EVENT_STATE_STOP, - LOG_EVENT_STATE_FLUSH + LOG_EVENT_STATE_START = 0, + LOG_EVENT_STATE_STOP = 1, + LOG_EVENT_STATE_FLUSH = 2 } log_event_trace_state_t; typedef struct { @@ -124,14 +127,14 @@ typedef struct { typedef enum { /* Switch context from the current to the new (proc, thread, compartment) ID */ - LOG_EVENT_CTX_OP_SWITCH = 1, + LOG_EVENT_CTX_OP_SWITCH = 0, /* * Same as LOG_EVENT_CTX_OP_SWITCH but should bypass tracing activation * status, Meaning that these events will reach the backend even when * tracing is off. This is useful setup the correct context identifier * before switching on tracing. */ - LOG_EVENT_CTX_OP_SETUP = 2, + LOG_EVENT_CTX_OP_SETUP = 1, } log_event_ctx_update_op_t; /* diff --git a/meson.build b/meson.build index 8f228da301c..68d81e5659f 100644 --- a/meson.build +++ b/meson.build @@ -2182,6 +2182,7 @@ if config_all.has_key('CONFIG_TCG') summary_info += {'TCG interpreter': config_host.has_key('CONFIG_TCG_INTERPRETER')} summary_info += {'TCG instruction logging': config_host.has_key('CONFIG_TCG_LOG_INSTR')} summary_info += {'TCG instruction logging with google-perfetto': config_host.has_key('CONFIG_TRACE_PERFETTO')} + summary_info += {'TCG instruction logging with protobuf backend': config_host.has_key('CONFIG_TRACE_PROTOBUF')} endif summary_info += {'RVFI-DII': config_host.has_key('CONFIG_RVFI_DII')} summary_info += {'malloc trim support': has_malloc_trim} diff --git a/softmmu/vl.c b/softmmu/vl.c index f9d20853d09..fc4e44a3ca5 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -3758,6 +3758,10 @@ void qemu_init(int argc, char **argv, char **envp) #ifdef CONFIG_TRACE_PERFETTO } else if (strcmp(optarg, "perfetto") == 0) { qemu_log_instr_set_backend(QEMU_LOG_INSTR_BACKEND_PERFETTO); +#endif +#ifdef CONFIG_TRACE_PROTOBUF + } else if (strcmp(optarg, "protobuf") == 0) { + qemu_log_instr_set_backend(QEMU_LOG_INSTR_BACKEND_PROTOBUF); #endif } else { printf("Invalid choice for cheri-trace-format: '%s'\n", optarg); diff --git a/trace_extra/meson.build b/trace_extra/meson.build index 66424f3bcfa..08b0a48b03b 100644 --- a/trace_extra/meson.build +++ b/trace_extra/meson.build @@ -19,3 +19,5 @@ libqemutrace = static_library('qemutrace', '-DQEMU_PERFETTO']) qemuutil_libs += libqemutrace + +subdir('proto')