From cba3555d341dc702d156c6d57fc380c93275a3f4 Mon Sep 17 00:00:00 2001 From: Melissa Kilby Date: Sun, 11 Feb 2024 02:40:05 +0000 Subject: [PATCH] cleanup(cri): cleanup lookup_status handling * Now more precise, logical and clean, set STARTED at the beginning and SUCCESSFUL at the end of `parse_container_json_evt` if and only if the container image was successfully retrieved and present before calling `add_container` or if it's a pod sanbox container. * In `lookup_sync` we previously set the lookup_status to SUCCESSFUL, which can be avoided to have a more consolidated and clean behavior in `parse_container_json_evt`. Signed-off-by: Melissa Kilby --- .../container_async_source.tpp | 1 - userspace/libsinsp/container_engine/cri.cpp | 24 +++++++------ userspace/libsinsp/parsers.cpp | 35 ++++++++++++------- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/userspace/libsinsp/container_engine/container_async_source.tpp b/userspace/libsinsp/container_engine/container_async_source.tpp index c02c392da1c..8b3f48ebf68 100644 --- a/userspace/libsinsp/container_engine/container_async_source.tpp +++ b/userspace/libsinsp/container_engine/container_async_source.tpp @@ -58,7 +58,6 @@ bool container_async_source::lookup(const key_type& key, template bool container_async_source::lookup_sync(const key_type& key, sinsp_container_info& value) { - value.set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); value.m_type = container_type(key); value.m_id = container_id(key); diff --git a/userspace/libsinsp/container_engine/cri.cpp b/userspace/libsinsp/container_engine/cri.cpp index 73174a772ff..1ecc283afc8 100644 --- a/userspace/libsinsp/container_engine/cri.cpp +++ b/userspace/libsinsp/container_engine/cri.cpp @@ -193,7 +193,6 @@ bool cri::resolve(sinsp_threadinfo *tinfo, bool query_os_for_missing_info) "cri (%s): Performing lookup", container_id.c_str()); - container.set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); libsinsp::cgroup_limits::cgroup_limits_key key( container.m_id, tinfo->get_cgroup("cpu"), @@ -244,24 +243,29 @@ bool cri::resolve(sinsp_threadinfo *tinfo, bool query_os_for_missing_info) libsinsp_logger()->format(sinsp_logger::SEV_DEBUG, "cri_async (%s): Starting synchronous lookup", container_id.c_str()); - // lookup_sync function directly invokes the container engine specific parser `parse` + // `lookup_sync` function directly invokes the container engine specific parser `parse` done = m_async_source->lookup_sync(key, result); - // explicitly check for the most crucial retrieved value to be present - if(!result.m_image.empty()) + if(!result.m_image.empty() || result.is_pod_sandbox()) { /* * Only for synchronous lookup option (e.g. Falco's default is async not sync) * - * Fast-track addition of enriched containers (fields successfully retrieved from the container runtime socket) - * to the container cache, bypassing the round-trip process: + * Explicitly check for the most crucial retrieved value (`m_image`) to be present before enabling the + * fast-track container add option. At this point, the container with only the cgroup (container id) was + * already added to the cache. Therefore, we can proceed to call `replace_container`. + * + * Bypassing the round-trip process: * `source_callback` -> `notify_new_container` -> * `container_to_sinsp_event(container_to_json(container_info), ...)` -> * `parse_container_json_evt` -> `m_inspector->m_container_manager.add_container()` * - * Although we still re-add the container in `parse_container_json_evt` to also support native 'container' events, it - * introduces an avoidable delay in the incoming syscall event stream. Syscall events do not explicitly require container - * events and instead directly retrieve container details from the container cache. This behavior could potentially - * contribute to the issues noted by adopters, such as the absence of container images in syscall events. + * In `parse_container_json_evt`, we still re-add the container to support native 'container' events + * and new container callbacks that may expect the container as JSON in the artificial sinsp evt. + * However, we can avoid delays by storing the container struct in the container cache now. + * This is beneficial because syscall events do not explicitly require container events, instead, + * they directly retrieve container details from the container cache. This new feature can mitigate + * issues noted by adopters, such as the absence of container images in syscall events even when + * disabling async lookups. */ cache->replace_container(std::make_shared(result)); } diff --git a/userspace/libsinsp/parsers.cpp b/userspace/libsinsp/parsers.cpp index af83388f0c7..508092942fd 100644 --- a/userspace/libsinsp/parsers.cpp +++ b/userspace/libsinsp/parsers.cpp @@ -5080,22 +5080,11 @@ void sinsp_parser::parse_container_json_evt(sinsp_evt *evt) const Json::Value& lookup_state = container["lookup_state"]; if(check_json_val_is_convertible(lookup_state, Json::uintValue, "lookup_state")) { - container_info->set_lookup_status(static_cast(lookup_state.asUInt())); - switch(container_info->get_lookup_status()) - { - case sinsp_container_lookup::state::STARTED: - case sinsp_container_lookup::state::SUCCESSFUL: - case sinsp_container_lookup::state::FAILED: - break; - default: - container_info->set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); - } - // state == STARTED doesn't make sense in a scap file // as there's no actual lookup that would ever finish if(!evt->get_tinfo_ref() && container_info->get_lookup_status() == sinsp_container_lookup::state::STARTED) { - SINSP_DEBUG("Rewriting lookup_state = STARTED from scap file to FAILED for container %s", + SINSP_DEBUG("Rewriting lookup_state == STARTED from scap file to FAILED for container %s", container_info->m_id.c_str()); container_info->set_lookup_status(sinsp_container_lookup::state::FAILED); } @@ -5241,6 +5230,28 @@ void sinsp_parser::parse_container_json_evt(sinsp_evt *evt) } evt->set_tinfo_ref(container_info->get_tinfo(m_inspector)); evt->set_tinfo(evt->get_tinfo_ref().get()); + container_info->set_lookup_status(static_cast(lookup_state.asUInt())); + switch(container_info->get_lookup_status()) + { + // Preserve case where lookup_state == STARTED from scap file was set to FAILED + // or otherwise set to false. + case sinsp_container_lookup::state::FAILED: + break; + default: + { + if (!container_info->m_image.empty() || container_info->is_pod_sandbox()) + { + // Marked as SUCCESSFUL if and only if the container image field present or + // if it's a pod sandbox container + container_info->set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); + } else + { + container_info->set_lookup_status(sinsp_container_lookup::state::FAILED); + } + } + } + // `add_container` also triggers the callbacks, therefore always continue even if the + // lookup_status was set to FAILED m_inspector->m_container_manager.add_container(container_info, evt->get_thread_info(true)); /* SINSP_STR_DEBUG("Container\n-------\nID:" + container_info.m_id +