From 220603744f66641ebbce7af37f46f1a5217478d8 Mon Sep 17 00:00:00 2001 From: Jason Dellaluce Date: Fri, 12 Apr 2024 12:09:45 +0000 Subject: [PATCH] update(libsinsp): support field transformers in output formatters Co-authored-by: Andrea Terzolo Signed-off-by: Jason Dellaluce --- userspace/libsinsp/eventformatter.cpp | 253 +++++++++++++------------- userspace/libsinsp/eventformatter.h | 50 +++-- 2 files changed, 166 insertions(+), 137 deletions(-) diff --git a/userspace/libsinsp/eventformatter.cpp b/userspace/libsinsp/eventformatter.cpp index dc9601c4b6..9f347e9127 100644 --- a/userspace/libsinsp/eventformatter.cpp +++ b/userspace/libsinsp/eventformatter.cpp @@ -17,14 +17,11 @@ limitations under the License. */ #include -#include -#include #include #include +#include -/////////////////////////////////////////////////////////////////////////////// -// rawstring_check implementation -/////////////////////////////////////////////////////////////////////////////// +static constexpr const char* s_not_available_str = ""; sinsp_evt_formatter::sinsp_evt_formatter(sinsp* inspector, filter_check_list &available_checks) @@ -55,21 +52,19 @@ sinsp_evt_formatter::sinsp_evt_formatter(sinsp* inspector, void sinsp_evt_formatter::set_format(output_format of, const std::string& fmt) { - uint32_t j; - uint32_t last_nontoken_str_start = 0; - std::string lfmt(fmt); - - m_output_format = of; - - if(lfmt == "") + if(fmt.empty()) { throw sinsp_exception("empty formatting token"); } + m_output_tokens.clear(); + m_output_format = of; + // // If the string starts with a *, it means that we are ok with printing // the string even when not all the values it specifies are set. // + std::string lfmt(fmt); if(lfmt[0] == '*') { m_require_all_values = false; @@ -84,10 +79,10 @@ void sinsp_evt_formatter::set_format(output_format of, const std::string& fmt) // Parse the string and extract the tokens // const char* cfmt = lfmt.c_str(); - - m_tokens.clear(); + std::shared_ptr chk; uint32_t lfmtlen = (uint32_t)lfmt.length(); - + uint32_t last_nontoken_str_start = 0; + uint32_t j = 0; for(j = 0; j < lfmtlen; j++) { if(cfmt[j] == '%') @@ -96,10 +91,9 @@ void sinsp_evt_formatter::set_format(output_format of, const std::string& fmt) if(last_nontoken_str_start != j) { - auto newtkn = std::make_unique(lfmt.substr(last_nontoken_str_start, j - last_nontoken_str_start)); - m_tokens.emplace_back(std::make_pair("", newtkn.get())); - m_tokenlens.push_back(0); - m_checks.push_back(std::move(newtkn)); + auto newtkn = std::make_shared(lfmt.substr(last_nontoken_str_start, j - last_nontoken_str_start)); + m_output_tokens.emplace_back(newtkn); + m_output_tokenlens.push_back(0); } if(j == lfmtlen - 1) @@ -108,7 +102,7 @@ void sinsp_evt_formatter::set_format(output_format of, const std::string& fmt) } // - // If the field specifier starts with a number, it means that we have a length modifier + // If the field specifier starts with a number, it means that we have a length transformer // if(isdigit(cfmt[j + 1])) { @@ -138,25 +132,82 @@ void sinsp_evt_formatter::set_format(output_format of, const std::string& fmt) } } - auto chk = m_available_checks.new_filter_check_from_fldname(std::string_view(cfmt + j + 1), - m_inspector, - false); + // start parsing the token, which at this point must be a valid + // field or a valid field transformer + int msize = 0; + const char* tstart = cfmt + j + 1; + std::vector transformers; + while(true) + { + auto prev_size = msize; + for (const auto& tr : libsinsp::filter::parser::supported_field_transformers()) + { + if ((j + 1 + tr.size() + 1) < lfmtlen + && tstart[msize + tr.size()] == '(' + && !strncmp(tstart + msize, tr.c_str(), tr.size())) + { + transformers.emplace_back(filter_transformer_from_str(tr)); + msize += tr.size() + 1; // count '(' + j += tr.size() + 1; + } + } + // note: no whitespace is allowed between transformers + if (prev_size == msize) + { + break; + } + } + // read field token and make sure it's a valid one + const char* fstart = cfmt + j + 1; + chk = m_available_checks.new_filter_check_from_fldname( + std::string_view(fstart), m_inspector, false); if(chk == nullptr) { - throw sinsp_exception("invalid formatting token " + std::string(cfmt + j + 1)); + throw sinsp_exception("invalid formatting token " + std::string(fstart)); } - - const char * fstart = cfmt + j + 1; uint32_t fsize = chk->parse_field_name(fstart, true, false); - j += fsize; - ASSERT(j <= lfmt.length()); + ASSERT(j <= lfmtlen); + + // we always add the field with no transformers for key->value resolution + m_resolution_tokens.emplace_back(std::string(fstart, fsize), chk, false); + + // if we have transformers, create a copy of the field and use it + // both for output substitution and for key->value resolution + if (!transformers.empty()) + { + chk = m_available_checks.new_filter_check_from_fldname( + fstart, m_inspector, false); + if(chk == nullptr) + { + throw sinsp_exception("invalid formatting token " + std::string(fstart)); + } + chk->parse_field_name(fstart, true, false); - m_tokens.emplace_back(std::make_pair(std::string(fstart, fsize), chk.get())); - m_tokenlens.push_back(toklen); + // apply all transformers and pop back their ')' enclosing token + // note: we apply transformers in reserve order to preserve their semantics + for (auto rit = transformers.rbegin(); rit != transformers.rend(); ++rit) + { + chk->add_transformer(*rit); + + // note: no whitespace is allowed between transformer enclosing + if (j + 1 >= lfmtlen || cfmt[j + 1] != ')') + { + throw sinsp_exception("missing closing transformer parenthesis: " + std::string(cfmt + j)); + } + j++; + msize++; // count ')' + } + + // when requested to do so, we'll resolve the field with transformers + // in addition to the non-transformed version + m_resolution_tokens.emplace_back(std::string(tstart, fsize + msize), chk, true); + } - m_checks.push_back(std::move(chk)); + // add field for output substitution + m_output_tokens.emplace_back(chk); + m_output_tokenlens.push_back(toklen); last_nontoken_str_start = j + 1; } @@ -164,52 +215,34 @@ void sinsp_evt_formatter::set_format(output_format of, const std::string& fmt) if(last_nontoken_str_start != j) { - auto chk = std::make_unique(lfmt.substr(last_nontoken_str_start, j - last_nontoken_str_start)); - m_tokens.emplace_back(std::make_pair("", chk.get())); - m_checks.emplace_back(std::move(chk)); - m_tokenlens.push_back(0); + auto chk = std::make_shared(lfmt.substr(last_nontoken_str_start, j - last_nontoken_str_start)); + m_output_tokens.emplace_back(chk); + m_output_tokenlens.push_back(0); } } -bool sinsp_evt_formatter::on_capture_end(OUT std::string* res) -{ - res->clear(); - return res->size() > 0; -} - bool sinsp_evt_formatter::resolve_tokens(sinsp_evt *evt, std::map& values) { - bool retval = true; - const filtercheck_field_info* fi; - uint32_t j = 0; - - ASSERT(m_tokenlens.size() == m_tokens.size()); - - for(j = 0; j < m_tokens.size(); j++) + for(const auto& t : m_resolution_tokens) { - char* str = m_tokens[j].second->tostring(evt); + if (t.has_transformers && !m_resolve_transformed_fields) + { + continue; + } + const char* str = t.token->tostring(evt); if(str == NULL) { if(m_require_all_values) { - retval = false; - break; - } - else - { - str = (char*)""; + return false; } - } - fi = m_tokens[j].second->get_field_info(); - if(fi && strncmp(fi->m_name, "NA", sizeof("NA") + 1) != 0) - { - values[m_tokens[j].first] = std::string(str); + str = s_not_available_str; } + values[t.name] = str; } - - return retval; + return true; } bool sinsp_evt_formatter::get_field_values(sinsp_evt *evt, std::map &fields) @@ -219,14 +252,9 @@ bool sinsp_evt_formatter::get_field_values(sinsp_evt *evt, std::map &fields) { - for(size_t i = 0; i < m_tokens.size(); i++) + for(const auto& t : m_resolution_tokens) { - if(m_tokens[i].first == "") - { - continue; - } - - fields.emplace_back(m_tokens[i].first); + fields.emplace_back(t.name); } } @@ -237,81 +265,62 @@ sinsp_evt_formatter::output_format sinsp_evt_formatter::get_output_format() bool sinsp_evt_formatter::tostring_withformat(sinsp_evt* evt, std::string &output, output_format of) { - bool retval = true; - const filtercheck_field_info* fi; - - uint32_t j = 0; output.clear(); - ASSERT(m_tokenlens.size() == m_tokens.size()); - - for(j = 0; j < m_tokens.size(); j++) + if(of == OF_JSON) { - if(of == OF_JSON) + bool retval = true; + for (const auto& t : m_resolution_tokens) { - Json::Value json_value = m_tokens[j].second->tojson(evt); - - if(retval == false) + if (t.has_transformers && !m_resolve_transformed_fields) { + // always skip keys with transformers here + // todo!: is this the desired behavior? continue; } - + Json::Value json_value = t.token->tojson(evt); if(json_value == Json::nullValue && m_require_all_values) { retval = false; - continue; - } - - fi = m_tokens[j].second->get_field_info(); - if(fi && strncmp(fi->m_name, "NA", sizeof("NA") + 1) != 0) - { - m_root[m_tokens[j].first] = m_tokens[j].second->tojson(evt); + break; } + m_root[t.name] = t.token->tojson(evt); } - else - { - char* str = m_tokens[j].second->tostring(evt); - - if(retval == false) - { - continue; - } - - if(str == NULL) - { - if(m_require_all_values) - { - retval = false; - continue; - } - else - { - str = (char*)""; - } - } - - uint32_t tks = m_tokenlens[j]; + output = m_writer.write(m_root); + output = output.substr(0, output.size() - 1); + return retval; + } - if(tks != 0) + ASSERT(m_output_tokenlens.size() == m_output_tokens.size()); + for(size_t j = 0; j < m_output_tokens.size(); j++) + { + const char* str = m_output_tokens[j]->tostring(evt); + if(str == NULL) + { + if(m_require_all_values) { - std::string sstr(str); - sstr.resize(tks, ' '); - output += sstr; + return false; } else { - output += str; + str = s_not_available_str; } } - } - if(of == OF_JSON) - { - output = m_writer.write(m_root); - output = output.substr(0, output.size() - 1); + uint32_t tks = m_output_tokenlens[j]; + if(tks != 0) + { + std::string sstr(str); + sstr.resize(tks, ' '); + output += sstr; + } + else + { + output += str; + } } - return retval; + return true; } bool sinsp_evt_formatter::tostring(sinsp_evt* evt, std::string& res) diff --git a/userspace/libsinsp/eventformatter.h b/userspace/libsinsp/eventformatter.h index e28d86b32c..213c3dc8f7 100644 --- a/userspace/libsinsp/eventformatter.h +++ b/userspace/libsinsp/eventformatter.h @@ -63,10 +63,9 @@ class SINSP_PUBLIC sinsp_evt_formatter map. \param evt Pointer to the event to be converted into string. - \param res Reference to the map that will be filled with the result. + \param values Reference to the map that will be filled with the result. - \return true if all the tokens can be retrieved successfully, false - otherwise. + \return true if all the tokens can be retrieved successfully, false otherwise. */ bool resolve_tokens(sinsp_evt *evt, std::map& values); @@ -101,26 +100,47 @@ class SINSP_PUBLIC sinsp_evt_formatter virtual bool tostring_withformat(sinsp_evt* evt, std::string &output, output_format of); - /*! - \brief Fills res with end of capture string rendering of the event. - \param res Pointer to the string that will be filled with the result. - - \return true if there is a string to show (based on the format), - false otherwise. + /** + * \brief If true, when resolving tokens in key -> value mappings (e.g. + * with `resolve_tokens` or `tostring` with JSON output format), the result + * will include fields with their applied transformers. The version of fields + * with no transformers will be included in results in any case regardless + * of this property. */ - bool on_capture_end(OUT std::string* res); + inline bool get_resolve_transformed_fields() const + { + return m_resolve_transformed_fields; + } + + inline void set_resolve_transformed_fields(bool v) + { + m_resolve_transformed_fields = v; + } private: + using token_t = std::shared_ptr; + + struct resolution_token + { + std::string name; + token_t token; + bool has_transformers = false; + + resolution_token(const std::string& n, token_t t, bool h) + : name(n), token(t), has_transformers(h) { } + }; + output_format m_output_format; // vector of (full string of the token, filtercheck) pairs // e.g. ("proc.aname[2], ptr to sinsp_filter_check_thread) - std::vector> m_tokens; - std::vector m_tokenlens; - sinsp* m_inspector; + std::vector m_output_tokens; + std::vector m_output_tokenlens; + std::vector m_resolution_tokens; + sinsp* m_inspector = nullptr; filter_check_list &m_available_checks; - bool m_require_all_values; - std::vector> m_checks; + bool m_require_all_values = false; + bool m_resolve_transformed_fields = false; Json::Value m_root; Json::FastWriter m_writer;