Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ROX-26025: Backport UTF-8 sanitizing fix for 4.5 #1863

Merged
merged 2 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ansible/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# TODO: unpin if RHEL 8 VMs on GCP update their python interpreter to 3.7+
# https://github.com/ansible/ansible/blob/v2.17.0/changelogs/CHANGELOG-v2.17.rst#removed-features-previously-deprecated
ansible<10
ansible-core==2.16.10
ansible==9.7.0
# TODO: unpin after https://github.com/docker/docker-py gets a release with
# https://github.com/docker/docker-py/commit/7785ad913ddf2d86478f08278bb2c488d05a29ff
requests==2.31.0
Expand Down
1 change: 1 addition & 0 deletions ansible/requirements.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
collections:
- google.cloud
- community.general
- community.docker
- containers.podman
Expand Down
41 changes: 31 additions & 10 deletions collector/lib/ProcessSignalFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ std::string extract_proc_args(sinsp_threadinfo* tinfo) {
if (tinfo->m_args.empty()) return "";
std::ostringstream args;
for (auto it = tinfo->m_args.begin(); it != tinfo->m_args.end();) {
args << *it++;
auto arg = *it++;
auto arg_sanitized = SanitizedUTF8(arg);

args << ((arg_sanitized ? *arg_sanitized : arg));

if (it != tinfo->m_args.end()) args << " ";
}
return args.str();
Expand Down Expand Up @@ -106,22 +110,37 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_evt* event) {
const std::string* name = event_extractor_->get_comm(event);
const std::string* exepath = event_extractor_->get_exepath(event);

std::optional<std::string> name_sanitized;
std::optional<std::string> exepath_sanitized;

if (name) {
name_sanitized = SanitizedUTF8(*name);
}

if (exepath) {
exepath_sanitized = SanitizedUTF8(*exepath);
}

// set name (if name is missing or empty, try to use exec_file_path)
if (name && !name->empty() && *name != "<NA>") {
signal->set_name(*name);
signal->set_name(name_sanitized ? *name_sanitized : *name);
} else if (exepath && !exepath->empty() && *exepath != "<NA>") {
signal->set_name(*exepath);
signal->set_name(exepath_sanitized ? *exepath_sanitized : *exepath);
}

// set exec_file_path (if exec_file_path is missing or empty, try to use name)
if (exepath && !exepath->empty() && *exepath != "<NA>") {
signal->set_exec_file_path(*exepath);
signal->set_exec_file_path(exepath_sanitized ? *exepath_sanitized : *exepath);
} else if (name && !name->empty() && *name != "<NA>") {
signal->set_exec_file_path(*name);
signal->set_exec_file_path(name_sanitized ? *name_sanitized : *name);
}

// set process arguments
if (const char* args = event_extractor_->get_proc_args(event)) signal->set_args(args);
if (const char* args = event_extractor_->get_proc_args(event)) {
std::string args_str = args;
auto args_sanitized = SanitizedUTF8(args_str);
signal->set_args(args_sanitized ? *args_sanitized : args_str);
}

// set pid
if (const int64_t* pid = event_extractor_->get_pid(event)) signal->set_pid(*pid);
Expand Down Expand Up @@ -164,20 +183,22 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_threadinfo* tin
signal->set_id(UUIDStr());

const auto& name = tinfo->m_comm;
auto name_sanitized = SanitizedUTF8(name);
const auto& exepath = tinfo->m_exepath;
auto exepath_sanitized = SanitizedUTF8(exepath);

// set name (if name is missing or empty, try to use exec_file_path)
if (!name.empty() && name != "<NA>") {
signal->set_name(name);
signal->set_name(name_sanitized ? *name_sanitized : name);
} else if (!exepath.empty() && exepath != "<NA>") {
signal->set_name(exepath);
signal->set_name(exepath_sanitized ? *exepath_sanitized : exepath);
}

// set exec_file_path (if exec_file_path is missing or empty, try to use name)
if (!exepath.empty() && exepath != "<NA>") {
signal->set_exec_file_path(exepath);
signal->set_exec_file_path(exepath_sanitized ? *exepath_sanitized : exepath);
} else if (!name.empty() && name != "<NA>") {
signal->set_exec_file_path(name);
signal->set_exec_file_path(name_sanitized ? *name_sanitized : name);
}

// set the process as coming from a scrape as opposed to an exec
Expand Down
15 changes: 15 additions & 0 deletions collector/lib/Utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ extern "C" {

#include <libsinsp/sinsp.h>

#include <google/protobuf/stubs/common.h>

#include "HostInfo.h"
#include "Logging.h"
#include "Utility.h"
Expand Down Expand Up @@ -247,4 +249,17 @@ std::optional<std::string_view> ExtractContainerIDFromCgroup(std::string_view cg
}
return std::make_optional(container_id_part.substr(0, SHORT_CONTAINER_ID_LENGTH));
}

std::optional<std::string> SanitizedUTF8(const std::string& str) {
std::unique_ptr<char[]> work_buffer(new char[str.size()]);
char* sanitized;

sanitized = google::protobuf::internal::UTF8CoerceToStructurallyValid(str, work_buffer.get(), '?');

if (sanitized != work_buffer.get()) {
return std::nullopt;
}
return std::string(sanitized, str.size());
}

} // namespace collector
8 changes: 7 additions & 1 deletion collector/lib/Utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ ScopedLock<Mutex> Lock(Mutex& mutex) {
extern const std::string kKernelModulesDir;

std::optional<std::string_view> ExtractContainerIDFromCgroup(std::string_view cgroup);
} // namespace collector

// Replace any occurrence of an invalid UTF-8 sequence with the '?' character
// Returns :
// - a new string with invalid characters replaced.
// - nullopt if there is no invalid character (the input string is valid).
std::optional<std::string> SanitizedUTF8(const std::string& str);

} // namespace collector
#endif // _UTILITY_H_
21 changes: 21 additions & 0 deletions collector/test/UtilityTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,25 @@ TEST(ExtractContainerIDFromCgroupTest, TestExtractContainerIDFromCgroup) {
EXPECT_EQ(short_container_id, c.expected_output);
}
}

TEST(SanitizeUTF8Test, TestSanitizeUTF8_Invalid) {
std::string input(
"ab\x80"
"cd");
std::string expected_output("ab?cd");

auto output = SanitizedUTF8(input);

EXPECT_TRUE(output.has_value());
EXPECT_EQ(*output, expected_output);
}

TEST(SanitizeUTF8Test, TestSanitizeUTF8_Valid) {
std::string input("abcd");

auto output = SanitizedUTF8(input);

EXPECT_FALSE(output);
}

} // namespace collector
Loading