From ddba9b95eeb3cfb9ccb3d8401d1610d42f0e3aad Mon Sep 17 00:00:00 2001 From: Devin Gibson Date: Fri, 13 Dec 2024 15:40:43 -0500 Subject: [PATCH] feat(core-clp): Add `BoundedReader` to prevent out-of-bound reads in segmented input streams. (#624) Co-authored-by: haiqi96 <14502009+haiqi96@users.noreply.github.com> Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- components/core/CMakeLists.txt | 3 + components/core/src/clp/BoundedReader.cpp | 43 +++++++++ components/core/src/clp/BoundedReader.hpp | 89 ++++++++++++++++++ components/core/src/clp/StringReader.cpp | 4 + components/core/tests/test-BoundedReader.cpp | 99 ++++++++++++++++++++ 5 files changed, 238 insertions(+) create mode 100644 components/core/src/clp/BoundedReader.cpp create mode 100644 components/core/src/clp/BoundedReader.hpp create mode 100644 components/core/tests/test-BoundedReader.cpp diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 632d31afb..1cbe85e55 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -334,6 +334,8 @@ set(SOURCE_FILES_unitTest src/clp/aws/AwsAuthenticationSigner.cpp src/clp/aws/AwsAuthenticationSigner.hpp src/clp/aws/constants.hpp + src/clp/BoundedReader.cpp + src/clp/BoundedReader.hpp src/clp/BufferedFileReader.cpp src/clp/BufferedFileReader.hpp src/clp/BufferReader.cpp @@ -550,6 +552,7 @@ set(SOURCE_FILES_unitTest submodules/sqlite3/sqlite3ext.h tests/LogSuppressor.hpp tests/test-Array.cpp + tests/test-BoundedReader.cpp tests/test-BufferedFileReader.cpp tests/test-clp_s-end_to_end.cpp tests/test-EncodedVariableInterpreter.cpp diff --git a/components/core/src/clp/BoundedReader.cpp b/components/core/src/clp/BoundedReader.cpp new file mode 100644 index 000000000..9bca08f71 --- /dev/null +++ b/components/core/src/clp/BoundedReader.cpp @@ -0,0 +1,43 @@ +#include "BoundedReader.hpp" + +#include + +#include "ErrorCode.hpp" + +namespace clp { +auto BoundedReader::try_seek_from_begin(size_t pos) -> ErrorCode { + auto const next_pos = pos > m_bound ? m_bound : pos; + if (auto const rc = m_reader->try_seek_from_begin(next_pos); ErrorCode_Success != rc) { + m_curr_pos = ErrorCode_EndOfFile == rc ? next_pos : m_curr_pos; + return rc; + } + m_curr_pos = next_pos; + if (m_curr_pos >= m_bound) { + return ErrorCode_EndOfFile; + } + return ErrorCode_Success; +} + +auto BoundedReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) + -> ErrorCode { + if (m_curr_pos == m_bound) { + num_bytes_read = 0; + return ErrorCode_EndOfFile; + } + + if ((m_curr_pos + num_bytes_to_read) > m_bound) { + num_bytes_to_read = m_bound - m_curr_pos; + } + + auto const rc = m_reader->try_read(buf, num_bytes_to_read, num_bytes_read); + m_curr_pos += num_bytes_read; + if (ErrorCode_EndOfFile == rc) { + if (0 == num_bytes_read) { + return ErrorCode_EndOfFile; + } + } else if (ErrorCode_Success != rc) { + return rc; + } + return ErrorCode_Success; +} +} // namespace clp diff --git a/components/core/src/clp/BoundedReader.hpp b/components/core/src/clp/BoundedReader.hpp new file mode 100644 index 000000000..cfcb07422 --- /dev/null +++ b/components/core/src/clp/BoundedReader.hpp @@ -0,0 +1,89 @@ +#ifndef CLP_BOUNDEDREADER_HPP +#define CLP_BOUNDEDREADER_HPP + +#include +#include + +#include "ErrorCode.hpp" +#include "ReaderInterface.hpp" + +namespace clp { +/** + * BoundedReader is a ReaderInterface designed to wrap other ReaderInterfaces and prevent users + * from reading or seeking beyond a certain point in the underlying input stream. + * + * This is useful when the underlying input stream is divided into several logical segments and we + * want to prevent a reader for an earlier segment consuming any bytes from a later segment. In + * particular, reading part of a later segment may force the reader for that later segment to seek + * backwards, which can be either inefficient or impossible for certain kinds of input streams. + */ +class BoundedReader : public ReaderInterface { +public: + // Constructor + explicit BoundedReader(ReaderInterface* reader, size_t bound) + : m_reader{reader}, + m_bound{bound} { + if (nullptr == m_reader) { + throw ReaderInterface::OperationFailed(ErrorCode_BadParam, __FILE__, __LINE__); + } + m_curr_pos = m_reader->get_pos(); + if (m_curr_pos > m_bound) { + throw ReaderInterface::OperationFailed(ErrorCode_BadParam, __FILE__, __LINE__); + } + } + + // Methods implementing the ReaderInterface + /** + * Tries to get the current position of the read head in the underlying reader. + * @param pos Returns the position of the underlying reader's head + * @return ErrorCode_Success on success + * @return ErrorCode_errno on failure + */ + [[nodiscard]] auto try_get_pos(size_t& pos) -> ErrorCode override { + return m_reader->try_get_pos(pos); + } + + /** + * Tries to seek to the given position, limited by the bound. + * @param pos + * @return ErrorCode_Success on success + * @return ErrorCode_EndOfFile on EOF or if trying to seek beyond the checkpoint + * @return ErrorCode_errno on failure + */ + [[nodiscard]] auto try_seek_from_begin(size_t pos) -> ErrorCode override; + + /** + * Tries to read up to a given number of bytes from the file, limited by the bound. + * @param buf + * @param num_bytes_to_read The number of bytes to try and read + * @param num_bytes_read The actual number of bytes read + * @return ErrorCode_errno on error + * @return ErrorCode_EndOfFile on EOF or trying to read after hitting checkpoint + * @return ErrorCode_Success on success + */ + [[nodiscard]] auto + try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override; + + /** + * This function is unsupported because BoundedReader can not delegate to a potentially + * efficient implementation in the underlying reader, as the underlying reader's implementation + * will not respect the bound. + * @return ErrorCode_Unsupported + */ + [[nodiscard]] auto try_read_to_delimiter( + [[maybe_unused]] char delim, + [[maybe_unused]] bool keep_delimiter, + [[maybe_unused]] bool append, + [[maybe_unused]] std::string& str + ) -> ErrorCode override { + return ErrorCode_Unsupported; + } + +private: + ReaderInterface* m_reader{nullptr}; + size_t m_bound{}; + size_t m_curr_pos{}; +}; +} // namespace clp + +#endif // CLP_BOUNDEDREADER_HPP diff --git a/components/core/src/clp/StringReader.cpp b/components/core/src/clp/StringReader.cpp index 9fa2c27d3..8dd0a3793 100644 --- a/components/core/src/clp/StringReader.cpp +++ b/components/core/src/clp/StringReader.cpp @@ -41,6 +41,10 @@ ErrorCode StringReader::try_read(char* buf, size_t num_bytes_to_read, size_t& nu } ErrorCode StringReader::try_seek_from_begin(size_t pos) { + if (pos > input_string.size()) { + this->pos = input_string.size(); + return ErrorCode_EndOfFile; + } this->pos = pos; return ErrorCode_Success; } diff --git a/components/core/tests/test-BoundedReader.cpp b/components/core/tests/test-BoundedReader.cpp new file mode 100644 index 000000000..9d1a9d2c0 --- /dev/null +++ b/components/core/tests/test-BoundedReader.cpp @@ -0,0 +1,99 @@ +#include +#include +#include +#include + +#include + +#include "../src/clp/BoundedReader.hpp" +#include "../src/clp/ErrorCode.hpp" +#include "../src/clp/StringReader.hpp" + +TEST_CASE("Test Bounded Reader", "[BoundedReader]") { + constexpr std::string_view cTestString{"0123456789"}; + + SECTION("BoundedReader does not support try_read_to_delimiter") { + clp::StringReader string_reader; + string_reader.open(std::string{cTestString}); + clp::BoundedReader bounded_reader{&string_reader, cTestString.size()}; + std::string tmp; + REQUIRE(clp::ErrorCode_Unsupported + == bounded_reader.try_read_to_delimiter('0', false, false, tmp)); + } + + SECTION("BoundedReader does not allow reads beyond end of underlying stream.") { + clp::StringReader string_reader; + string_reader.open(std::string{cTestString}); + clp::BoundedReader bounded_reader{&string_reader, cTestString.size() + 1}; + std::array buf{}; + size_t num_bytes_read{}; + auto rc = bounded_reader.try_read(buf.data(), cTestString.size() + 1, num_bytes_read); + REQUIRE(clp::ErrorCode_Success == rc); + REQUIRE(num_bytes_read == cTestString.size()); + REQUIRE(cTestString.size() == string_reader.get_pos()); + REQUIRE(cTestString.size() == bounded_reader.get_pos()); + } + + SECTION("BoundedReader does not allow reads beyond checkpoint.") { + clp::StringReader string_reader; + string_reader.open(std::string{cTestString}); + clp::BoundedReader bounded_reader{&string_reader, 1}; + std::array buf{}; + size_t num_bytes_read{}; + auto rc = bounded_reader.try_read(buf.data(), cTestString.size(), num_bytes_read); + REQUIRE(clp::ErrorCode_Success == rc); + REQUIRE(1 == num_bytes_read); + REQUIRE(1 == string_reader.get_pos()); + REQUIRE(1 == bounded_reader.get_pos()); + rc = bounded_reader.try_read(buf.data(), 1, num_bytes_read); + REQUIRE(clp::ErrorCode_EndOfFile == rc); + REQUIRE(0 == num_bytes_read); + REQUIRE(1 == string_reader.get_pos()); + REQUIRE(1 == bounded_reader.get_pos()); + } + + SECTION("BoundedReader does allow reads before checkpoint.") { + clp::StringReader string_reader; + string_reader.open(std::string{cTestString}); + clp::BoundedReader bounded_reader{&string_reader, 1}; + char buf{}; + size_t num_bytes_read{}; + auto rc = bounded_reader.try_read(&buf, 1, num_bytes_read); + REQUIRE(clp::ErrorCode_Success == rc); + REQUIRE(1 == num_bytes_read); + REQUIRE(1 == string_reader.get_pos()); + REQUIRE(1 == bounded_reader.get_pos()); + } + + SECTION("BoundedReader does not allow seeks beyond end of underlying stream.") { + clp::StringReader string_reader; + string_reader.open(std::string{cTestString}); + clp::BoundedReader bounded_reader{&string_reader, cTestString.size() + 1}; + auto rc = bounded_reader.try_seek_from_begin(cTestString.size() + 1); + REQUIRE(clp::ErrorCode_EndOfFile == rc); + REQUIRE(cTestString.size() == string_reader.get_pos()); + REQUIRE(cTestString.size() == bounded_reader.get_pos()); + } + + SECTION("BoundedReader does not allow seeks beyond checkpoint.") { + clp::StringReader string_reader; + string_reader.open(std::string{cTestString}); + clp::BoundedReader bounded_reader{&string_reader, 1}; + size_t num_bytes_read{}; + auto rc = bounded_reader.try_seek_from_begin(cTestString.size()); + REQUIRE(clp::ErrorCode_EndOfFile == rc); + REQUIRE(1 == string_reader.get_pos()); + REQUIRE(1 == bounded_reader.get_pos()); + } + + SECTION("BoundedReader does allow seeks before checkpoint.") { + clp::StringReader string_reader; + string_reader.open(std::string{cTestString}); + clp::BoundedReader bounded_reader{&string_reader, 2}; + size_t num_bytes_read{}; + auto rc = bounded_reader.try_seek_from_begin(1); + REQUIRE(clp::ErrorCode_Success == rc); + REQUIRE(1 == string_reader.get_pos()); + REQUIRE(1 == bounded_reader.get_pos()); + } +}