Skip to content

Commit

Permalink
feat(core-clp): Add BoundedReader to prevent out-of-bound reads in …
Browse files Browse the repository at this point in the history
…segmented input streams. (#624)

Co-authored-by: haiqi96 <[email protected]>
Co-authored-by: Lin Zhihao <[email protected]>
  • Loading branch information
3 people authored Dec 13, 2024
1 parent 909e19e commit ddba9b9
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 0 deletions.
3 changes: 3 additions & 0 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,8 @@ set(SOURCE_FILES_unitTest
src/clp/aws/AwsAuthenticationSigner.cpp
src/clp/aws/AwsAuthenticationSigner.hpp
src/clp/aws/constants.hpp
src/clp/BoundedReader.cpp
src/clp/BoundedReader.hpp
src/clp/BufferedFileReader.cpp
src/clp/BufferedFileReader.hpp
src/clp/BufferReader.cpp
Expand Down Expand Up @@ -550,6 +552,7 @@ set(SOURCE_FILES_unitTest
submodules/sqlite3/sqlite3ext.h
tests/LogSuppressor.hpp
tests/test-Array.cpp
tests/test-BoundedReader.cpp
tests/test-BufferedFileReader.cpp
tests/test-clp_s-end_to_end.cpp
tests/test-EncodedVariableInterpreter.cpp
Expand Down
43 changes: 43 additions & 0 deletions components/core/src/clp/BoundedReader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#include "BoundedReader.hpp"

#include <cstddef>

#include "ErrorCode.hpp"

namespace clp {
auto BoundedReader::try_seek_from_begin(size_t pos) -> ErrorCode {
auto const next_pos = pos > m_bound ? m_bound : pos;
if (auto const rc = m_reader->try_seek_from_begin(next_pos); ErrorCode_Success != rc) {
m_curr_pos = ErrorCode_EndOfFile == rc ? next_pos : m_curr_pos;
return rc;
}
m_curr_pos = next_pos;
if (m_curr_pos >= m_bound) {
return ErrorCode_EndOfFile;
}
return ErrorCode_Success;
}

auto BoundedReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read)
-> ErrorCode {
if (m_curr_pos == m_bound) {
num_bytes_read = 0;
return ErrorCode_EndOfFile;
}

if ((m_curr_pos + num_bytes_to_read) > m_bound) {
num_bytes_to_read = m_bound - m_curr_pos;
}

auto const rc = m_reader->try_read(buf, num_bytes_to_read, num_bytes_read);
m_curr_pos += num_bytes_read;
if (ErrorCode_EndOfFile == rc) {
if (0 == num_bytes_read) {
return ErrorCode_EndOfFile;
}
} else if (ErrorCode_Success != rc) {
return rc;
}
return ErrorCode_Success;
}
} // namespace clp
89 changes: 89 additions & 0 deletions components/core/src/clp/BoundedReader.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#ifndef CLP_BOUNDEDREADER_HPP
#define CLP_BOUNDEDREADER_HPP

#include <cstddef>
#include <string>

#include "ErrorCode.hpp"
#include "ReaderInterface.hpp"

namespace clp {
/**
* BoundedReader is a ReaderInterface designed to wrap other ReaderInterfaces and prevent users
* from reading or seeking beyond a certain point in the underlying input stream.
*
* This is useful when the underlying input stream is divided into several logical segments and we
* want to prevent a reader for an earlier segment consuming any bytes from a later segment. In
* particular, reading part of a later segment may force the reader for that later segment to seek
* backwards, which can be either inefficient or impossible for certain kinds of input streams.
*/
class BoundedReader : public ReaderInterface {
public:
// Constructor
explicit BoundedReader(ReaderInterface* reader, size_t bound)
: m_reader{reader},
m_bound{bound} {
if (nullptr == m_reader) {
throw ReaderInterface::OperationFailed(ErrorCode_BadParam, __FILE__, __LINE__);
}
m_curr_pos = m_reader->get_pos();
if (m_curr_pos > m_bound) {
throw ReaderInterface::OperationFailed(ErrorCode_BadParam, __FILE__, __LINE__);
}
}

// Methods implementing the ReaderInterface
/**
* Tries to get the current position of the read head in the underlying reader.
* @param pos Returns the position of the underlying reader's head
* @return ErrorCode_Success on success
* @return ErrorCode_errno on failure
*/
[[nodiscard]] auto try_get_pos(size_t& pos) -> ErrorCode override {
return m_reader->try_get_pos(pos);
}

/**
* Tries to seek to the given position, limited by the bound.
* @param pos
* @return ErrorCode_Success on success
* @return ErrorCode_EndOfFile on EOF or if trying to seek beyond the checkpoint
* @return ErrorCode_errno on failure
*/
[[nodiscard]] auto try_seek_from_begin(size_t pos) -> ErrorCode override;

/**
* Tries to read up to a given number of bytes from the file, limited by the bound.
* @param buf
* @param num_bytes_to_read The number of bytes to try and read
* @param num_bytes_read The actual number of bytes read
* @return ErrorCode_errno on error
* @return ErrorCode_EndOfFile on EOF or trying to read after hitting checkpoint
* @return ErrorCode_Success on success
*/
[[nodiscard]] auto
try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override;

/**
* This function is unsupported because BoundedReader can not delegate to a potentially
* efficient implementation in the underlying reader, as the underlying reader's implementation
* will not respect the bound.
* @return ErrorCode_Unsupported
*/
[[nodiscard]] auto try_read_to_delimiter(
[[maybe_unused]] char delim,
[[maybe_unused]] bool keep_delimiter,
[[maybe_unused]] bool append,
[[maybe_unused]] std::string& str
) -> ErrorCode override {
return ErrorCode_Unsupported;
}

private:
ReaderInterface* m_reader{nullptr};
size_t m_bound{};
size_t m_curr_pos{};
};
} // namespace clp

#endif // CLP_BOUNDEDREADER_HPP
4 changes: 4 additions & 0 deletions components/core/src/clp/StringReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ ErrorCode StringReader::try_read(char* buf, size_t num_bytes_to_read, size_t& nu
}

ErrorCode StringReader::try_seek_from_begin(size_t pos) {
if (pos > input_string.size()) {
this->pos = input_string.size();
return ErrorCode_EndOfFile;
}
this->pos = pos;
return ErrorCode_Success;
}
Expand Down
99 changes: 99 additions & 0 deletions components/core/tests/test-BoundedReader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include <array>
#include <cstddef>
#include <string>
#include <string_view>

#include <Catch2/single_include/catch2/catch.hpp>

#include "../src/clp/BoundedReader.hpp"
#include "../src/clp/ErrorCode.hpp"
#include "../src/clp/StringReader.hpp"

TEST_CASE("Test Bounded Reader", "[BoundedReader]") {
constexpr std::string_view cTestString{"0123456789"};

SECTION("BoundedReader does not support try_read_to_delimiter") {
clp::StringReader string_reader;
string_reader.open(std::string{cTestString});
clp::BoundedReader bounded_reader{&string_reader, cTestString.size()};
std::string tmp;
REQUIRE(clp::ErrorCode_Unsupported
== bounded_reader.try_read_to_delimiter('0', false, false, tmp));
}

SECTION("BoundedReader does not allow reads beyond end of underlying stream.") {
clp::StringReader string_reader;
string_reader.open(std::string{cTestString});
clp::BoundedReader bounded_reader{&string_reader, cTestString.size() + 1};
std::array<char, cTestString.size() + 1> buf{};
size_t num_bytes_read{};
auto rc = bounded_reader.try_read(buf.data(), cTestString.size() + 1, num_bytes_read);
REQUIRE(clp::ErrorCode_Success == rc);
REQUIRE(num_bytes_read == cTestString.size());
REQUIRE(cTestString.size() == string_reader.get_pos());
REQUIRE(cTestString.size() == bounded_reader.get_pos());
}

SECTION("BoundedReader does not allow reads beyond checkpoint.") {
clp::StringReader string_reader;
string_reader.open(std::string{cTestString});
clp::BoundedReader bounded_reader{&string_reader, 1};
std::array<char, cTestString.size()> buf{};
size_t num_bytes_read{};
auto rc = bounded_reader.try_read(buf.data(), cTestString.size(), num_bytes_read);
REQUIRE(clp::ErrorCode_Success == rc);
REQUIRE(1 == num_bytes_read);
REQUIRE(1 == string_reader.get_pos());
REQUIRE(1 == bounded_reader.get_pos());
rc = bounded_reader.try_read(buf.data(), 1, num_bytes_read);
REQUIRE(clp::ErrorCode_EndOfFile == rc);
REQUIRE(0 == num_bytes_read);
REQUIRE(1 == string_reader.get_pos());
REQUIRE(1 == bounded_reader.get_pos());
}

SECTION("BoundedReader does allow reads before checkpoint.") {
clp::StringReader string_reader;
string_reader.open(std::string{cTestString});
clp::BoundedReader bounded_reader{&string_reader, 1};
char buf{};
size_t num_bytes_read{};
auto rc = bounded_reader.try_read(&buf, 1, num_bytes_read);
REQUIRE(clp::ErrorCode_Success == rc);
REQUIRE(1 == num_bytes_read);
REQUIRE(1 == string_reader.get_pos());
REQUIRE(1 == bounded_reader.get_pos());
}

SECTION("BoundedReader does not allow seeks beyond end of underlying stream.") {
clp::StringReader string_reader;
string_reader.open(std::string{cTestString});
clp::BoundedReader bounded_reader{&string_reader, cTestString.size() + 1};
auto rc = bounded_reader.try_seek_from_begin(cTestString.size() + 1);
REQUIRE(clp::ErrorCode_EndOfFile == rc);
REQUIRE(cTestString.size() == string_reader.get_pos());
REQUIRE(cTestString.size() == bounded_reader.get_pos());
}

SECTION("BoundedReader does not allow seeks beyond checkpoint.") {
clp::StringReader string_reader;
string_reader.open(std::string{cTestString});
clp::BoundedReader bounded_reader{&string_reader, 1};
size_t num_bytes_read{};
auto rc = bounded_reader.try_seek_from_begin(cTestString.size());
REQUIRE(clp::ErrorCode_EndOfFile == rc);
REQUIRE(1 == string_reader.get_pos());
REQUIRE(1 == bounded_reader.get_pos());
}

SECTION("BoundedReader does allow seeks before checkpoint.") {
clp::StringReader string_reader;
string_reader.open(std::string{cTestString});
clp::BoundedReader bounded_reader{&string_reader, 2};
size_t num_bytes_read{};
auto rc = bounded_reader.try_seek_from_begin(1);
REQUIRE(clp::ErrorCode_Success == rc);
REQUIRE(1 == string_reader.get_pos());
REQUIRE(1 == bounded_reader.get_pos());
}
}

0 comments on commit ddba9b9

Please sign in to comment.