Skip to content

Commit

Permalink
clp-core: Add a new FileReader class that uses system call without bu…
Browse files Browse the repository at this point in the history
…ffering. (y-scope#516)

Co-authored-by: Lin Zhihao <[email protected]>
  • Loading branch information
haiqi96 and LinZhihao-723 authored Aug 22, 2024
1 parent 4df6701 commit ab92468
Show file tree
Hide file tree
Showing 7 changed files with 266 additions and 1 deletion.
3 changes: 3 additions & 0 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,8 @@ set(SOURCE_FILES_unitTest
src/clp/ffi/Value.hpp
src/clp/FileDescriptor.cpp
src/clp/FileDescriptor.hpp
src/clp/FileDescriptorReader.cpp
src/clp/FileDescriptorReader.hpp
src/clp/FileReader.cpp
src/clp/FileReader.hpp
src/clp/FileWriter.cpp
Expand Down Expand Up @@ -484,6 +486,7 @@ set(SOURCE_FILES_unitTest
tests/test-encoding_methods.cpp
tests/test-ffi_KeyValuePairLogEvent.cpp
tests/test-ffi_SchemaTree.cpp
tests/test-FileDescriptorReader.cpp
tests/test-Grep.cpp
tests/test-hash_utils.cpp
tests/test-ir_encoding_methods.cpp
Expand Down
9 changes: 8 additions & 1 deletion components/core/src/clp/FileDescriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ FileDescriptor::~FileDescriptor() {
auto FileDescriptor::get_size() const -> size_t {
struct stat stat_result {};

if (0 != fstat(m_fd, &stat_result)) {
if (ErrorCode_Success != stat(stat_result)) {
throw OperationFailed(
ErrorCode_errno,
__FILE__,
Expand All @@ -60,4 +60,11 @@ auto FileDescriptor::get_size() const -> size_t {
}
return static_cast<size_t>(stat_result.st_size);
}

/**
 * Fills `stat_buffer` by calling `fstat` on the wrapped file descriptor.
 * @param stat_buffer Returns the stat results.
 * @return ErrorCode_Success if `fstat` returns 0.
 * @return ErrorCode_errno otherwise.
 */
auto FileDescriptor::stat(struct stat& stat_buffer) const -> ErrorCode {
    auto const fstat_result = fstat(m_fd, &stat_buffer);
    if (0 == fstat_result) {
        return ErrorCode_Success;
    }
    return ErrorCode_errno;
}
} // namespace clp
9 changes: 9 additions & 0 deletions components/core/src/clp/FileDescriptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define CLP_FILEDESCRIPTOR_HPP

#include <fcntl.h>
#include <sys/stat.h>

#include <cstddef>
#include <string>
Expand Down Expand Up @@ -83,6 +84,14 @@ class FileDescriptor {
*/
[[nodiscard]] auto get_open_mode() const -> OpenMode {
    return m_open_mode;
}

/**
* Obtains information about the open file associated with the underlying file descriptor by
* calling `fstat` on it.
* @param stat_buffer Returns the stat results.
* @return ErrorCode_Success on success.
* @return ErrorCode_errno on error (i.e., `fstat` returned a non-zero value).
*/
[[nodiscard]] auto stat(struct stat& stat_buffer) const -> ErrorCode;

private:
int m_fd{-1};
OpenMode m_open_mode;
Expand Down
58 changes: 58 additions & 0 deletions components/core/src/clp/FileDescriptorReader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include "FileDescriptorReader.hpp"

#include <sys/stat.h>
#include <unistd.h>

#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <span>

#include "ErrorCode.hpp"

using std::span;

namespace clp {
/**
 * Reads from the file descriptor until `num_bytes_to_read` bytes have been read, `read` reports
 * EOF, or `read` fails. Short reads are continued, and reads interrupted by a signal (`EINTR`)
 * are retried rather than reported as errors.
 * @param buf Destination buffer; must not be null.
 * @param num_bytes_to_read The number of bytes to try and read.
 * @param num_bytes_read Returns the number of bytes read by this call. On ErrorCode_errno, it
 * holds the number of bytes read before the failing `read`.
 * @return ErrorCode_BadParam if `buf` is null.
 * @return ErrorCode_errno if `read` fails with an error other than `EINTR`.
 * @return ErrorCode_EndOfFile if no bytes were read (this includes `num_bytes_to_read == 0`).
 * @return ErrorCode_Success otherwise.
 */
auto FileDescriptorReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read)
        -> ErrorCode {
    if (nullptr == buf) {
        return ErrorCode_BadParam;
    }

    num_bytes_read = 0;
    // View over the part of `buf` that hasn't been filled yet.
    span dst_view{buf, num_bytes_to_read};
    while (false == dst_view.empty()) {
        auto const bytes_read = ::read(m_fd.get_raw_fd(), dst_view.data(), dst_view.size());
        if (0 == bytes_read) {
            // EOF
            break;
        }
        if (bytes_read < 0) {
            if (EINTR == errno) {
                // The call was interrupted by a signal before any data was transferred; this is
                // not a real I/O failure, so retry.
                continue;
            }
            return ErrorCode_errno;
        }
        // `bytes_read` is strictly positive here, so the conversion to `size_t` is lossless.
        auto const num_bytes_transferred = static_cast<size_t>(bytes_read);
        num_bytes_read += num_bytes_transferred;
        dst_view = dst_view.subspan(num_bytes_transferred);
    }
    if (dst_view.size() == num_bytes_to_read) {
        // Nothing was consumed from the destination view, i.e., no bytes were read.
        return ErrorCode_EndOfFile;
    }
    return ErrorCode_Success;
}

/**
 * Repositions the file offset of the underlying descriptor to `pos` bytes from the start of the
 * file using `lseek` with `SEEK_SET`.
 * @param pos
 * @return ErrorCode_errno if `lseek` returns -1.
 * @return ErrorCode_Success otherwise.
 */
auto FileDescriptorReader::try_seek_from_begin(size_t pos) -> ErrorCode {
    auto const target_offset = static_cast<off_t>(pos);
    auto const new_offset = lseek(m_fd.get_raw_fd(), target_offset, SEEK_SET);
    if (static_cast<off_t>(-1) != new_offset) {
        return ErrorCode_Success;
    }
    return ErrorCode_errno;
}

/**
 * Queries the current file offset of the underlying descriptor using `lseek` with a zero offset
 * relative to `SEEK_CUR`.
 * @param pos Returns the current offset; left untouched on failure.
 * @return ErrorCode_errno if `lseek` returns -1.
 * @return ErrorCode_Success otherwise.
 */
auto FileDescriptorReader::try_get_pos(size_t& pos) -> ErrorCode {
    auto const read_head_offset = lseek(m_fd.get_raw_fd(), 0, SEEK_CUR);
    if (static_cast<off_t>(-1) != read_head_offset) {
        pos = static_cast<size_t>(read_head_offset);
        return ErrorCode_Success;
    }
    return ErrorCode_errno;
}
} // namespace clp
104 changes: 104 additions & 0 deletions components/core/src/clp/FileDescriptorReader.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#ifndef CLP_FILEDESCRIPTORREADER_HPP
#define CLP_FILEDESCRIPTORREADER_HPP

#include <sys/stat.h>

#include <cstddef>
#include <string>
#include <string_view>
#include <utility>

#include "ErrorCode.hpp"
#include "FileDescriptor.hpp"
#include "ReaderInterface.hpp"
#include "TraceableException.hpp"

namespace clp {
/**
* Class for performing direct reads from an on-disk file using `clp::FileDescriptor` and C-style
* system calls. Unlike `clp::FileReader`, which uses the `FILE` stream interface to buffer read
* data, this class does not buffer data internally. Instead, the user of this class is expected to
* buffer and read the data efficiently.
*
* Note: If you don't plan to handle the data buffering yourself, do not use this class. Use
* `clp::FileReader` instead.
*/
class FileDescriptorReader : public ReaderInterface {
public:
// Types
/**
* Exception type identifying a failed `FileDescriptorReader` operation.
*/
class OperationFailed : public TraceableException {
public:
// Constructors
OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
: TraceableException(error_code, filename, line_number) {}

// Methods
[[nodiscard]] auto what() const noexcept -> char const* override {
return "clp::FileDescriptorReader operation failed";
}
};

// Constructors
/**
* Stores the given path and constructs the underlying `FileDescriptor` for it with
* `FileDescriptor::OpenMode::ReadOnly`.
*
* NOTE: `m_fd` is initialized from `m_path`, so `m_path` must remain declared before `m_fd`
* (members are initialized in declaration order).
* @param path Path of the file to read.
*/
explicit FileDescriptorReader(std::string path)
: m_path{std::move(path)},
m_fd{m_path, FileDescriptor::OpenMode::ReadOnly} {}

// Explicitly disable copy constructor and assignment operator
FileDescriptorReader(FileDescriptorReader const&) = delete;
auto operator=(FileDescriptorReader const&) -> FileDescriptorReader& = delete;

// Explicitly disable move constructor and assignment operator
FileDescriptorReader(FileDescriptorReader&&) = delete;
auto operator=(FileDescriptorReader&&) -> FileDescriptorReader& = delete;

// Destructor
~FileDescriptorReader() override = default;

// Methods implementing the ReaderInterface
/**
* Tries to read up to a given number of bytes from the file.
* @param buf
* @param num_bytes_to_read The number of bytes to try and read
* @param num_bytes_read The actual number of bytes read
* @return ErrorCode_BadParam if buf is invalid
* @return ErrorCode_errno on error
* @return ErrorCode_EndOfFile if no bytes were read (EOF, or `num_bytes_to_read` is 0)
* @return ErrorCode_Success on success
*/
[[nodiscard]] auto
try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override;

/**
* Tries to seek to the given position, relative to the beginning of the file.
* @param pos
* @return ErrorCode_errno on error
* @return ErrorCode_Success on success
*/
[[nodiscard]] auto try_seek_from_begin(size_t pos) -> ErrorCode override;

/**
* Tries to get the current position of the read head in the file.
* @param pos Returns the position of the read head in the file.
* @return ErrorCode_errno on error
* @return ErrorCode_Success on success
*/
[[nodiscard]] auto try_get_pos(size_t& pos) -> ErrorCode override;

// Methods
/**
* @return A view of the path this reader was constructed with. The view refers to a member of
* this object, so it must not be used after the reader is destroyed.
*/
[[nodiscard]] auto get_path() const -> std::string_view { return m_path; }

/**
* Obtains information about the open file associated with the underlying file descriptor.
* @param stat_buffer Returns the stat results.
* @return Same as `FileDescriptor::stat`
*/
[[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode {
return m_fd.stat(stat_buffer);
}

private:
// NOTE: Declaration order matters; `m_fd` is constructed from `m_path`.
std::string m_path;
FileDescriptor m_fd;
};
} // namespace clp

#endif // CLP_FILEDESCRIPTORREADER_HPP
3 changes: 3 additions & 0 deletions components/core/src/clp/ReaderInterface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ class ReaderInterface {
char const* what() const noexcept override { return "ReaderInterface operation failed"; }
};

// Destructor
// Virtual so that a derived reader is destroyed correctly when it is deleted through a
// `ReaderInterface` pointer.
virtual ~ReaderInterface() = default;

// Methods
/**
* Tries to read up to a given number of bytes from the underlying data source.
* @param buf Destination buffer
* @param num_bytes_to_read The number of bytes to try and read
* @param num_bytes_read Returns the actual number of bytes read
* @return An error code defined by the implementing class (e.g., see
* `FileDescriptorReader::try_read`).
*/
virtual ErrorCode try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) = 0;
/**
* Tries to seek to the given position, relative to the beginning of the underlying data source.
* @param pos
* @return An error code defined by the implementing class (e.g., see
* `FileDescriptorReader::try_seek_from_begin`).
*/
virtual ErrorCode try_seek_from_begin(size_t pos) = 0;
Expand Down
81 changes: 81 additions & 0 deletions components/core/tests/test-FileDescriptorReader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#include <cstddef>
#include <filesystem>
#include <string>
#include <string_view>
#include <vector>

#include <Catch2/single_include/catch2/catch.hpp>

#include "../src/clp/Array.hpp"
#include "../src/clp/FileDescriptorReader.hpp"
#include "../src/clp/FileReader.hpp"
#include "../src/clp/ReaderInterface.hpp"

using clp::Array;

namespace {
// Reused code starts
// Default size of the intermediate buffer that `get_content` uses for each individual read.
constexpr size_t cDefaultReaderBufferSize{1024};

/**
* @return The path of the test input file, formed by joining the directory that contains this
* source file (as given by `__FILE__`) with `get_test_input_path_relative_to_tests_dir()`.
*/
[[nodiscard]] auto get_test_input_local_path() -> std::string;

/**
* @return The path of the test input file relative to the directory containing this source file.
*/
[[nodiscard]] auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path;

/**
* @param reader
* @param read_buf_size The size of the buffer to use for individual reads from the reader.
* @return All data read from the given reader.
*/
auto get_content(clp::ReaderInterface& reader, size_t read_buf_size = cDefaultReaderBufferSize)
-> std::vector<char>;

auto get_test_input_local_path() -> std::string {
    // `__FILE__` is this source file's path, so its parent is the directory holding the tests.
    auto const tests_dir{std::filesystem::path{__FILE__}.parent_path()};
    auto const input_path{tests_dir / get_test_input_path_relative_to_tests_dir()};
    return input_path.string();
}

auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path {
    // Build the path component by component: <tests dir>/test_log_files/log.txt
    std::filesystem::path relative_path{"test_log_files"};
    relative_path /= "log.txt";
    return relative_path;
}

auto get_content(clp::ReaderInterface& reader, size_t read_buf_size) -> std::vector<char> {
    std::vector<char> content;
    Array<char> chunk{read_buf_size};
    bool has_more_content{true};
    while (has_more_content) {
        size_t chunk_size{};
        // The result of `read` decides whether another iteration runs; whatever this call
        // reported in `chunk_size` is appended either way.
        has_more_content = reader.read(chunk.data(), read_buf_size, chunk_size);
        char const* const chunk_begin{chunk.data()};
        content.insert(content.cend(), chunk_begin, chunk_begin + chunk_size);
    }
    return content;
}
} // namespace

// Reused code ends

TEST_CASE("file_descriptor_reader_basic", "[FileDescriptorReader]") {
    // The content read through `clp::FileReader` is the reference that the content read through
    // `clp::FileDescriptorReader` must match.
    clp::FileReader reference_reader{get_test_input_local_path()};
    auto const expected_content{get_content(reference_reader)};

    clp::FileDescriptorReader fd_reader{get_test_input_local_path()};
    auto const actual_content{get_content(fd_reader)};

    REQUIRE((actual_content == expected_content));
}

TEST_CASE("file_descriptor_reader_with_offset_and_seek", "[FileDescriptorReader]") {
    // Position, relative to the beginning of the file, that both readers seek to before reading.
    constexpr size_t cOffset{319};

    // Reference: seek, read everything that follows, then record the final position.
    clp::FileReader reference_reader{get_test_input_local_path()};
    reference_reader.seek_from_begin(cOffset);
    auto const expected_content{get_content(reference_reader)};
    auto const expected_end_pos{reference_reader.get_pos()};

    // Same sequence of operations through the reader under test.
    clp::FileDescriptorReader fd_reader(get_test_input_local_path());
    fd_reader.seek_from_begin(cOffset);
    auto const actual_content{get_content(fd_reader)};
    auto const actual_end_pos{fd_reader.get_pos()};

    REQUIRE((actual_end_pos == expected_end_pos));
    REQUIRE((actual_content == expected_content));
}

0 comments on commit ab92468

Please sign in to comment.