From 2a64a203c7015403546ba770967db93c287c652a Mon Sep 17 00:00:00 2001 From: Pranav Srinivas Kumar Date: Mon, 27 Jul 2020 10:04:10 -0500 Subject: [PATCH 1/5] Added basic csv2::Writer class --- include/csv2/parameters.hpp | 49 +++++++++++++++++++++++++++++++++++++ include/csv2/reader.hpp | 45 +--------------------------------- include/csv2/writer.hpp | 40 ++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 44 deletions(-) create mode 100644 include/csv2/parameters.hpp create mode 100644 include/csv2/writer.hpp diff --git a/include/csv2/parameters.hpp b/include/csv2/parameters.hpp new file mode 100644 index 0000000..d9f72b3 --- /dev/null +++ b/include/csv2/parameters.hpp @@ -0,0 +1,49 @@ +#pragma once +#include + +namespace csv2 { + +namespace trim_policy { +struct no_trimming { +public: + static std::pair trim(const char *buffer, size_t start, size_t end) { + (void)(buffer); // to silence unused parameter warning + return {start, end}; + } +}; + +template struct trim_characters { +private: + constexpr static bool is_trim_char(char) { return false; } + + template constexpr static bool is_trim_char(char c, char head, Tail... 
tail) { + return c == head || is_trim_char(c, tail...); + } + +public: + static std::pair trim(const char *buffer, size_t start, size_t end) { + size_t new_start = start, new_end = end; + while (new_start != new_end && is_trim_char(buffer[new_start], character_list...)) + ++new_start; + while (new_start != new_end && is_trim_char(buffer[new_end - 1], character_list...)) + --new_end; + return {new_start, new_end}; + } +}; + +using trim_whitespace = trim_characters<' ', '\t'>; +} // namespace trim_policy + +template struct delimiter { + constexpr static char value = character; +}; + +template struct quote_character { + constexpr static char value = character; +}; + +template struct first_row_is_header { + constexpr static bool value = flag; +}; + +} \ No newline at end of file diff --git a/include/csv2/reader.hpp b/include/csv2/reader.hpp index e470bcd..ba823fb 100644 --- a/include/csv2/reader.hpp +++ b/include/csv2/reader.hpp @@ -1,55 +1,12 @@ #pragma once #include #include +#include #include #include -#include namespace csv2 { -namespace trim_policy { -struct no_trimming { -public: - static std::pair trim(const char *buffer, size_t start, size_t end) { - (void)(buffer); // to silence unused parameter warning - return {start, end}; - } -}; - -template struct trim_characters { -private: - constexpr static bool is_trim_char(char) { return false; } - - template constexpr static bool is_trim_char(char c, char head, Tail... 
tail) { - return c == head || is_trim_char(c, tail...); - } - -public: - static std::pair trim(const char *buffer, size_t start, size_t end) { - size_t new_start = start, new_end = end; - while (new_start != new_end && is_trim_char(buffer[new_start], character_list...)) - ++new_start; - while (new_start != new_end && is_trim_char(buffer[new_end - 1], character_list...)) - --new_end; - return {new_start, new_end}; - } -}; - -using trim_whitespace = trim_characters<' ', '\t'>; -} // namespace trim_policy - -template struct delimiter { - constexpr static char value = character; -}; - -template struct quote_character { - constexpr static char value = character; -}; - -template struct first_row_is_header { - constexpr static bool value = flag; -}; - template , class quote_character = quote_character<'"'>, class first_row_is_header = first_row_is_header, class trim_policy = trim_policy::trim_whitespace> diff --git a/include/csv2/writer.hpp b/include/csv2/writer.hpp new file mode 100644 index 0000000..f53047b --- /dev/null +++ b/include/csv2/writer.hpp @@ -0,0 +1,40 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace csv2 { + +template > +class Writer { + std::ofstream& stream_; // output stream for the writer +public: + template + Writer(Stream&& stream) : stream_(std::forward(stream)) {} + + ~Writer() { + stream_.close(); + } + + template + void write_row(Container&& row) { + const auto& strings = std::forward(row); + const auto delimiter_string = std::string(1, delimiter::value); + std::copy(strings.begin(), strings.end() - 1, + std::ostream_iterator(stream_, delimiter_string.c_str())); + stream_ << strings.back() << "\n"; + } + + template + void write_rows(Container&& rows) { + const auto& container_of_rows = std::forward(rows); + for (const auto& row : container_of_rows) { + write_row(row); + } + } +}; + +} \ No newline at end of file From bc052e13c0048793ef0f3056c0b88fa0e7ba2f52 Mon Sep 17 00:00:00 2001 From: Pranav Srinivas 
Kumar Date: Mon, 27 Jul 2020 10:08:39 -0500 Subject: [PATCH 2/5] Updated README showing CSV Writer --- README.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f25d77c..da15eff 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ csv2

+## CSV Reader + ```cpp #include @@ -24,7 +26,7 @@ int main() { } ``` -## Performance Benchmark +### Performance Benchmark This benchmark measures the average execution time (of 5 runs after 3 warmup runs) for `csv2` to memory-map the input CSV file and iterate over every cell in the CSV. See `benchmark/main.cpp` for more details. @@ -34,7 +36,7 @@ g++ -I../include -O3 -std=c++11 -o main main.cpp ./main ``` -### Hardware +#### Hardware ``` MacBook Pro (15-inch, 2019) @@ -43,7 +45,7 @@ Memory: 32 GB 2400 MHz DDR4 Operating System: macOS Catalina version 10.15.3 ``` -### Results (as of 23 APR 2020) +#### Results (as of 23 APR 2020) | Dataset | File Size | Rows | Cols | Time | |:--- | ---:| ---:| ---:| ---:| @@ -59,7 +61,7 @@ Operating System: macOS Catalina version 10.15.3 | [SHA-1 password hash dump](https://www.kaggle.com/urvishramaiya/have-i-been-pwnd) | 11 GB | 2,62,974,241 | 2 | 19.505s | | [DOHUI NOH scaled_data](https://www.kaggle.com/seaa0612/scaled-data) | 16 GB | 496,782 | 3213 | 32.780s | -## API +### Reader API Here is the public API available to you: @@ -123,6 +125,50 @@ public: }; ``` +## CSV Writer + +This library also provides a basic `csv2::Writer` class - one that can be used to write CSV rows to file. 
Here's a basic usage: + +```cpp +#include +#include +#include +using namespace csv2; + +int main() { + std::ofstream stream("foo.csv"); + Writer> writer(stream); + + std::vector> rows = + { + {"a", "b", "c"}, + {"1", "2", "3"}, + {"4", "5", "6"} + }; + + writer.write_rows(rows); +} +``` + +### API + +Here is the public API available to you: + +```cpp +template > +class Writer { +public: + + // Construct using an std::ofstream + Writer(output_file_stream stream); + + // Use this to write a single row to file + void write_row(container_of_strings row); + + // Use this to write a list of rows to file + void write_rows(container_of_rows rows); +``` + ## Compiling Tests ```bash From 2027a5e50cad1c3def698d5f4bbc1a596b7ce346 Mon Sep 17 00:00:00 2001 From: Pranav Srinivas Kumar Date: Mon, 27 Jul 2020 10:11:17 -0500 Subject: [PATCH 3/5] Added table of contents --- README.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index da15eff..367393b 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,17 @@ csv2

+## Table of Contents + +* [CSV Reader](#csv-reader) + * [Performance Benchmark](#performance-benchmark) + * [Reader API](#reader-api) +* [CSV Writer](#csv-writer) + * [Writer API](#writer-api) +* [Compiling Tests](#compiling-tests) +* [Contributing](#contributing) +* [License](#license) + ## CSV Reader ```cpp @@ -150,7 +161,7 @@ int main() { } ``` -### API +### Writer API Here is the public API available to you: From 8e4fbdcba598a5de96c48750fb7de84e5dcad585 Mon Sep 17 00:00:00 2001 From: Pranav Srinivas Kumar Date: Mon, 27 Jul 2020 10:13:45 -0500 Subject: [PATCH 4/5] Added single include generation --- README.md | 7 + single_include.json | 11 + single_include/csv2/csv2.hpp | 1902 ++++++++++++++++++++++++++++++++ utils/amalgamate/CHANGES.md | 10 + utils/amalgamate/LICENSE.md | 27 + utils/amalgamate/README.md | 66 ++ utils/amalgamate/amalgamate.py | 299 +++++ utils/amalgamate/config.json | 8 + 8 files changed, 2330 insertions(+) create mode 100644 single_include.json create mode 100644 single_include/csv2/csv2.hpp create mode 100644 utils/amalgamate/CHANGES.md create mode 100644 utils/amalgamate/LICENSE.md create mode 100644 utils/amalgamate/README.md create mode 100644 utils/amalgamate/amalgamate.py create mode 100644 utils/amalgamate/config.json diff --git a/README.md b/README.md index 367393b..355c46f 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ * [CSV Writer](#csv-writer) * [Writer API](#writer-api) * [Compiling Tests](#compiling-tests) +* [Generating Single Header](#generating-single-header) * [Contributing](#contributing) * [License](#license) @@ -190,6 +191,12 @@ cd test ./csv2_test ``` +## Generating Single Header + +```bash +python3 utils/amalgamate/amalgamate.py -c single_include.json -s . +``` + ## Contributing Contributions are welcome, have a look at the [CONTRIBUTING.md](CONTRIBUTING.md) document for more information. 
diff --git a/single_include.json b/single_include.json new file mode 100644 index 0000000..7676561 --- /dev/null +++ b/single_include.json @@ -0,0 +1,11 @@ +{ + "project": "CSV for Modern C++", + "target": "single_include/csv2/csv2.hpp", + "sources": [ + "include/csv2/mio.hpp", + "include/csv2/parameters.hpp", + "include/csv2/reader.hpp", + "include/csv2/writer.hpp" + ], + "include_paths": ["include"] +} diff --git a/single_include/csv2/csv2.hpp b/single_include/csv2/csv2.hpp new file mode 100644 index 0000000..57cb43d --- /dev/null +++ b/single_include/csv2/csv2.hpp @@ -0,0 +1,1902 @@ +/* Copyright 2017 https://github.com/mandreyel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this + * software and associated documentation files (the "Software"), to deal in the Software + * without restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be included in all copies + * or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef MIO_MMAP_HEADER +#define MIO_MMAP_HEADER + +// #include "mio/page.hpp" +/* Copyright 2017 https://github.com/mandreyel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this + * software and associated documentation files (the "Software"), to deal in the Software + * without restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be included in all copies + * or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef MIO_PAGE_HEADER +#define MIO_PAGE_HEADER + +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace mio { + +/** + * This is used by `basic_mmap` to determine whether to create a read-only or + * a read-write memory mapping. + */ +enum class access_mode { read, write }; + +/** + * Determines the operating system's page allocation granularity. + * + * On the first call to this function, it invokes the operating system specific syscall + * to determine the page size, caches the value, and returns it. Any subsequent call to + * this function serves the cached value, so no further syscalls are made. 
+ */ +inline size_t page_size() { + static const size_t page_size = [] { +#ifdef _WIN32 + SYSTEM_INFO SystemInfo; + GetSystemInfo(&SystemInfo); + return SystemInfo.dwAllocationGranularity; +#else + return sysconf(_SC_PAGE_SIZE); +#endif + }(); + return page_size; +} + +/** + * Aligns `offset` to the operating system's page size such that it subtracts the + * difference until the nearest page boundary before `offset`, or does nothing if + * `offset` is already page aligned. + */ +inline size_t make_offset_page_aligned(size_t offset) noexcept { + const size_t page_size_ = page_size(); + // Use integer division to round down to the nearest page alignment. + return offset / page_size_ * page_size_; +} + +} // namespace mio + +#endif // MIO_PAGE_HEADER + +#include +#include +#include +#include + +#ifdef _WIN32 +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif // WIN32_LEAN_AND_MEAN +#include +#else // ifdef _WIN32 +#define INVALID_HANDLE_VALUE -1 +#endif // ifdef _WIN32 + +namespace mio { + +// This value may be provided as the `length` parameter to the constructor or +// `map`, in which case a memory mapping of the entire file is created. +enum { map_entire_file = 0 }; + +#ifdef _WIN32 +using file_handle_type = HANDLE; +#else +using file_handle_type = int; +#endif + +// This value represents an invalid file handle type. This can be used to +// determine whether `basic_mmap::file_handle` is valid, for example. 
+const static file_handle_type invalid_handle = INVALID_HANDLE_VALUE; + +template struct basic_mmap { + using value_type = ByteT; + using size_type = size_t; + using reference = value_type &; + using const_reference = const value_type &; + using pointer = value_type *; + using const_pointer = const value_type *; + using difference_type = std::ptrdiff_t; + using iterator = pointer; + using const_iterator = const_pointer; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using iterator_category = std::random_access_iterator_tag; + using handle_type = file_handle_type; + + static_assert(sizeof(ByteT) == sizeof(char), "ByteT must be the same size as char."); + +private: + // Points to the first requested byte, and not to the actual start of the mapping. + pointer data_ = nullptr; + + // Length--in bytes--requested by user (which may not be the length of the + // full mapping) and the length of the full mapping. + size_type length_ = 0; + size_type mapped_length_ = 0; + + // Letting user map a file using both an existing file handle and a path + // introduces some complexity (see `is_handle_internal_`). + // On POSIX, we only need a file handle to create a mapping, while on + // Windows systems the file handle is necessary to retrieve a file mapping + // handle, but any subsequent operations on the mapped region must be done + // through the latter. + handle_type file_handle_ = INVALID_HANDLE_VALUE; +#ifdef _WIN32 + handle_type file_mapping_handle_ = INVALID_HANDLE_VALUE; +#endif + + // Letting user map a file using both an existing file handle and a path + // introduces some complexity in that we must not close the file handle if + // user provided it, but we must close it if we obtained it using the + // provided path. For this reason, this flag is used to determine when to + // close `file_handle_`. 
+ bool is_handle_internal_; + +public: + /** + * The default constructed mmap object is in a non-mapped state, that is, + * any operation that attempts to access nonexistent underlying data will + * result in undefined behaviour/segmentation faults. + */ + basic_mmap() = default; + +#ifdef __cpp_exceptions + /** + * The same as invoking the `map` function, except any error that may occur + * while establishing the mapping is wrapped in a `std::system_error` and is + * thrown. + */ + template + basic_mmap(const String &path, const size_type offset = 0, + const size_type length = map_entire_file) { + std::error_code error; + map(path, offset, length, error); + if (error) { + throw std::system_error(error); + } + } + + /** + * The same as invoking the `map` function, except any error that may occur + * while establishing the mapping is wrapped in a `std::system_error` and is + * thrown. + */ + basic_mmap(const handle_type handle, const size_type offset = 0, + const size_type length = map_entire_file) { + std::error_code error; + map(handle, offset, length, error); + if (error) { + throw std::system_error(error); + } + } +#endif // __cpp_exceptions + + /** + * `basic_mmap` has single-ownership semantics, so transferring ownership + * may only be accomplished by moving the object. + */ + basic_mmap(const basic_mmap &) = delete; + basic_mmap(basic_mmap &&); + basic_mmap &operator=(const basic_mmap &) = delete; + basic_mmap &operator=(basic_mmap &&); + + /** + * If this is a read-write mapping, the destructor invokes sync. Regardless + * of the access mode, unmap is invoked as a final step. + */ + ~basic_mmap(); + + /** + * On UNIX systems 'file_handle' and 'mapping_handle' are the same. On Windows, + * however, a mapped region of a file gets its own handle, which is returned by + * 'mapping_handle'. 
+ */ + handle_type file_handle() const noexcept { return file_handle_; } + handle_type mapping_handle() const noexcept; + + /** Returns whether a valid memory mapping has been created. */ + bool is_open() const noexcept { return file_handle_ != invalid_handle; } + + /** + * Returns true if no mapping was established, that is, conceptually the + * same as though the length that was mapped was 0. This function is + * provided so that this class has Container semantics. + */ + bool empty() const noexcept { return length() == 0; } + + /** Returns true if a mapping was established. */ + bool is_mapped() const noexcept; + + /** + * `size` and `length` both return the logical length, i.e. the number of bytes + * user requested to be mapped, while `mapped_length` returns the actual number of + * bytes that were mapped which is a multiple of the underlying operating system's + * page allocation granularity. + */ + size_type size() const noexcept { return length(); } + size_type length() const noexcept { return length_; } + size_type mapped_length() const noexcept { return mapped_length_; } + + /** Returns the offset relative to the start of the mapping. */ + size_type mapping_offset() const noexcept { return mapped_length_ - length_; } + + /** + * Returns a pointer to the first requested byte, or `nullptr` if no memory mapping + * exists. + */ + template ::type> + pointer data() noexcept { + return data_; + } + const_pointer data() const noexcept { return data_; } + + /** + * Returns an iterator to the first requested byte, if a valid memory mapping + * exists, otherwise this function call is undefined behaviour. + */ + template ::type> + iterator begin() noexcept { + return data(); + } + const_iterator begin() const noexcept { return data(); } + const_iterator cbegin() const noexcept { return data(); } + + /** + * Returns an iterator one past the last requested byte, if a valid memory mapping + * exists, otherwise this function call is undefined behaviour. 
+ */ + template ::type> + iterator end() noexcept { + return data() + length(); + } + const_iterator end() const noexcept { return data() + length(); } + const_iterator cend() const noexcept { return data() + length(); } + + /** + * Returns a reverse iterator to the last memory mapped byte, if a valid + * memory mapping exists, otherwise this function call is undefined + * behaviour. + */ + template ::type> + reverse_iterator rbegin() noexcept { + return reverse_iterator(end()); + } + const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } + const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); } + + /** + * Returns a reverse iterator past the first mapped byte, if a valid memory + * mapping exists, otherwise this function call is undefined behaviour. + */ + template ::type> + reverse_iterator rend() noexcept { + return reverse_iterator(begin()); + } + const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } + const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); } + + /** + * Returns a reference to the `i`th byte from the first requested byte (as returned + * by `data`). If this is invoked when no valid memory mapping has been created + * prior to this call, undefined behaviour ensues. + */ + reference operator[](const size_type i) noexcept { return data_[i]; } + const_reference operator[](const size_type i) const noexcept { return data_[i]; } + + /** + * Establishes a memory mapping with AccessMode. If the mapping is unsuccessful, the + * reason is reported via `error` and the object remains in a state as if this + * function hadn't been called. + * + * `path`, which must be a path to an existing file, is used to retrieve a file + * handle (which is closed when the object destructs or `unmap` is called), which is + * then used to memory map the requested region. 
Upon failure, `error` is set to + * indicate the reason and the object remains in an unmapped state. + * + * `offset` is the number of bytes, relative to the start of the file, where the + * mapping should begin. When specifying it, there is no need to worry about + * providing a value that is aligned with the operating system's page allocation + * granularity. This is adjusted by the implementation such that the first requested + * byte (as returned by `data` or `begin`), so long as `offset` is valid, will be at + * `offset` from the start of the file. + * + * `length` is the number of bytes to map. It may be `map_entire_file`, in which + * case a mapping of the entire file is created. + */ + template + void map(const String &path, const size_type offset, const size_type length, + std::error_code &error); + + /** + * Establishes a memory mapping with AccessMode. If the mapping is unsuccessful, the + * reason is reported via `error` and the object remains in a state as if this + * function hadn't been called. + * + * `path`, which must be a path to an existing file, is used to retrieve a file + * handle (which is closed when the object destructs or `unmap` is called), which is + * then used to memory map the requested region. Upon failure, `error` is set to + * indicate the reason and the object remains in an unmapped state. + * + * The entire file is mapped. + */ + template void map(const String &path, std::error_code &error) { + map(path, 0, map_entire_file, error); + } + + /** + * Establishes a memory mapping with AccessMode. If the mapping is + * unsuccessful, the reason is reported via `error` and the object remains in + * a state as if this function hadn't been called. + * + * `handle`, which must be a valid file handle, is used to memory map the + * requested region. Upon failure, `error` is set to indicate the reason and the + * object remains in an unmapped state. 
+ * + * `offset` is the number of bytes, relative to the start of the file, where the + * mapping should begin. When specifying it, there is no need to worry about + * providing a value that is aligned with the operating system's page allocation + * granularity. This is adjusted by the implementation such that the first requested + * byte (as returned by `data` or `begin`), so long as `offset` is valid, will be at + * `offset` from the start of the file. + * + * `length` is the number of bytes to map. It may be `map_entire_file`, in which + * case a mapping of the entire file is created. + */ + void map(const handle_type handle, const size_type offset, const size_type length, + std::error_code &error); + + /** + * Establishes a memory mapping with AccessMode. If the mapping is + * unsuccessful, the reason is reported via `error` and the object remains in + * a state as if this function hadn't been called. + * + * `handle`, which must be a valid file handle, is used to memory map the + * requested region. Upon failure, `error` is set to indicate the reason and the + * object remains in an unmapped state. + * + * The entire file is mapped. + */ + void map(const handle_type handle, std::error_code &error) { + map(handle, 0, map_entire_file, error); + } + + /** + * If a valid memory mapping has been created prior to this call, this call + * instructs the kernel to unmap the memory region and disassociate this object + * from the file. + * + * The file handle associated with the file that is mapped is only closed if the + * mapping was created using a file path. If, on the other hand, an existing + * file handle was used to create the mapping, the file handle is not closed. + */ + void unmap(); + + void swap(basic_mmap &other); + + /** Flushes the memory mapped page to disk. Errors are reported via `error`. 
*/ + template + typename std::enable_if::type sync(std::error_code &error); + + /** + * All operators compare the address of the first byte and size of the two mapped + * regions. + */ + +private: + template ::type> + pointer get_mapping_start() noexcept { + return !data() ? nullptr : data() - mapping_offset(); + } + + const_pointer get_mapping_start() const noexcept { + return !data() ? nullptr : data() - mapping_offset(); + } + + /** + * The destructor syncs changes to disk if `AccessMode` is `write`, but not + * if it's `read`, but since the destructor cannot be templated, we need to + * do SFINAE in a dedicated function, where one syncs and the other is a noop. + */ + template + typename std::enable_if::type conditional_sync(); + template + typename std::enable_if::type conditional_sync(); +}; + +template +bool operator==(const basic_mmap &a, const basic_mmap &b); + +template +bool operator!=(const basic_mmap &a, const basic_mmap &b); + +template +bool operator<(const basic_mmap &a, const basic_mmap &b); + +template +bool operator<=(const basic_mmap &a, const basic_mmap &b); + +template +bool operator>(const basic_mmap &a, const basic_mmap &b); + +template +bool operator>=(const basic_mmap &a, const basic_mmap &b); + +/** + * This is the basis for all read-only mmap objects and should be preferred over + * directly using `basic_mmap`. + */ +template using basic_mmap_source = basic_mmap; + +/** + * This is the basis for all read-write mmap objects and should be preferred over + * directly using `basic_mmap`. + */ +template using basic_mmap_sink = basic_mmap; + +/** + * These aliases cover the most common use cases, both representing a raw byte stream + * (either with a char or an unsigned char/uint8_t). 
+ */ +using mmap_source = basic_mmap_source; +using ummap_source = basic_mmap_source; + +using mmap_sink = basic_mmap_sink; +using ummap_sink = basic_mmap_sink; + +/** + * Convenience factory method that constructs a mapping for any `basic_mmap` or + * `basic_mmap` type. + */ +template +MMap make_mmap(const MappingToken &token, int64_t offset, int64_t length, std::error_code &error) { + MMap mmap; + mmap.map(token, offset, length, error); + return mmap; +} + +/** + * Convenience factory method. + * + * MappingToken may be a String (`std::string`, `std::string_view`, `const char*`, + * `std::filesystem::path`, `std::vector`, or similar), or a + * `mmap_source::handle_type`. + */ +template +mmap_source make_mmap_source(const MappingToken &token, mmap_source::size_type offset, + mmap_source::size_type length, std::error_code &error) { + return make_mmap(token, offset, length, error); +} + +template +mmap_source make_mmap_source(const MappingToken &token, std::error_code &error) { + return make_mmap_source(token, 0, map_entire_file, error); +} + +/** + * Convenience factory method. + * + * MappingToken may be a String (`std::string`, `std::string_view`, `const char*`, + * `std::filesystem::path`, `std::vector`, or similar), or a + * `mmap_sink::handle_type`. 
+ */ +template +mmap_sink make_mmap_sink(const MappingToken &token, mmap_sink::size_type offset, + mmap_sink::size_type length, std::error_code &error) { + return make_mmap(token, offset, length, error); +} + +template +mmap_sink make_mmap_sink(const MappingToken &token, std::error_code &error) { + return make_mmap_sink(token, 0, map_entire_file, error); +} + +} // namespace mio + +// #include "detail/mmap.ipp" +/* Copyright 2017 https://github.com/mandreyel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this + * software and associated documentation files (the "Software"), to deal in the Software + * without restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be included in all copies + * or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef MIO_BASIC_MMAP_IMPL +#define MIO_BASIC_MMAP_IMPL + +// #include "mio/mmap.hpp" + +// #include "mio/page.hpp" + +// #include "mio/detail/string_util.hpp" +/* Copyright 2017 https://github.com/mandreyel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this + * software and associated documentation files (the "Software"), to deal in the Software + * without restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be included in all copies + * or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef MIO_STRING_UTIL_HEADER +#define MIO_STRING_UTIL_HEADER + +#include + +namespace mio { +namespace detail { + +template ::type, + typename = decltype(std::declval().data()), + typename = typename std::enable_if::value +#ifdef _WIN32 + || std::is_same::value +#endif + >::type> +struct char_type_helper { + using type = typename C::value_type; +}; + +template struct char_type { using type = typename char_type_helper::type; }; + +// TODO: can we avoid this brute force approach? 
+template <> struct char_type { using type = char; }; + +template <> struct char_type { using type = char; }; + +template struct char_type { using type = char; }; + +template struct char_type { using type = char; }; + +#ifdef _WIN32 +template <> struct char_type { using type = wchar_t; }; + +template <> struct char_type { using type = wchar_t; }; + +template struct char_type { using type = wchar_t; }; + +template struct char_type { using type = wchar_t; }; +#endif // _WIN32 + +template struct is_c_str_helper { + static constexpr bool value = + std::is_same::type>::type>::type>::type>::value; +}; + +template struct is_c_str { + static constexpr bool value = is_c_str_helper::value; +}; + +#ifdef _WIN32 +template struct is_c_wstr { + static constexpr bool value = is_c_str_helper::value; +}; +#endif // _WIN32 + +template struct is_c_str_or_c_wstr { + static constexpr bool value = is_c_str::value +#ifdef _WIN32 + || is_c_wstr::value +#endif + ; +}; + +template ().data()), + typename = typename std::enable_if::value>::type> +const typename char_type::type *c_str(const String &path) { + return path.data(); +} + +template ().empty()), + typename = typename std::enable_if::value>::type> +bool empty(const String &path) { + return path.empty(); +} + +template ::value>::type> +const typename char_type::type *c_str(String path) { + return path; +} + +template ::value>::type> +bool empty(String path) { + return !path || (*path == 0); +} + +} // namespace detail +} // namespace mio + +#endif // MIO_STRING_UTIL_HEADER + +#include + +#ifndef _WIN32 +#include +#include +#include +#include +#endif + +namespace mio { +namespace detail { + +#ifdef _WIN32 +namespace win { + +/** Returns the 4 upper bytes of an 8-byte integer. */ +inline DWORD int64_high(int64_t n) noexcept { return n >> 32; } + +/** Returns the 4 lower bytes of an 8-byte integer. 
*/ +inline DWORD int64_low(int64_t n) noexcept { return n & 0xffffffff; } + +template ::type, char>::value>::type> +file_handle_type open_file_helper(const String &path, const access_mode mode) { + return ::CreateFileA( + c_str(path), mode == access_mode::read ? GENERIC_READ : GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); +} + +template +typename std::enable_if::type, wchar_t>::value, + file_handle_type>::type +open_file_helper(const String &path, const access_mode mode) { + return ::CreateFileW( + c_str(path), mode == access_mode::read ? GENERIC_READ : GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); +} + +} // namespace win +#endif // _WIN32 + +/** + * Returns the last platform specific system error (errno on POSIX and + * GetLastError on Win) as a `std::error_code`. + */ +inline std::error_code last_error() noexcept { + std::error_code error; +#ifdef _WIN32 + error.assign(GetLastError(), std::system_category()); +#else + error.assign(errno, std::system_category()); +#endif + return error; +} + +template +file_handle_type open_file(const String &path, const access_mode mode, std::error_code &error) { + error.clear(); + if (detail::empty(path)) { + error = std::make_error_code(std::errc::invalid_argument); + return invalid_handle; + } +#ifdef _WIN32 + const auto handle = win::open_file_helper(path, mode); +#else // POSIX + const auto handle = ::open(c_str(path), mode == access_mode::read ? 
O_RDONLY : O_RDWR); +#endif + if (handle == invalid_handle) { + error = detail::last_error(); + } + return handle; +} + +inline size_t query_file_size(file_handle_type handle, std::error_code &error) { + error.clear(); +#ifdef _WIN32 + LARGE_INTEGER file_size; + if (::GetFileSizeEx(handle, &file_size) == 0) { + error = detail::last_error(); + return 0; + } + return static_cast(file_size.QuadPart); +#else // POSIX + struct stat sbuf; + if (::fstat(handle, &sbuf) == -1) { + error = detail::last_error(); + return 0; + } + return sbuf.st_size; +#endif +} + +struct mmap_context { + char *data; + int64_t length; + int64_t mapped_length; +#ifdef _WIN32 + file_handle_type file_mapping_handle; +#endif +}; + +inline mmap_context memory_map(const file_handle_type file_handle, const int64_t offset, + const int64_t length, const access_mode mode, + std::error_code &error) { + const int64_t aligned_offset = make_offset_page_aligned(offset); + const int64_t length_to_map = offset - aligned_offset + length; +#ifdef _WIN32 + const int64_t max_file_size = offset + length; + const auto file_mapping_handle = ::CreateFileMapping( + file_handle, 0, mode == access_mode::read ? PAGE_READONLY : PAGE_READWRITE, + win::int64_high(max_file_size), win::int64_low(max_file_size), 0); + if (file_mapping_handle == invalid_handle) { + error = detail::last_error(); + return {}; + } + char *mapping_start = static_cast(::MapViewOfFile( + file_mapping_handle, mode == access_mode::read ? FILE_MAP_READ : FILE_MAP_WRITE, + win::int64_high(aligned_offset), win::int64_low(aligned_offset), length_to_map)); + if (mapping_start == nullptr) { + // Close file handle if mapping it failed. + ::CloseHandle(file_mapping_handle); + error = detail::last_error(); + return {}; + } +#else // POSIX + char *mapping_start = + static_cast(::mmap(0, // Don't give hint as to where to map. + length_to_map, mode == access_mode::read ? 
PROT_READ : PROT_WRITE, + MAP_SHARED, file_handle, aligned_offset)); + if (mapping_start == MAP_FAILED) { + error = detail::last_error(); + return {}; + } +#endif + mmap_context ctx; + ctx.data = mapping_start + offset - aligned_offset; + ctx.length = length; + ctx.mapped_length = length_to_map; +#ifdef _WIN32 + ctx.file_mapping_handle = file_mapping_handle; +#endif + return ctx; +} + +} // namespace detail + +// -- basic_mmap -- + +template basic_mmap::~basic_mmap() { + conditional_sync(); + unmap(); +} + +template +basic_mmap::basic_mmap(basic_mmap &&other) + : data_(std::move(other.data_)), length_(std::move(other.length_)), + mapped_length_(std::move(other.mapped_length_)), file_handle_(std::move(other.file_handle_)) +#ifdef _WIN32 + , + file_mapping_handle_(std::move(other.file_mapping_handle_)) +#endif + , + is_handle_internal_(std::move(other.is_handle_internal_)) { + other.data_ = nullptr; + other.length_ = other.mapped_length_ = 0; + other.file_handle_ = invalid_handle; +#ifdef _WIN32 + other.file_mapping_handle_ = invalid_handle; +#endif +} + +template +basic_mmap &basic_mmap::operator=(basic_mmap &&other) { + if (this != &other) { + // First the existing mapping needs to be removed. + unmap(); + data_ = std::move(other.data_); + length_ = std::move(other.length_); + mapped_length_ = std::move(other.mapped_length_); + file_handle_ = std::move(other.file_handle_); +#ifdef _WIN32 + file_mapping_handle_ = std::move(other.file_mapping_handle_); +#endif + is_handle_internal_ = std::move(other.is_handle_internal_); + + // The moved from basic_mmap's fields need to be reset, because + // otherwise other's destructor will unmap the same mapping that was + // just moved into this. 
+ other.data_ = nullptr; + other.length_ = other.mapped_length_ = 0; + other.file_handle_ = invalid_handle; +#ifdef _WIN32 + other.file_mapping_handle_ = invalid_handle; +#endif + other.is_handle_internal_ = false; + } + return *this; +} + +template +typename basic_mmap::handle_type +basic_mmap::mapping_handle() const noexcept { +#ifdef _WIN32 + return file_mapping_handle_; +#else + return file_handle_; +#endif +} + +template +template +void basic_mmap::map(const String &path, const size_type offset, + const size_type length, std::error_code &error) { + error.clear(); + if (detail::empty(path)) { + error = std::make_error_code(std::errc::invalid_argument); + return; + } + const auto handle = detail::open_file(path, AccessMode, error); + if (error) { + return; + } + + map(handle, offset, length, error); + // This MUST be after the call to map, as that sets this to true. + if (!error) { + is_handle_internal_ = true; + } +} + +template +void basic_mmap::map(const handle_type handle, const size_type offset, + const size_type length, std::error_code &error) { + error.clear(); + if (handle == invalid_handle) { + error = std::make_error_code(std::errc::bad_file_descriptor); + return; + } + + const auto file_size = detail::query_file_size(handle, error); + if (error) { + return; + } + + if (offset + length > file_size) { + error = std::make_error_code(std::errc::invalid_argument); + return; + } + + const auto ctx = detail::memory_map( + handle, offset, length == map_entire_file ? (file_size - offset) : length, AccessMode, error); + if (!error) { + // We must unmap the previous mapping that may have existed prior to this call. + // Note that this must only be invoked after a new mapping has been created in + // order to provide the strong guarantee that, should the new mapping fail, the + // `map` function leaves this instance in a state as though the function had + // never been invoked. 
+ unmap(); + file_handle_ = handle; + is_handle_internal_ = false; + data_ = reinterpret_cast(ctx.data); + length_ = ctx.length; + mapped_length_ = ctx.mapped_length; +#ifdef _WIN32 + file_mapping_handle_ = ctx.file_mapping_handle; +#endif + } +} + +template +template +typename std::enable_if::type +basic_mmap::sync(std::error_code &error) { + error.clear(); + if (!is_open()) { + error = std::make_error_code(std::errc::bad_file_descriptor); + return; + } + + if (data()) { +#ifdef _WIN32 + if (::FlushViewOfFile(get_mapping_start(), mapped_length_) == 0 || + ::FlushFileBuffers(file_handle_) == 0) +#else // POSIX + if (::msync(get_mapping_start(), mapped_length_, MS_SYNC) != 0) +#endif + { + error = detail::last_error(); + return; + } + } +#ifdef _WIN32 + if (::FlushFileBuffers(file_handle_) == 0) { + error = detail::last_error(); + } +#endif +} + +template void basic_mmap::unmap() { + if (!is_open()) { + return; + } + // TODO do we care about errors here? +#ifdef _WIN32 + if (is_mapped()) { + ::UnmapViewOfFile(get_mapping_start()); + ::CloseHandle(file_mapping_handle_); + } +#else // POSIX + if (data_) { + ::munmap(const_cast(get_mapping_start()), mapped_length_); + } +#endif + + // If `file_handle_` was obtained by our opening it (when map is called with + // a path, rather than an existing file handle), we need to close it, + // otherwise it must not be closed as it may still be used outside this + // instance. + if (is_handle_internal_) { +#ifdef _WIN32 + ::CloseHandle(file_handle_); +#else // POSIX + ::close(file_handle_); +#endif + } + + // Reset fields to their default values. 
+ data_ = nullptr; + length_ = mapped_length_ = 0; + file_handle_ = invalid_handle; +#ifdef _WIN32 + file_mapping_handle_ = invalid_handle; +#endif +} + +template +bool basic_mmap::is_mapped() const noexcept { +#ifdef _WIN32 + return file_mapping_handle_ != invalid_handle; +#else // POSIX + return is_open(); +#endif +} + +template +void basic_mmap::swap(basic_mmap &other) { + if (this != &other) { + using std::swap; + swap(data_, other.data_); + swap(file_handle_, other.file_handle_); +#ifdef _WIN32 + swap(file_mapping_handle_, other.file_mapping_handle_); +#endif + swap(length_, other.length_); + swap(mapped_length_, other.mapped_length_); + swap(is_handle_internal_, other.is_handle_internal_); + } +} + +template +template +typename std::enable_if::type +basic_mmap::conditional_sync() { + // This is invoked from the destructor, so not much we can do about + // failures here. + std::error_code ec; + sync(ec); +} + +template +template +typename std::enable_if::type +basic_mmap::conditional_sync() { + // noop +} + +template +bool operator==(const basic_mmap &a, const basic_mmap &b) { + return a.data() == b.data() && a.size() == b.size(); +} + +template +bool operator!=(const basic_mmap &a, const basic_mmap &b) { + return !(a == b); +} + +template +bool operator<(const basic_mmap &a, const basic_mmap &b) { + if (a.data() == b.data()) { + return a.size() < b.size(); + } + return a.data() < b.data(); +} + +template +bool operator<=(const basic_mmap &a, const basic_mmap &b) { + return !(a > b); +} + +template +bool operator>(const basic_mmap &a, const basic_mmap &b) { + if (a.data() == b.data()) { + return a.size() > b.size(); + } + return a.data() > b.data(); +} + +template +bool operator>=(const basic_mmap &a, const basic_mmap &b) { + return !(a < b); +} + +} // namespace mio + +#endif // MIO_BASIC_MMAP_IMPL + +#endif // MIO_MMAP_HEADER +/* Copyright 2017 https://github.com/mandreyel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy 
of this + * software and associated documentation files (the "Software"), to deal in the Software + * without restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be included in all copies + * or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef MIO_PAGE_HEADER +#define MIO_PAGE_HEADER + +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace mio { + +/** + * This is used by `basic_mmap` to determine whether to create a read-only or + * a read-write memory mapping. + */ +enum class access_mode { read, write }; + +/** + * Determines the operating system's page allocation granularity. + * + * On the first call to this function, it invokes the operating system specific syscall + * to determine the page size, caches the value, and returns it. Any subsequent call to + * this function serves the cached value, so no further syscalls are made. 
+ */ +inline size_t page_size() { + static const size_t page_size = [] { +#ifdef _WIN32 + SYSTEM_INFO SystemInfo; + GetSystemInfo(&SystemInfo); + return SystemInfo.dwAllocationGranularity; +#else + return sysconf(_SC_PAGE_SIZE); +#endif + }(); + return page_size; +} + +/** + * Alligns `offset` to the operating's system page size such that it subtracts the + * difference until the nearest page boundary before `offset`, or does nothing if + * `offset` is already page aligned. + */ +inline size_t make_offset_page_aligned(size_t offset) noexcept { + const size_t page_size_ = page_size(); + // Use integer division to round down to the nearest page alignment. + return offset / page_size_ * page_size_; +} + +} // namespace mio + +#endif // MIO_PAGE_HEADER +/* Copyright 2017 https://github.com/mandreyel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this + * software and associated documentation files (the "Software"), to deal in the Software + * without restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be included in all copies + * or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef MIO_SHARED_MMAP_HEADER +#define MIO_SHARED_MMAP_HEADER + +// #include "mio/mmap.hpp" + +#include // std::shared_ptr +#include // std::error_code + +namespace mio { + +/** + * Exposes (nearly) the same interface as `basic_mmap`, but endowes it with + * `std::shared_ptr` semantics. + * + * This is not the default behaviour of `basic_mmap` to avoid allocating on the heap if + * shared semantics are not required. + */ +template class basic_shared_mmap { + using impl_type = basic_mmap; + std::shared_ptr pimpl_; + +public: + using value_type = typename impl_type::value_type; + using size_type = typename impl_type::size_type; + using reference = typename impl_type::reference; + using const_reference = typename impl_type::const_reference; + using pointer = typename impl_type::pointer; + using const_pointer = typename impl_type::const_pointer; + using difference_type = typename impl_type::difference_type; + using iterator = typename impl_type::iterator; + using const_iterator = typename impl_type::const_iterator; + using reverse_iterator = typename impl_type::reverse_iterator; + using const_reverse_iterator = typename impl_type::const_reverse_iterator; + using iterator_category = typename impl_type::iterator_category; + using handle_type = typename impl_type::handle_type; + using mmap_type = impl_type; + + basic_shared_mmap() = default; + basic_shared_mmap(const basic_shared_mmap &) = default; + basic_shared_mmap &operator=(const basic_shared_mmap &) = default; + basic_shared_mmap(basic_shared_mmap &&) = default; + basic_shared_mmap &operator=(basic_shared_mmap &&) = default; + + /** Takes ownership of an existing mmap object. */ + basic_shared_mmap(mmap_type &&mmap) : pimpl_(std::make_shared(std::move(mmap))) {} + + /** Takes ownership of an existing mmap object. */ + basic_shared_mmap &operator=(mmap_type &&mmap) { + pimpl_ = std::make_shared(std::move(mmap)); + return *this; + } + + /** Initializes this object with an already established shared mmap. 
*/ + basic_shared_mmap(std::shared_ptr mmap) : pimpl_(std::move(mmap)) {} + + /** Initializes this object with an already established shared mmap. */ + basic_shared_mmap &operator=(std::shared_ptr mmap) { + pimpl_ = std::move(mmap); + return *this; + } + +#ifdef __cpp_exceptions + /** + * The same as invoking the `map` function, except any error that may occur + * while establishing the mapping is wrapped in a `std::system_error` and is + * thrown. + */ + template + basic_shared_mmap(const String &path, const size_type offset = 0, + const size_type length = map_entire_file) { + std::error_code error; + map(path, offset, length, error); + if (error) { + throw std::system_error(error); + } + } + + /** + * The same as invoking the `map` function, except any error that may occur + * while establishing the mapping is wrapped in a `std::system_error` and is + * thrown. + */ + basic_shared_mmap(const handle_type handle, const size_type offset = 0, + const size_type length = map_entire_file) { + std::error_code error; + map(handle, offset, length, error); + if (error) { + throw std::system_error(error); + } + } +#endif // __cpp_exceptions + + /** + * If this is a read-write mapping and the last reference to the mapping, + * the destructor invokes sync. Regardless of the access mode, unmap is + * invoked as a final step. + */ + ~basic_shared_mmap() = default; + + /** Returns the underlying `std::shared_ptr` instance that holds the mmap. */ + std::shared_ptr get_shared_ptr() { return pimpl_; } + + /** + * On UNIX systems 'file_handle' and 'mapping_handle' are the same. On Windows, + * however, a mapped region of a file gets its own handle, which is returned by + * 'mapping_handle'. + */ + handle_type file_handle() const noexcept { + return pimpl_ ? pimpl_->file_handle() : invalid_handle; + } + + handle_type mapping_handle() const noexcept { + return pimpl_ ? pimpl_->mapping_handle() : invalid_handle; + } + + /** Returns whether a valid memory mapping has been created. 
*/ + bool is_open() const noexcept { return pimpl_ && pimpl_->is_open(); } + + /** + * Returns true if no mapping was established, that is, conceptually the + * same as though the length that was mapped was 0. This function is + * provided so that this class has Container semantics. + */ + bool empty() const noexcept { return !pimpl_ || pimpl_->empty(); } + + /** + * `size` and `length` both return the logical length, i.e. the number of bytes + * user requested to be mapped, while `mapped_length` returns the actual number of + * bytes that were mapped which is a multiple of the underlying operating system's + * page allocation granularity. + */ + size_type size() const noexcept { return pimpl_ ? pimpl_->length() : 0; } + size_type length() const noexcept { return pimpl_ ? pimpl_->length() : 0; } + size_type mapped_length() const noexcept { return pimpl_ ? pimpl_->mapped_length() : 0; } + + /** + * Returns a pointer to the first requested byte, or `nullptr` if no memory mapping + * exists. + */ + template ::type> + pointer data() noexcept { + return pimpl_->data(); + } + const_pointer data() const noexcept { return pimpl_ ? pimpl_->data() : nullptr; } + + /** + * Returns an iterator to the first requested byte, if a valid memory mapping + * exists, otherwise this function call is undefined behaviour. + */ + iterator begin() noexcept { return pimpl_->begin(); } + const_iterator begin() const noexcept { return pimpl_->begin(); } + const_iterator cbegin() const noexcept { return pimpl_->cbegin(); } + + /** + * Returns an iterator one past the last requested byte, if a valid memory mapping + * exists, otherwise this function call is undefined behaviour. 
+ */ + template ::type> + iterator end() noexcept { + return pimpl_->end(); + } + const_iterator end() const noexcept { return pimpl_->end(); } + const_iterator cend() const noexcept { return pimpl_->cend(); } + + /** + * Returns a reverse iterator to the last memory mapped byte, if a valid + * memory mapping exists, otherwise this function call is undefined + * behaviour. + */ + template ::type> + reverse_iterator rbegin() noexcept { + return pimpl_->rbegin(); + } + const_reverse_iterator rbegin() const noexcept { return pimpl_->rbegin(); } + const_reverse_iterator crbegin() const noexcept { return pimpl_->crbegin(); } + + /** + * Returns a reverse iterator past the first mapped byte, if a valid memory + * mapping exists, otherwise this function call is undefined behaviour. + */ + template ::type> + reverse_iterator rend() noexcept { + return pimpl_->rend(); + } + const_reverse_iterator rend() const noexcept { return pimpl_->rend(); } + const_reverse_iterator crend() const noexcept { return pimpl_->crend(); } + + /** + * Returns a reference to the `i`th byte from the first requested byte (as returned + * by `data`). If this is invoked when no valid memory mapping has been created + * prior to this call, undefined behaviour ensues. + */ + reference operator[](const size_type i) noexcept { return (*pimpl_)[i]; } + const_reference operator[](const size_type i) const noexcept { return (*pimpl_)[i]; } + + /** + * Establishes a memory mapping with AccessMode. If the mapping is unsuccesful, the + * reason is reported via `error` and the object remains in a state as if this + * function hadn't been called. + * + * `path`, which must be a path to an existing file, is used to retrieve a file + * handle (which is closed when the object destructs or `unmap` is called), which is + * then used to memory map the requested region. Upon failure, `error` is set to + * indicate the reason and the object remains in an unmapped state. 
+ * + * `offset` is the number of bytes, relative to the start of the file, where the + * mapping should begin. When specifying it, there is no need to worry about + * providing a value that is aligned with the operating system's page allocation + * granularity. This is adjusted by the implementation such that the first requested + * byte (as returned by `data` or `begin`), so long as `offset` is valid, will be at + * `offset` from the start of the file. + * + * `length` is the number of bytes to map. It may be `map_entire_file`, in which + * case a mapping of the entire file is created. + */ + template + void map(const String &path, const size_type offset, const size_type length, + std::error_code &error) { + map_impl(path, offset, length, error); + } + + /** + * Establishes a memory mapping with AccessMode. If the mapping is unsuccesful, the + * reason is reported via `error` and the object remains in a state as if this + * function hadn't been called. + * + * `path`, which must be a path to an existing file, is used to retrieve a file + * handle (which is closed when the object destructs or `unmap` is called), which is + * then used to memory map the requested region. Upon failure, `error` is set to + * indicate the reason and the object remains in an unmapped state. + * + * The entire file is mapped. + */ + template void map(const String &path, std::error_code &error) { + map_impl(path, 0, map_entire_file, error); + } + + /** + * Establishes a memory mapping with AccessMode. If the mapping is unsuccesful, the + * reason is reported via `error` and the object remains in a state as if this + * function hadn't been called. + * + * `handle`, which must be a valid file handle, which is used to memory map the + * requested region. Upon failure, `error` is set to indicate the reason and the + * object remains in an unmapped state. + * + * `offset` is the number of bytes, relative to the start of the file, where the + * mapping should begin. 
When specifying it, there is no need to worry about + * providing a value that is aligned with the operating system's page allocation + * granularity. This is adjusted by the implementation such that the first requested + * byte (as returned by `data` or `begin`), so long as `offset` is valid, will be at + * `offset` from the start of the file. + * + * `length` is the number of bytes to map. It may be `map_entire_file`, in which + * case a mapping of the entire file is created. + */ + void map(const handle_type handle, const size_type offset, const size_type length, + std::error_code &error) { + map_impl(handle, offset, length, error); + } + + /** + * Establishes a memory mapping with AccessMode. If the mapping is unsuccesful, the + * reason is reported via `error` and the object remains in a state as if this + * function hadn't been called. + * + * `handle`, which must be a valid file handle, which is used to memory map the + * requested region. Upon failure, `error` is set to indicate the reason and the + * object remains in an unmapped state. + * + * The entire file is mapped. + */ + void map(const handle_type handle, std::error_code &error) { + map_impl(handle, 0, map_entire_file, error); + } + + /** + * If a valid memory mapping has been created prior to this call, this call + * instructs the kernel to unmap the memory region and disassociate this object + * from the file. + * + * The file handle associated with the file that is mapped is only closed if the + * mapping was created using a file path. If, on the other hand, an existing + * file handle was used to create the mapping, the file handle is not closed. + */ + void unmap() { + if (pimpl_) + pimpl_->unmap(); + } + + void swap(basic_shared_mmap &other) { pimpl_.swap(other.pimpl_); } + + /** Flushes the memory mapped page to disk. Errors are reported via `error`. 
*/ + template ::type> + void sync(std::error_code &error) { + if (pimpl_) + pimpl_->sync(error); + } + + /** All operators compare the underlying `basic_mmap`'s addresses. */ + + friend bool operator==(const basic_shared_mmap &a, const basic_shared_mmap &b) { + return a.pimpl_ == b.pimpl_; + } + + friend bool operator!=(const basic_shared_mmap &a, const basic_shared_mmap &b) { + return !(a == b); + } + + friend bool operator<(const basic_shared_mmap &a, const basic_shared_mmap &b) { + return a.pimpl_ < b.pimpl_; + } + + friend bool operator<=(const basic_shared_mmap &a, const basic_shared_mmap &b) { + return a.pimpl_ <= b.pimpl_; + } + + friend bool operator>(const basic_shared_mmap &a, const basic_shared_mmap &b) { + return a.pimpl_ > b.pimpl_; + } + + friend bool operator>=(const basic_shared_mmap &a, const basic_shared_mmap &b) { + return a.pimpl_ >= b.pimpl_; + } + +private: + template + void map_impl(const MappingToken &token, const size_type offset, const size_type length, + std::error_code &error) { + if (!pimpl_) { + mmap_type mmap = make_mmap(token, offset, length, error); + if (error) { + return; + } + pimpl_ = std::make_shared(std::move(mmap)); + } else { + pimpl_->map(token, offset, length, error); + } + } +}; + +/** + * This is the basis for all read-only mmap objects and should be preferred over + * directly using basic_shared_mmap. + */ +template +using basic_shared_mmap_source = basic_shared_mmap; + +/** + * This is the basis for all read-write mmap objects and should be preferred over + * directly using basic_shared_mmap. + */ +template +using basic_shared_mmap_sink = basic_shared_mmap; + +/** + * These aliases cover the most common use cases, both representing a raw byte stream + * (either with a char or an unsigned char/uint8_t). 
+ */ +using shared_mmap_source = basic_shared_mmap_source; +using shared_ummap_source = basic_shared_mmap_source; + +using shared_mmap_sink = basic_shared_mmap_sink; +using shared_ummap_sink = basic_shared_mmap_sink; + +} // namespace mio + +#endif // MIO_SHARED_MMAP_HEADER +#pragma once +#include + +namespace csv2 { + +namespace trim_policy { +struct no_trimming { +public: + static std::pair trim(const char *buffer, size_t start, size_t end) { + (void)(buffer); // to silence unused parameter warning + return {start, end}; + } +}; + +template struct trim_characters { +private: + constexpr static bool is_trim_char(char) { return false; } + + template constexpr static bool is_trim_char(char c, char head, Tail... tail) { + return c == head || is_trim_char(c, tail...); + } + +public: + static std::pair trim(const char *buffer, size_t start, size_t end) { + size_t new_start = start, new_end = end; + while (new_start != new_end && is_trim_char(buffer[new_start], character_list...)) + ++new_start; + while (new_start != new_end && is_trim_char(buffer[new_end - 1], character_list...)) + --new_end; + return {new_start, new_end}; + } +}; + +using trim_whitespace = trim_characters<' ', '\t'>; +} // namespace trim_policy + +template struct delimiter { + constexpr static char value = character; +}; + +template struct quote_character { + constexpr static char value = character; +}; + +template struct first_row_is_header { + constexpr static bool value = flag; +}; + +}#pragma once +#include +// #include +// #include +#include +#include + +namespace csv2 { + +template , class quote_character = quote_character<'"'>, + class first_row_is_header = first_row_is_header, + class trim_policy = trim_policy::trim_whitespace> +class Reader { + mio::mmap_source mmap_; // mmap source + const char *buffer_{nullptr}; // pointer to memory-mapped data + size_t buffer_size_{0}; // mapped length of buffer + size_t header_start_{0}; // start index of header (cache) + size_t header_end_{0}; // end 
index of header (cache) + +public: + // Use this if you'd like to mmap the CSV file + template bool mmap(StringType &&filename) { + mmap_ = mio::mmap_source(filename); + if (!mmap_.is_open() || !mmap_.is_mapped()) + return false; + buffer_ = mmap_.data(); + buffer_size_ = mmap_.mapped_length(); + return true; + } + + // Use this if you have the CSV contents + // in an std::string already + template bool parse(StringType &&contents) { + buffer_ = std::forward(contents).c_str(); + buffer_size_ = contents.size(); + return buffer_size_ > 0; + } + + class RowIterator; + class Row; + class CellIterator; + + class Cell { + const char *buffer_{nullptr}; // Pointer to memory-mapped buffer + size_t start_{0}; // Start index of cell content + size_t end_{0}; // End index of cell content + bool escaped_{false}; // Does the cell have escaped content? + friend class Row; + friend class CellIterator; + + public: + // Returns the raw_value of the cell without handling escaped + // content, e.g., cell containing """foo""" will be returned + // as is + template void read_raw_value(Container &result) const { + if (start_ >= end_) + return; + result.reserve(end_ - start_); + for (size_t i = start_; i < end_; ++i) + result.push_back(buffer_[i]); + } + + // If cell is escaped, convert and return correct cell contents, + // e.g., """foo""" => ""foo"" + template void read_value(Container &result) const { + if (start_ >= end_) + return; + result.reserve(end_ - start_); + const auto new_start_end = trim_policy::trim(buffer_, start_, end_); + for (size_t i = new_start_end.first; i < new_start_end.second; ++i) + result.push_back(buffer_[i]); + for (size_t i = 1; i < result.size(); ++i) { + if (result[i] == quote_character::value && result[i - 1] == quote_character::value) { + result.erase(i - 1, 1); + } + } + } + }; + + class Row { + const char *buffer_{nullptr}; // Pointer to memory-mapped buffer + size_t start_{0}; // Start index of row content + size_t end_{0}; // End index of row content 
+ friend class RowIterator; + friend class Reader; + + public: + // Returns the raw_value of the row + template void read_raw_value(Container &result) const { + if (start_ >= end_) + return; + result.reserve(end_ - start_); + for (size_t i = start_; i < end_; ++i) + result.push_back(buffer_[i]); + } + + class CellIterator { + friend class Row; + const char *buffer_; + size_t buffer_size_; + size_t start_; + size_t current_; + size_t end_; + + public: + CellIterator(const char *buffer, size_t buffer_size, size_t start, size_t end) + : buffer_(buffer), buffer_size_(buffer_size), start_(start), current_(start_), end_(end) { + } + + CellIterator &operator++() { + current_ += 1; + return *this; + } + + Cell operator*() { + bool escaped{false}; + class Cell cell; + cell.buffer_ = buffer_; + cell.start_ = current_; + cell.end_ = end_; + + size_t last_quote_location = 0; + bool quote_opened = false; + for (auto i = current_; i < end_; i++) { + current_ = i; + if (buffer_[i] == delimiter::value && !quote_opened) { + // actual delimiter + // end of cell + cell.end_ = current_; + cell.escaped_ = escaped; + return cell; + } else { + if (buffer_[i] == quote_character::value) { + if (!quote_opened) { + // first quote for this cell + quote_opened = true; + last_quote_location = i; + } else { + escaped = (last_quote_location == i - 1); + last_quote_location += (i - last_quote_location) * size_t(!escaped); + quote_opened = escaped || (buffer_[i + 1] != delimiter::value); + } + } + } + } + cell.end_ = current_ + 1; + return cell; + } + + bool operator!=(const CellIterator &rhs) { return current_ != rhs.current_; } + }; + + CellIterator begin() const { return CellIterator(buffer_, end_ - start_, start_, end_); } + CellIterator end() const { return CellIterator(buffer_, end_ - start_, end_, end_); } + }; + + class RowIterator { + friend class Reader; + const char *buffer_; + size_t buffer_size_; + size_t start_; + size_t end_; + + public: + RowIterator(const char *buffer, size_t 
buffer_size, size_t start) + : buffer_(buffer), buffer_size_(buffer_size), start_(start), end_(start_) {} + + RowIterator &operator++() { + start_ = end_ + 1; + end_ = start_; + return *this; + } + + Row operator*() { + Row result; + result.buffer_ = buffer_; + result.start_ = start_; + result.end_ = end_; + + if (const char *ptr = + static_cast(memchr(&buffer_[start_], '\n', (buffer_size_ - start_)))) { + end_ = start_ + (ptr - &buffer_[start_]); + result.end_ = end_; + start_ = end_ + 1; + } else { + // last row + end_ = buffer_size_; + result.end_ = end_; + } + return result; + } + + bool operator!=(const RowIterator &rhs) { return start_ != rhs.start_; } + }; + + RowIterator begin() const { + if (buffer_size_ == 0) + return end(); + if (first_row_is_header::value) { + const auto header_indices = header_indices_(); + return RowIterator(buffer_, buffer_size_, header_indices.second > 0 ? header_indices.second + 1 : 0); + } else { + return RowIterator(buffer_, buffer_size_, 0); + } + } + + RowIterator end() const { return RowIterator(buffer_, buffer_size_, buffer_size_ + 1); } + +private: + std::pair header_indices_() const { + size_t start = 0, end = 0; + + if (const char *ptr = + static_cast(memchr(&buffer_[start], '\n', (buffer_size_ - start)))) { + end = start + (ptr - &buffer_[start]); + } + return {start, end}; + } + +public: + + Row header() const { + size_t start = 0, end = 0; + Row result; + result.buffer_ = buffer_; + result.start_ = start; + result.end_ = end; + + if (const char *ptr = + static_cast(memchr(&buffer_[start], '\n', (buffer_size_ - start)))) { + end = start + (ptr - &buffer_[start]); + result.end_ = end; + } + return result; + } + + size_t rows() const { + size_t result{0}; + if (!buffer_ || buffer_size_ == 0) + return result; + for (const char *p = buffer_; + (p = static_cast(memchr(p, '\n', (buffer_ + buffer_size_) - p))); ++p) + ++result; + return result; + } + + size_t cols() const { + size_t result{0}; + for (const auto cell : header()) 
+ result += 1; + return result; + } +}; +} // namespace csv2#pragma once +#include +// #include +#include +#include +#include +#include + +namespace csv2 { + +template > +class Writer { + std::ofstream& stream_; // output stream for the writer +public: + template + Writer(Stream&& stream) : stream_(std::forward(stream)) {} + + ~Writer() { + stream_.close(); + } + + template + void write_row(Container&& row) { + const auto& strings = std::forward(row); + const auto delimiter_string = std::string(1, delimiter::value); + std::copy(strings.begin(), strings.end() - 1, + std::ostream_iterator(stream_, delimiter_string.c_str())); + stream_ << strings.back() << "\n"; + } + + template + void write_rows(Container&& rows) { + const auto& container_of_rows = std::forward(rows); + for (const auto& row : container_of_rows) { + write_row(row); + } + } +}; + +} \ No newline at end of file diff --git a/utils/amalgamate/CHANGES.md b/utils/amalgamate/CHANGES.md new file mode 100644 index 0000000..728b933 --- /dev/null +++ b/utils/amalgamate/CHANGES.md @@ -0,0 +1,10 @@ +The following changes have been made to the code with respect to : + +- Resolved inspection results from PyCharm: + - replaced tabs with spaces + - added encoding annotation + - reindented file to remove trailing whitespaces + - unused import `sys` + - membership check + - made function from `_is_within` + - removed unused variable `actual_path` diff --git a/utils/amalgamate/LICENSE.md b/utils/amalgamate/LICENSE.md new file mode 100644 index 0000000..7fe9cf0 --- /dev/null +++ b/utils/amalgamate/LICENSE.md @@ -0,0 +1,27 @@ +amalgamate.py - Amalgamate C source and header files +Copyright (c) 2012, Erik Edlund + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of Erik Edlund, nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/utils/amalgamate/README.md b/utils/amalgamate/README.md new file mode 100644 index 0000000..975ca0b --- /dev/null +++ b/utils/amalgamate/README.md @@ -0,0 +1,66 @@ + +# amalgamate.py - Amalgamate C source and header files + +Origin: https://bitbucket.org/erikedlund/amalgamate + +Mirror: https://github.com/edlund/amalgamate + +`amalgamate.py` aims to make it easy to use SQLite-style C source and header +amalgamation in projects. + +For more information, please refer to: http://sqlite.org/amalgamation.html + +## Here be dragons + +`amalgamate.py` is quite dumb, it only knows the bare minimum about C code +required in order to be able to handle trivial include directives. It can +produce weird results for unexpected code. 
+ +Things to be aware of: + +`amalgamate.py` will not handle complex include directives correctly: + + #define HEADER_PATH "path/to/header.h" + #include HEADER_PATH + +In the above example, `path/to/header.h` will not be included in the +amalgamation (HEADER_PATH is never expanded). + +`amalgamate.py` makes the assumption that each source and header file which +is not empty will end in a new-line character, which is not immediately +preceded by a backslash character (see 5.1.1.2p1.2 of ISO C99). + +`amalgamate.py` should be usable with C++ code, but raw string literals from +C++11 will definitely cause problems: + + R"delimiter(Terrible raw \ data " #include )delimiter" + R"delimiter(Terrible raw \ data " escaping)delimiter" + +In the examples above, `amalgamate.py` will stop parsing the raw string literal +when it encounters the first quotation mark, which will produce unexpected +results. + +## Installing amalgamate.py + +Python v.2.7.0 or higher is required. + +`amalgamate.py` can be tested and installed using the following commands: + + ./test.sh && sudo -k cp ./amalgamate.py /usr/local/bin/ + +## Using amalgamate.py + + amalgamate.py [-v] -c path/to/config.json -s path/to/source/dir \ + [-p path/to/prologue.(c|h)] + + * The `-c, --config` option should specify the path to a JSON config file which + lists the source files, include paths and where to write the resulting + amalgamation. Have a look at `test/source.c.json` and `test/include.h.json` + to see two examples. + + * The `-s, --source` option should specify the path to the source directory. + This is useful for supporting separate source and build directories. + + * The `-p, --prologue` option should specify the path to a file which will be + added to the beginning of the amalgamation. It is optional. 
+ diff --git a/utils/amalgamate/amalgamate.py b/utils/amalgamate/amalgamate.py new file mode 100644 index 0000000..a3e3df5 --- /dev/null +++ b/utils/amalgamate/amalgamate.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python +# coding=utf-8 + +# amalgamate.py - Amalgamate C source and header files. +# Copyright (c) 2012, Erik Edlund +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Erik Edlund, nor the names of its contributors may +# be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import datetime +import json +import os +import re + + +class Amalgamation(object): + + # Prepends self.source_path to file_path if needed. + def actual_path(self, file_path): + if not os.path.isabs(file_path): + file_path = os.path.join(self.source_path, file_path) + return file_path + + # Search included file_path in self.include_paths and + # in source_dir if specified. + def find_included_file(self, file_path, source_dir): + search_dirs = self.include_paths[:] + if source_dir: + search_dirs.insert(0, source_dir) + + for search_dir in search_dirs: + search_path = os.path.join(search_dir, file_path) + if os.path.isfile(self.actual_path(search_path)): + return search_path + return None + + def __init__(self, args): + with open(args.config, 'r') as f: + config = json.loads(f.read()) + for key in config: + setattr(self, key, config[key]) + + self.verbose = args.verbose == "yes" + self.prologue = args.prologue + self.source_path = args.source_path + self.included_files = [] + + # Generate the amalgamation and write it to the target file. + def generate(self): + amalgamation = "" + + if self.prologue: + with open(self.prologue, 'r') as f: + amalgamation += datetime.datetime.now().strftime(f.read()) + + if self.verbose: + print("Config:") + print(" target = {0}".format(self.target)) + print(" working_dir = {0}".format(os.getcwd())) + print(" include_paths = {0}".format(self.include_paths)) + print("Creating amalgamation:") + for file_path in self.sources: + # Do not check the include paths while processing the source + # list, all given source paths must be correct. 
+ # actual_path = self.actual_path(file_path) + print(" - processing \"{0}\"".format(file_path)) + t = TranslationUnit(file_path, self, True) + amalgamation += t.content + + with open(self.target, 'w') as f: + f.write(amalgamation) + + print("...done!\n") + if self.verbose: + print("Files processed: {0}".format(self.sources)) + print("Files included: {0}".format(self.included_files)) + print("") + + +def _is_within(match, matches): + for m in matches: + if match.start() > m.start() and \ + match.end() < m.end(): + return True + return False + + +class TranslationUnit(object): + # // C++ comment. + cpp_comment_pattern = re.compile(r"//.*?\n") + + # /* C comment. */ + c_comment_pattern = re.compile(r"/\*.*?\*/", re.S) + + # "complex \"stri\\\ng\" value". + string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S) + + # Handle simple include directives. Support for advanced + # directives where macros and defines needs to expanded is + # not a concern right now. + include_pattern = re.compile( + r'#\s*include\s+(<|")(?P.*?)("|>)', re.S) + + # #pragma once + pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S) + + # Search for pattern in self.content, add the match to + # contexts if found and update the index accordingly. + def _search_content(self, index, pattern, contexts): + match = pattern.search(self.content, index) + if match: + contexts.append(match) + return match.end() + return index + 2 + + # Return all the skippable contexts, i.e., comments and strings + def _find_skippable_contexts(self): + # Find contexts in the content in which a found include + # directive should not be processed. + skippable_contexts = [] + + # Walk through the content char by char, and try to grab + # skippable contexts using regular expressions when found. + i = 1 + content_len = len(self.content) + while i < content_len: + j = i - 1 + current = self.content[i] + previous = self.content[j] + + if current == '"': + # String value. 
+ i = self._search_content(j, self.string_pattern, + skippable_contexts) + elif current == '*' and previous == '/': + # C style comment. + i = self._search_content(j, self.c_comment_pattern, + skippable_contexts) + elif current == '/' and previous == '/': + # C++ style comment. + i = self._search_content(j, self.cpp_comment_pattern, + skippable_contexts) + else: + # Skip to the next char. + i += 1 + + return skippable_contexts + + # Returns True if the match is within list of other matches + + # Removes pragma once from content + def _process_pragma_once(self): + content_len = len(self.content) + if content_len < len("#include "): + return 0 + + # Find contexts in the content in which a found include + # directive should not be processed. + skippable_contexts = self._find_skippable_contexts() + + pragmas = [] + pragma_once_match = self.pragma_once_pattern.search(self.content) + while pragma_once_match: + if not _is_within(pragma_once_match, skippable_contexts): + pragmas.append(pragma_once_match) + + pragma_once_match = self.pragma_once_pattern.search(self.content, + pragma_once_match.end()) + + # Handle all collected pragma once directives. + prev_end = 0 + tmp_content = '' + for pragma_match in pragmas: + tmp_content += self.content[prev_end:pragma_match.start()] + prev_end = pragma_match.end() + tmp_content += self.content[prev_end:] + self.content = tmp_content + + # Include all trivial #include directives into self.content. + def _process_includes(self): + content_len = len(self.content) + if content_len < len("#include "): + return 0 + + # Find contexts in the content in which a found include + # directive should not be processed. + skippable_contexts = self._find_skippable_contexts() + + # Search for include directives in the content, collect those + # which should be included into the content. 
+ includes = [] + include_match = self.include_pattern.search(self.content) + while include_match: + if not _is_within(include_match, skippable_contexts): + include_path = include_match.group("path") + search_same_dir = include_match.group(1) == '"' + found_included_path = self.amalgamation.find_included_file( + include_path, self.file_dir if search_same_dir else None) + if found_included_path: + includes.append((include_match, found_included_path)) + + include_match = self.include_pattern.search(self.content, + include_match.end()) + + # Handle all collected include directives. + prev_end = 0 + tmp_content = '' + for include in includes: + include_match, found_included_path = include + tmp_content += self.content[prev_end:include_match.start()] + tmp_content += "// {0}".format(include_match.group(0)) + if found_included_path not in self.amalgamation.included_files: + t = TranslationUnit(found_included_path, self.amalgamation, False) + tmp_content += t.content + prev_end = include_match.end() + tmp_content += self.content[prev_end:] + self.content = tmp_content + + return len(includes) + + # Make all content processing + def _process(self): + if not self.is_root: + self._process_pragma_once() + self._process_includes() + + def __init__(self, file_path, amalgamation, is_root): + self.file_path = file_path + self.file_dir = os.path.dirname(file_path) + self.amalgamation = amalgamation + self.is_root = is_root + + self.amalgamation.included_files.append(self.file_path) + + actual_path = self.amalgamation.actual_path(file_path) + if not os.path.isfile(actual_path): + raise IOError("File not found: \"{0}\"".format(file_path)) + with open(actual_path, 'r') as f: + self.content = f.read() + self._process() + + +def main(): + description = "Amalgamate C source and header files." 
+ usage = " ".join([ + "amalgamate.py", + "[-v]", + "-c path/to/config.json", + "-s path/to/source/dir", + "[-p path/to/prologue.(c|h)]" + ]) + argsparser = argparse.ArgumentParser( + description=description, usage=usage) + + argsparser.add_argument("-v", "--verbose", dest="verbose", + choices=["yes", "no"], metavar="", help="be verbose") + + argsparser.add_argument("-c", "--config", dest="config", + required=True, metavar="", help="path to a JSON config file") + + argsparser.add_argument("-s", "--source", dest="source_path", + required=True, metavar="", help="source code path") + + argsparser.add_argument("-p", "--prologue", dest="prologue", + required=False, metavar="", help="path to a C prologue file") + + amalgamation = Amalgamation(argsparser.parse_args()) + amalgamation.generate() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/utils/amalgamate/config.json b/utils/amalgamate/config.json new file mode 100644 index 0000000..4e215fc --- /dev/null +++ b/utils/amalgamate/config.json @@ -0,0 +1,8 @@ +{ + "project": "cgame competitive programming for codingame", + "target": "AllTrees.cpp", + "sources": [ + "test/gametheory/TreesTest.cpp" + ], + "include_paths": ["include"] +} From 5b6990b690e2982c33ad9b14bc537a041e8b0a65 Mon Sep 17 00:00:00 2001 From: Pranav Srinivas Kumar Date: Mon, 27 Jul 2020 10:18:58 -0500 Subject: [PATCH 5/5] Minor updates to single header include generation --- README.md | 1 + include/csv2/mio.hpp | 1 + include/csv2/parameters.hpp | 1 + include/csv2/reader.hpp | 1 + include/csv2/writer.hpp | 1 + single_include/csv2/csv2.hpp | 8 ++++++-- 6 files changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 355c46f..5ee9c0c 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,7 @@ int main() { }; writer.write_rows(rows); + stream.close(); } ``` diff --git a/include/csv2/mio.hpp b/include/csv2/mio.hpp index 627af43..48a8668 100644 --- a/include/csv2/mio.hpp +++ b/include/csv2/mio.hpp @@ -1,3 
+1,4 @@ + /* Copyright 2017 https://github.com/mandreyel * * Permission is hereby granted, free of charge, to any person obtaining a copy of this diff --git a/include/csv2/parameters.hpp b/include/csv2/parameters.hpp index d9f72b3..f3a858c 100644 --- a/include/csv2/parameters.hpp +++ b/include/csv2/parameters.hpp @@ -1,3 +1,4 @@ + #pragma once #include diff --git a/include/csv2/reader.hpp b/include/csv2/reader.hpp index ba823fb..45ffb80 100644 --- a/include/csv2/reader.hpp +++ b/include/csv2/reader.hpp @@ -1,3 +1,4 @@ + #pragma once #include #include diff --git a/include/csv2/writer.hpp b/include/csv2/writer.hpp index f53047b..eb0da78 100644 --- a/include/csv2/writer.hpp +++ b/include/csv2/writer.hpp @@ -1,3 +1,4 @@ + #pragma once #include #include diff --git a/single_include/csv2/csv2.hpp b/single_include/csv2/csv2.hpp index 57cb43d..f9997e0 100644 --- a/single_include/csv2/csv2.hpp +++ b/single_include/csv2/csv2.hpp @@ -1,3 +1,4 @@ + /* Copyright 2017 https://github.com/mandreyel * * Permission is hereby granted, free of charge, to any person obtaining a copy of this @@ -1559,6 +1560,7 @@ using shared_ummap_sink = basic_shared_mmap_sink; } // namespace mio #endif // MIO_SHARED_MMAP_HEADER + #pragma once #include @@ -1607,7 +1609,8 @@ template struct first_row_is_header { constexpr static bool value = flag; }; -}#pragma once +} +#pragma once #include // #include // #include @@ -1860,7 +1863,8 @@ class Reader { return result; } }; -} // namespace csv2#pragma once +} // namespace csv2 +#pragma once #include // #include #include